Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## (unreleased)

### New features and changes

* experimental SPDX 3.0 support, built on the `spdx-python-model` bindings:
* read an SPDX 3 JSON-LD file into memory via `spdx_tools.spdx3.parser.parse_file`

## v0.8.5 (2026-03-13)

### New features and changes
Expand Down
32 changes: 30 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,17 @@ dependencies = [
"pyyaml",
"rdflib",
"semantic_version",
"spdx-python-model>=0.0.6",
"uritools",
"xmltodict",
]
dynamic = ["version"]

[project.optional-dependencies]
test = ["pyshacl", "pytest", "tzdata"]
code_style = ["black", "flake8", "isort"]
code_style = ["black", "flake8", "isort", "mypy"]
graph_generation = ["networkx", "pygraphviz"]
development = ["black", "flake8", "isort", "networkx", "pyshacl", "pytest"]
development = ["black", "flake8", "isort", "mypy", "networkx", "pyshacl", "pytest"]

[project.scripts]
pyspdxtools = "spdx_tools.spdx.clitools.pyspdxtools:main"
Expand All @@ -59,6 +60,32 @@ Repository = "https://github.com/spdx/tools-python.git"
Issues = "https://github.com/spdx/tools-python/issues"
Changelog = "https://github.com/spdx/tools-python/blob/main/CHANGELOG.md"

[tool.mypy]
python_version = "3.10"
strict = true
mypy_path = "src"
explicit_package_bases = true
# The new SPDX 3 code is fully typed and checked under --strict.
# The legacy SPDX 2 modules are not strict-clean yet and are out of scope here;
# only the SPDX 3 modules are listed in `files` below, so legacy errors don't mask issues in the new code.
files = [
"src/spdx_tools/spdx3/formats.py",
"src/spdx_tools/spdx3/object_set.py",
"src/spdx_tools/spdx3/parser",
]

# spdx-python-model ships a py.typed marker, but its generated bindings expose
# many attributes as Any. Do not fail the strict run if its modules or type
# information cannot be resolved in the environment where mypy is executed.
[[tool.mypy.overrides]]
module = ["spdx_python_model.*"]
ignore_missing_imports = true

# Pre-existing SPDX 2 code imported by the new modules (e.g. SPDXParsingError)
# is not yet strict-typed; do not fail the new-code check on it.
[[tool.mypy.overrides]]
module = ["spdx_tools.spdx.*"]
ignore_errors = true

[tool.setuptools]
include-package-data = true

Expand All @@ -78,6 +105,7 @@ release = "clean --all sdist --formats=gztar bdist_wheel"

[tool.black]
line-length = 119
target-version = ["py310", "py311", "py312", "py313", "py314"]
include = "(^/src/.*.py|^/tests/.*.py)"

[tool.isort]
Expand Down
21 changes: 21 additions & 0 deletions src/spdx_tools/spdx3/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
from enum import Enum, auto

from spdx_tools.spdx.parser.error import SPDXParsingError


class FileFormat(Enum):
    """Serialization formats recognized for SPDX 3 input files."""

    # JSON-LD is the only member defined so far.
    JSON_LD = auto()


def file_name_to_format(file_name: str) -> FileFormat:
    """Map a file name to the SPDX 3 serialization implied by its ending.

    Names ending in ``.json`` or ``.jsonld`` map to ``FileFormat.JSON_LD``;
    this also covers the ``.spdx3.json`` suffix. The comparison is
    case-sensitive.

    Raises:
        SPDXParsingError: if the name has any other ending.
    """
    # SPDX 3.0 currently defines a single serialization: JSON-LD.
    # See:
    # https://github.com/OpenChain-Project/Telco-WG/blob/main/OpenChain-Telco-SBOM-Guide_1.2_DRAFT_EN.md
    # https://www.iana.org/assignments/media-types/application/spdx3+json
    json_ld_suffixes = (".json", ".jsonld")
    if not file_name.endswith(json_ld_suffixes):
        raise SPDXParsingError([f"Unsupported SPDX 3 file type: {file_name!s}"])
    return FileFormat.JSON_LD
22 changes: 22 additions & 0 deletions src/spdx_tools/spdx3/object_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Iterable, Optional, Protocol, Set, runtime_checkable


@runtime_checkable
class SpdxObjectSet(Protocol):
    """Structural interface of a SHACL object set, independent of the SPDX version.

    The ``SHACLObjectSet`` class of any ``spdx_python_model.vX_Y_Z`` version
    module satisfies this protocol, so code annotated with it is not bound to
    one specific SPDX spec version.

    Because the protocol is ``runtime_checkable``, ``isinstance`` checks work,
    but they only verify that the members below exist, not their signatures.
    """

    def foreach(self) -> Iterable[Any]:
        """Counterpart of ``SHACLObjectSet.foreach``."""
        ...

    def foreach_type(self, typ: Any, *, match_subclass: bool = True) -> Iterable[Any]:
        """Counterpart of ``SHACLObjectSet.foreach_type``."""
        ...

    def find_by_id(self, _id: str, default: Optional[Any] = None) -> Optional[Any]:
        """Counterpart of ``SHACLObjectSet.find_by_id``."""
        ...

    def add(self, obj: Any) -> Any:
        """Counterpart of ``SHACLObjectSet.add``."""
        ...

    def link(self) -> Set[str]:
        """Counterpart of ``SHACLObjectSet.link``."""
        ...
5 changes: 5 additions & 0 deletions src/spdx_tools/spdx3/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
from spdx_tools.spdx3.parser.parse_anything import parse_file

__all__ = ["parse_file"]
2 changes: 2 additions & 0 deletions src/spdx_tools/spdx3/parser/json_ld/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
21 changes: 21 additions & 0 deletions src/spdx_tools/spdx3/parser/json_ld/json_ld_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
from spdx_python_model import v3_0_1 as spdx_3_0

from spdx_tools.spdx3.object_set import SpdxObjectSet
from spdx_tools.spdx.parser.error import SPDXParsingError


def parse_from_file(file_name: str, encoding: str = "utf-8") -> SpdxObjectSet:
    """Read an SPDX 3 JSON-LD file into a SHACLObjectSet.

    The SHACLObjectSet is the in-memory representation provided by the
    spdx-python-model bindings.

    Args:
        file_name: Path of the JSON-LD file to read.
        encoding: Currently unused; the file is opened in binary mode and
            handed to the binding's deserializer as a byte stream.

    Returns:
        The object set populated by the deserializer.

    Raises:
        SPDXParsingError: If the file cannot be opened or read, or if
            deserialization fails. The underlying exception is attached as
            ``__cause__``.
    """
    object_set = spdx_3_0.SHACLObjectSet()
    try:
        # The binding's deserializer reads from a binary stream.
        with open(file_name, "rb") as file:
            spdx_3_0.JSONLDDeserializer().read(file, object_set)
    except OSError as err:
        # Chain explicitly so the I/O failure is reported as the direct cause.
        raise SPDXParsingError([f"Could not open file {file_name}: {err}"]) from err
    except Exception as err:
        # Boundary catch-all: any deserializer failure surfaces as a parsing error,
        # with the original exception preserved for debugging.
        raise SPDXParsingError([f"Error while parsing {file_name}: {err}"]) from err
    return object_set
18 changes: 18 additions & 0 deletions src/spdx_tools/spdx3/parser/parse_anything.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
from spdx_tools.spdx3.formats import FileFormat, file_name_to_format
from spdx_tools.spdx3.object_set import SpdxObjectSet
from spdx_tools.spdx3.parser.json_ld import json_ld_parser
from spdx_tools.spdx.parser.error import SPDXParsingError


def parse_file(file_name: str, encoding: str = "utf-8") -> SpdxObjectSet:
    """Parse an SPDX 3 file into a SHACLObjectSet, choosing the parser by file format.

    SPDX 3.0 currently defines a single serialization (JSON-LD). The dispatch
    step is kept anyway so that this function mirrors the SPDX 2 ``parse_file``
    API and further formats can be added later.

    Raises:
        SPDXParsingError: if the detected format has no parser here; format
            detection and the JSON-LD parser may raise it as well.
    """
    detected_format = file_name_to_format(file_name)
    # Guard clause: reject any format that has no parser wired up.
    if detected_format != FileFormat.JSON_LD:
        raise SPDXParsingError([f"Unsupported SPDX 3 file format: {detected_format}"])
    return json_ld_parser.parse_from_file(file_name, encoding)
Empty file added src/spdx_tools/spdx3/py.typed
Empty file.
2 changes: 2 additions & 0 deletions tests/spdx3/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: 2026-present SPDX contributors
# SPDX-License-Identifier: Apache-2.0
Loading
Loading