Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions scanpipe/pipes/spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,6 @@ def as_dict(self):
"SPDXID": self.spdx_id,
"name": self.safe_document_name(self.name),
"documentNamespace": self.namespace,
"documentDescribes": self.describes,
"creationInfo": self.creation_info.as_dict(),
"packages": [package.as_dict(self.version) for package in self.packages],
}
Expand All @@ -620,10 +619,24 @@ def as_dict(self):
license_info.as_dict() for license_info in self.extracted_licenses
]

if self.relationships:
data["relationships"] = [
relationship.as_dict() for relationship in self.relationships
]
# The SPDX 2.3 spec deprecated the top-level `documentDescribes` field.
# Instead, emit explicit DESCRIBES relationships prepended to maintain
# deterministic ordering and ensure document-level relationships appear first.
# See https://github.com/spdx/spdx-spec/issues/395
describes_relationships = [
{
"spdxElementId": self.spdx_id,
"relatedSpdxElement": spdx_id,
"relationshipType": "DESCRIBES",
}
for spdx_id in (self.describes or [])
]
existing_relationships = [
relationship.as_dict() for relationship in (self.relationships or [])
]
all_relationships = describes_relationships + existing_relationships
if all_relationships:
data["relationships"] = all_relationships

if self.comment:
data["comment"] = self.comment
Expand All @@ -636,13 +649,37 @@ def as_json(self, indent=2):

@classmethod
def from_data(cls, data):
spdx_id = data.get("SPDXID")
relationships_data = data.get("relationships", [])

# Backward compatibility: reconstruct `describes` from the legacy
# `documentDescribes` field if present, or derive it from DESCRIBES
# relationships where spdxElementId matches the document SPDXID.
describes = data.get("documentDescribes") or [
r["relatedSpdxElement"]
for r in relationships_data
if r.get("relationshipType") == "DESCRIBES"
and r.get("spdxElementId") == spdx_id
]

# Exclude DESCRIBES relationships that were re-derived into `describes`
# to avoid duplication when the document is round-tripped via as_dict().
other_relationships = [
Relationship.from_data(r)
for r in relationships_data
if not (
r.get("relationshipType") == "DESCRIBES"
and r.get("spdxElementId") == spdx_id
)
]

return cls(
spdx_id=data.get("SPDXID"),
spdx_id=spdx_id,
version=data.get("spdxVersion", "").split("SPDX-")[-1],
data_license=data.get("dataLicense"),
name=data.get("name"),
namespace=data.get("documentNamespace"),
describes=data.get("documentDescribes"),
describes=describes,
creation_info=CreationInfo.from_data(data.get("creationInfo", {})),
packages=[
Package.from_data(package_data)
Expand All @@ -653,10 +690,7 @@ def from_data(cls, data):
ExtractedLicensingInfo.from_data(license_info_data)
for license_info_data in data.get("hasExtractedLicensingInfos", [])
],
relationships=[
Relationship.from_data(relationship_data)
for relationship_data in data.get("relationships", [])
],
relationships=other_relationships,
comment=data.get("comment"),
)

Expand Down
8 changes: 5 additions & 3 deletions scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
"SPDXID": "SPDXRef-DOCUMENT-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
"name": "scancodeio_asgiref",
"documentNamespace": "https://scancode.io/spdxdocs/92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
"documentDescribes": [
"SPDXRef-scancodeio-project-92fe63d9-1d53-4b63-b19a-85022fb7a3f3"
],
"creationInfo": {
"created": "2000-01-01T01:02:03Z",
"creators": [
Expand Down Expand Up @@ -131,6 +128,11 @@
],
"files": [],
"relationships": [
{
"spdxElementId": "SPDXRef-DOCUMENT-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
"relatedSpdxElement": "SPDXRef-scancodeio-project-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
"relationshipType": "DESCRIBES"
},
{
"spdxElementId": "SPDXRef-scancodeio-project-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-543a3583-3a13-4b5d-a039-c6bc4072de35",
Expand Down
62 changes: 60 additions & 2 deletions scanpipe/tests/pipes/test_spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ def setUp(self):
"SPDXID": "SPDXRef-DOCUMENT",
"name": "document_name",
"documentNamespace": "https://[CreatorWebsite]/[DocumentName]-[UUID]",
"documentDescribes": ["SPDXRef-project"],
"creationInfo": {
"created": "2022-09-21T13:50:20Z",
"creators": [
Expand Down Expand Up @@ -272,11 +271,16 @@ def setUp(self):
}
],
"relationships": [
{
"spdxElementId": "SPDXRef-DOCUMENT",
"relatedSpdxElement": "SPDXRef-project",
"relationshipType": "DESCRIBES",
},
{
"spdxElementId": "SPDXRef-package1",
"relatedSpdxElement": "SPDXRef-file1",
"relationshipType": "CONTAINS",
}
},
],
"comment": "This document was created using SPDXCode-1.0",
}
Expand Down Expand Up @@ -412,3 +416,57 @@ def test_spdx_validate_document(self):

with self.assertRaises(Exception):
spdx.validate_document({}, self.schema_2_3)

def test_spdx_document_describes_uses_relationship(self):
    """
    Check that ``Document.as_dict()`` omits the top-level
    ``documentDescribes`` key and emits a DESCRIBES relationship instead.

    The document is built from the ``self.document_data`` fixture prepared
    in ``setUp``.
    """
    document = spdx.Document(**self.document_data)
    result = document.as_dict()

    # The legacy field must no longer be serialized.
    self.assertNotIn("documentDescribes", result)

    # Keep only the DESCRIBES entries; other relationship types from the
    # fixture are irrelevant to this test.
    describes_relationships = [
        relationship
        for relationship in result.get("relationships", [])
        if relationship.get("relationshipType") == "DESCRIBES"
    ]
    self.assertEqual(1, len(describes_relationships))

    describes = describes_relationships[0]
    self.assertEqual("SPDXRef-DOCUMENT", describes["spdxElementId"])
    self.assertEqual("SPDXRef-project", describes["relatedSpdxElement"])

def test_spdx_document_from_data_backward_compat(self):
    """
    Check that input using the legacy ``documentDescribes`` field is loaded
    by ``Document.from_data()`` and re-serialized as a DESCRIBES
    relationship.
    """
    # Minimal legacy-style document: it carries ``documentDescribes`` and
    # has no ``relationships`` key at all.
    legacy_data = {
        "spdxVersion": "SPDX-2.3",
        "dataLicense": "CC0-1.0",
        "SPDXID": "SPDXRef-DOCUMENT",
        "name": "legacy_doc",
        "documentNamespace": "https://example.com/legacy",
        "documentDescribes": ["SPDXRef-root"],
        "creationInfo": {
            "created": "2022-01-01T00:00:00Z",
            "creators": ["Tool: OldTool-1.0"],
        },
        "packages": [
            {
                "SPDXID": "SPDXRef-root",
                "name": "root-pkg",
                "downloadLocation": "NOASSERTION",
                "filesAnalyzed": False,
            }
        ],
    }
    document = spdx.Document.from_data(legacy_data)

    # The in-memory ``describes`` value is taken from the legacy field.
    self.assertEqual(["SPDXRef-root"], document.describes)

    # Re-serialized output uses relationships, not the legacy field.
    result = document.as_dict()
    self.assertNotIn("documentDescribes", result)

    describes_relationships = [
        relationship
        for relationship in result.get("relationships", [])
        if relationship.get("relationshipType") == "DESCRIBES"
    ]
    self.assertEqual(1, len(describes_relationships))

    describes = describes_relationships[0]
    # The relationship must originate from the document itself and point
    # at the element that was listed in ``documentDescribes``.
    self.assertEqual("SPDXRef-DOCUMENT", describes["spdxElementId"])
    self.assertEqual("SPDXRef-root", describes["relatedSpdxElement"])