diff --git a/scanpipe/pipes/spdx.py b/scanpipe/pipes/spdx.py
index 5bb3e6bb16..23b3e6041e 100644
--- a/scanpipe/pipes/spdx.py
+++ b/scanpipe/pipes/spdx.py
@@ -607,7 +607,6 @@ def as_dict(self):
             "SPDXID": self.spdx_id,
             "name": self.safe_document_name(self.name),
             "documentNamespace": self.namespace,
-            "documentDescribes": self.describes,
             "creationInfo": self.creation_info.as_dict(),
             "packages": [package.as_dict(self.version) for package in self.packages],
         }
@@ -620,10 +619,24 @@ def as_dict(self):
                 license_info.as_dict() for license_info in self.extracted_licenses
             ]
 
-        if self.relationships:
-            data["relationships"] = [
-                relationship.as_dict() for relationship in self.relationships
-            ]
+        # The SPDX 2.3 spec deprecated the top-level `documentDescribes` field.
+        # Instead, emit explicit DESCRIBES relationships prepended to maintain
+        # deterministic ordering and ensure document-level relationships appear first.
+        # See https://github.com/spdx/spdx-spec/issues/395
+        describes_relationships = [
+            {
+                "spdxElementId": self.spdx_id,
+                "relatedSpdxElement": spdx_id,
+                "relationshipType": "DESCRIBES",
+            }
+            for spdx_id in (self.describes or [])
+        ]
+        existing_relationships = [
+            relationship.as_dict() for relationship in (self.relationships or [])
+        ]
+        all_relationships = describes_relationships + existing_relationships
+        if all_relationships:
+            data["relationships"] = all_relationships
 
         if self.comment:
             data["comment"] = self.comment
@@ -636,13 +649,46 @@ def as_json(self, indent=2):
 
     @classmethod
     def from_data(cls, data):
+        spdx_id = data.get("SPDXID")
+        relationships_data = data.get("relationships", [])
+
+        def is_document_describes(relationship_data):
+            """Return True for a document-level DESCRIBES relationship."""
+            return (
+                relationship_data.get("relationshipType") == "DESCRIBES"
+                and relationship_data.get("spdxElementId") == spdx_id
+                and bool(relationship_data.get("relatedSpdxElement"))
+            )
+
+        # Backward compatibility: the described elements may come from the
+        # legacy `documentDescribes` field, from DESCRIBES relationships whose
+        # spdxElementId is the document SPDXID, or from both. Merge the two
+        # sources (legacy entries first, duplicates dropped) so that nothing
+        # is lost when those relationships are folded into `describes`.
+        legacy_describes = data.get("documentDescribes") or []
+        relationship_describes = [
+            relationship_data["relatedSpdxElement"]
+            for relationship_data in relationships_data
+            if is_document_describes(relationship_data)
+        ]
+        describes = list(dict.fromkeys([*legacy_describes, *relationship_describes]))
+
+        # Document-level DESCRIBES relationships are carried by `describes` and
+        # re-emitted by as_dict(); keep only the other relationships to avoid
+        # duplication when the document is round-tripped.
+        other_relationships = [
+            Relationship.from_data(relationship_data)
+            for relationship_data in relationships_data
+            if not is_document_describes(relationship_data)
+        ]
+
         return cls(
-            spdx_id=data.get("SPDXID"),
+            spdx_id=spdx_id,
             version=data.get("spdxVersion", "").split("SPDX-")[-1],
             data_license=data.get("dataLicense"),
             name=data.get("name"),
             namespace=data.get("documentNamespace"),
-            describes=data.get("documentDescribes"),
+            describes=describes,
             creation_info=CreationInfo.from_data(data.get("creationInfo", {})),
             packages=[
                 Package.from_data(package_data)
@@ -653,10 +699,7 @@ def from_data(cls, data):
                 ExtractedLicensingInfo.from_data(license_info_data)
                 for license_info_data in data.get("hasExtractedLicensingInfos", [])
             ],
-            relationships=[
-                Relationship.from_data(relationship_data)
-                for relationship_data in data.get("relationships", [])
-            ],
+            relationships=other_relationships,
             comment=data.get("comment"),
         )
 
diff --git a/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json b/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json
index 565e2f4506..28ee50be87 100644
--- a/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json
+++ b/scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json
@@ -4,9 +4,6 @@
   "SPDXID": "SPDXRef-DOCUMENT-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
   "name": "scancodeio_asgiref",
   "documentNamespace": "https://scancode.io/spdxdocs/92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
-  "documentDescribes": [
-    "SPDXRef-scancodeio-project-92fe63d9-1d53-4b63-b19a-85022fb7a3f3"
-  ],
   "creationInfo": {
     "created": "2000-01-01T01:02:03Z",
     "creators": [
@@ -131,6 +128,11 @@
   ],
   "files": [],
   "relationships": [
+    {
+      "spdxElementId": "SPDXRef-DOCUMENT-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
+      "relatedSpdxElement": "SPDXRef-scancodeio-project-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
+      "relationshipType": "DESCRIBES"
+    },
     {
       "spdxElementId": "SPDXRef-scancodeio-project-92fe63d9-1d53-4b63-b19a-85022fb7a3f3",
       "relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-543a3583-3a13-4b5d-a039-c6bc4072de35",
diff --git a/scanpipe/tests/pipes/test_spdx.py b/scanpipe/tests/pipes/test_spdx.py
index befa12913a..0d01b70a63 100644
--- a/scanpipe/tests/pipes/test_spdx.py
+++ b/scanpipe/tests/pipes/test_spdx.py
@@ -196,7 +196,6 @@ def setUp(self):
             "SPDXID": "SPDXRef-DOCUMENT",
             "name": "document_name",
             "documentNamespace": "https://[CreatorWebsite]/[DocumentName]-[UUID]",
-            "documentDescribes": ["SPDXRef-project"],
             "creationInfo": {
                 "created": "2022-09-21T13:50:20Z",
                 "creators": [
@@ -272,11 +271,16 @@ def setUp(self):
                 }
             ],
             "relationships": [
+                {
+                    "spdxElementId": "SPDXRef-DOCUMENT",
+                    "relatedSpdxElement": "SPDXRef-project",
+                    "relationshipType": "DESCRIBES",
+                },
                 {
                     "spdxElementId": "SPDXRef-package1",
                     "relatedSpdxElement": "SPDXRef-file1",
                     "relationshipType": "CONTAINS",
-                }
+                },
             ],
             "comment": "This document was created using SPDXCode-1.0",
         }
@@ -412,3 +416,102 @@ def test_spdx_validate_document(self):
 
         with self.assertRaises(Exception):
             spdx.validate_document({}, self.schema_2_3)
+
+    def test_spdx_document_describes_uses_relationship(self):
+        """Emit DESCRIBES relationships instead of the documentDescribes field."""
+        document = spdx.Document(**self.document_data)
+        result = document.as_dict()
+
+        assert "documentDescribes" not in result
+
+        describes_rels = [
+            r
+            for r in result.get("relationships", [])
+            if r.get("relationshipType") == "DESCRIBES"
+        ]
+        assert len(describes_rels) == 1
+        assert describes_rels[0]["spdxElementId"] == "SPDXRef-DOCUMENT"
+        assert describes_rels[0]["relatedSpdxElement"] == "SPDXRef-project"
+
+    def test_spdx_document_from_data_backward_compat(self):
+        """Legacy documentDescribes input is re-emitted as DESCRIBES relationships."""
+        legacy_data = {
+            "spdxVersion": "SPDX-2.3",
+            "dataLicense": "CC0-1.0",
+            "SPDXID": "SPDXRef-DOCUMENT",
+            "name": "legacy_doc",
+            "documentNamespace": "https://example.com/legacy",
+            "documentDescribes": ["SPDXRef-root"],
+            "creationInfo": {
+                "created": "2022-01-01T00:00:00Z",
+                "creators": ["Tool: OldTool-1.0"],
+            },
+            "packages": [
+                {
+                    "SPDXID": "SPDXRef-root",
+                    "name": "root-pkg",
+                    "downloadLocation": "NOASSERTION",
+                    "filesAnalyzed": False,
+                }
+            ],
+        }
+        document = spdx.Document.from_data(legacy_data)
+
+        # Internal describes is reconstructed correctly
+        assert document.describes == ["SPDXRef-root"]
+
+        # Re-serialized output uses relationships, not the legacy field
+        result = document.as_dict()
+        assert "documentDescribes" not in result
+        describes_rels = [
+            r
+            for r in result.get("relationships", [])
+            if r.get("relationshipType") == "DESCRIBES"
+        ]
+        assert len(describes_rels) == 1
+        assert describes_rels[0]["relatedSpdxElement"] == "SPDXRef-root"
+
+    def test_spdx_document_from_data_merges_describes_sources(self):
+        """DESCRIBES relationships missing from documentDescribes are preserved."""
+        mixed_data = {
+            "spdxVersion": "SPDX-2.3",
+            "dataLicense": "CC0-1.0",
+            "SPDXID": "SPDXRef-DOCUMENT",
+            "name": "mixed_doc",
+            "documentNamespace": "https://example.com/mixed",
+            "documentDescribes": ["SPDXRef-root"],
+            "creationInfo": {
+                "created": "2022-01-01T00:00:00Z",
+                "creators": ["Tool: OldTool-1.0"],
+            },
+            "packages": [
+                {
+                    "SPDXID": "SPDXRef-root",
+                    "name": "root-pkg",
+                    "downloadLocation": "NOASSERTION",
+                    "filesAnalyzed": False,
+                }
+            ],
+            "relationships": [
+                {
+                    "spdxElementId": "SPDXRef-DOCUMENT",
+                    "relatedSpdxElement": "SPDXRef-root",
+                    "relationshipType": "DESCRIBES",
+                },
+                {
+                    "spdxElementId": "SPDXRef-DOCUMENT",
+                    "relatedSpdxElement": "SPDXRef-other",
+                    "relationshipType": "DESCRIBES",
+                },
+            ],
+        }
+        document = spdx.Document.from_data(mixed_data)
+
+        assert document.describes == ["SPDXRef-root", "SPDXRef-other"]
+
+        described = [
+            r["relatedSpdxElement"]
+            for r in document.as_dict().get("relationships", [])
+            if r.get("relationshipType") == "DESCRIBES"
+        ]
+        assert described == ["SPDXRef-root", "SPDXRef-other"]