diff --git a/scanpipe/models.py b/scanpipe/models.py index ac8e2da155..726d51d6af 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -3827,7 +3827,7 @@ def create_from_data(cls, project, package_data): return if not package_data.get("type"): - package_data["type"] = "unknown" + package_data["type"] = "generic" qualifiers = package_data.get("qualifiers") if qualifiers: diff --git a/scanpipe/pipes/benchmark.py b/scanpipe/pipes/benchmark.py index df8dbc05b0..f6182d3a8a 100644 --- a/scanpipe/pipes/benchmark.py +++ b/scanpipe/pipes/benchmark.py @@ -70,7 +70,7 @@ def compare_purls(project, expected_purls): - Lines starting with '+' are unexpected in the project. """ sorted_project_purls = get_unique_project_purls(project) - diff_result = difflib.ndiff(sorted_project_purls, expected_purls) + diff_result = difflib.ndiff(sorted_project_purls, sorted(expected_purls)) # Keep only lines that are diffs (- or +) filtered_diff = [line for line in diff_result if line.startswith(("-", "+"))] diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index 0a409dd88c..c99de00223 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -311,27 +311,77 @@ def convert_spdx_expression(license_expression_spdx): return get_license_detections_and_expression(license_expression_spdx)[1] +def build_spdx_purl(spdx_package): + """ + Return a PackageURL dict for the SPDX package. + + Resolution order: + 1. Use declared PURL unless type == "unknown" + 2. Fallback to deterministic generic PURL + """ + for ref in spdx_package.external_refs: + if ref.type == "purl" and ref.locator: + declared = PackageURL.from_string(ref.locator) + + # If declared type is meaningful it will use it + if declared.type and declared.type != "unknown": + return declared.to_dict(encode=True), False + + # If declared type is unknown it will upgrade to generic + name = declared.name + version = declared.version + + if name: + generic = PackageURL( + type="generic", + name=name, + version=version, + ) + return generic.to_dict(encode=True), True + + # No declared PURL - fallback + name = (spdx_package.name or "").strip() + version = (spdx_package.version or "").strip() + + if name: + generic = PackageURL( + type="generic", + name=name, + version=version or None, + ) + return generic.to_dict(encode=True), True + + return {}, False + + def spdx_package_to_package_data(spdx_package): """Convert the provided spdx_package into package_data.""" - package_url_dict = {} - # Store the original "SPDXID" as package_uid for dependencies resolution. package_uid = spdx_package.spdx_id - for ref in spdx_package.external_refs: - if ref.type == "purl": - purl = ref.locator - package_url_dict = PackageURL.from_string(purl).to_dict(encode=True) + # Resolve declared or fallback PURL + package_url_dict, inferred = build_spdx_purl(spdx_package) + # Collect checksums checksum_data = { checksum.algorithm.lower(): checksum.value for checksum in spdx_package.checksums } + # License handling declared_license_expression_spdx = spdx_package.license_concluded declared_expression = "" if declared_license_expression_spdx: declared_expression = convert_spdx_expression(declared_license_expression_spdx) + # Structured identity metadata + identity = { + "source": "inferred" if inferred else "declared", + "origin": { + "download_location": spdx_package.download_location, + "homepage": spdx_package.homepage, + }, + } + package_data = { "package_uid": package_uid, "name": spdx_package.name, @@ -345,6 +395,9 @@ def spdx_package_to_package_data(spdx_package): "filename": spdx_package.filename, "description": spdx_package.description, "release_date": spdx_package.release_date, + "extra_data": { + "identity": identity, + }, **package_url_dict, **checksum_data, } diff --git a/scanpipe/tests/pipes/test_resolve.py b/scanpipe/tests/pipes/test_resolve.py index 2c7aa33bcb..abb8b171a2 100644 --- a/scanpipe/tests/pipes/test_resolve.py +++ b/scanpipe/tests/pipes/test_resolve.py @@ -224,6 +224,15 @@ def test_scanpipe_pipes_resolve_spdx_package_to_package_data(self): "qualifiers": "arch=all", "md5": "76cf50f29e47676962645632737365a7", } + expected["extra_data"] = { + "identity": { + "source": "declared", + "origin": { + "download_location": "https://download.url/package.zip", + "homepage": "https://packages.debian.org", + }, + } + } self.assertEqual(expected, package_data) def test_scanpipe_pipes_spdx_relationship_to_dependency_data(self): @@ -250,6 +259,55 @@ def test_scanpipe_pipes_resolve_spdx_packages(self): packages_data = resolve.resolve_spdx_packages(input_location) self.assertEqual(4, len(packages_data)) + def test_scanpipe_resolve_spdx_package_generates_generic_purl_when_missing(self): + """ + SPDX package without externalRefs should generate + a deterministic generic PURL and mark it as inferred. + """ + spdx_content = { + "spdxVersion": "SPDX-2.3", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "test-doc", + "dataLicense": "CC0-1.0", + "documentNamespace": "http://example.com/spdx/test", + "creationInfo": { + "created": "2024-01-01T00:00:00Z", + "creators": ["Tool: pytest"], + }, + "packages": [ + { + "name": "examplepkg", + "SPDXID": "SPDXRef-Package-examplepkg", + "versionInfo": "1.0.0", + "downloadLocation": "NOASSERTION", + "licenseConcluded": "MIT", + "licenseDeclared": "MIT", + "copyrightText": "NOASSERTION", + } + ], + "relationships": [], + } + + test_file = self.data / "spdx" / "temp_test.spdx.json" + test_file.write_text(json.dumps(spdx_content)) + + try: + packages = resolve.resolve_spdx_packages(test_file) + finally: + test_file.unlink(missing_ok=True) + + self.assertEqual(1, len(packages)) + package = packages[0] + + self.assertEqual("generic", package.get("type")) + self.assertEqual("examplepkg", package.get("name")) + self.assertEqual("1.0.0", package.get("version")) + + self.assertEqual( + "inferred", + package.get("extra_data", {}).get("identity", {}).get("source"), + ) + def test_scanpipe_pipes_resolve_spdx_dependencies(self): input_location = self.data / "spdx" / "SPDXJSONExample-v2.3.spdx.json" dependencies_data = resolve.resolve_spdx_dependencies(input_location) diff --git a/scanpipe/tests/test_integrations_ort.py b/scanpipe/tests/test_integrations_ort.py index 5bc1034189..c9029d6852 100644 --- a/scanpipe/tests/test_integrations_ort.py +++ b/scanpipe/tests/test_integrations_ort.py @@ -43,7 +43,7 @@ "dependencies": 25, "vulnerabilities": 10, "purls": [ - "pkg:unknown/alpine@3.17.0", + "pkg:generic/alpine@3.17.0", ], }, @@ -141,7 +141,7 @@ "pkg:maven/pkg4-grp/pkg4@0.0.1", "pkg:maven/pkg6-grp/pkg6@0.0.1", "pkg:maven/pkg7-grp/pkg7@0.0.1", - "pkg:unknown/proj1@0.0.1", + "pkg:generic/proj1@0.0.1", ], }, "ort-reporter-spdx-2.3/synthetic-scan-result-expected-output.spdx.json": { @@ -158,7 +158,7 @@ "pkg:maven/pkg4-grp/pkg4@0.0.1", "pkg:maven/pkg6-grp/pkg6@0.0.1", "pkg:maven/pkg7-grp/pkg7@0.0.1", - "pkg:unknown/proj1@0.0.1", + "pkg:generic/proj1@0.0.1", ], }, } diff --git a/scanpipe/tests/test_integrations_sca_tools.py b/scanpipe/tests/test_integrations_sca_tools.py index 9382f3f51d..80f3cf106f 100644 --- a/scanpipe/tests/test_integrations_sca_tools.py +++ b/scanpipe/tests/test_integrations_sca_tools.py @@ -47,7 +47,7 @@ "dependencies": 25, "vulnerabilities": 10, "purls": [ - "pkg:unknown/alpine@3.17.0", + "pkg:generic/alpine@3.17.0", ], }, @@ -104,85 +104,85 @@ "pkg:apk/alpine/scanelf@1.3.5-r1?arch=x86_64&distro=alpine-3.17.0&upstream=pax-utils", "pkg:apk/alpine/ssl_client@1.35.0-r29?arch=x86_64&distro=alpine-3.17.0&upstream=busybox", "pkg:apk/alpine/zlib@1.2.13-r0?arch=x86_64&distro=alpine-3.17.0", - "pkg:unknown/alpine@3.17.0", - "pkg:unknown/bin/busybox", - "pkg:unknown/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-4a6a0840.rsa.pub", - "pkg:unknown/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-5243ef4b.rsa.pub", - "pkg:unknown/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-5261cecb.rsa.pub", - "pkg:unknown/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-6165ee59.rsa.pub", - "pkg:unknown/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-61666e3f.rsa.pub", - "pkg:unknown/etc/crontabs/root", - "pkg:unknown/etc/fstab", - "pkg:unknown/etc/group", - "pkg:unknown/etc/hostname", - "pkg:unknown/etc/hosts", - "pkg:unknown/etc/inittab", - "pkg:unknown/etc/logrotate.d/acpid", - "pkg:unknown/etc/modprobe.d/aliases.conf", - "pkg:unknown/etc/modprobe.d/blacklist.conf", - "pkg:unknown/etc/modprobe.d/i386.conf", - "pkg:unknown/etc/modprobe.d/kms.conf", - "pkg:unknown/etc/modules", - "pkg:unknown/etc/motd", - "pkg:unknown/etc/network/if-up.d/dad", - "pkg:unknown/etc/nsswitch.conf", - "pkg:unknown/etc/passwd", - "pkg:unknown/etc/profile", - "pkg:unknown/etc/profile.d/README", - "pkg:unknown/etc/profile.d/color_prompt.sh.disabled", - "pkg:unknown/etc/profile.d/locale.sh", - "pkg:unknown/etc/protocols", - "pkg:unknown/etc/securetty", - "pkg:unknown/etc/services", - "pkg:unknown/etc/shadow", - "pkg:unknown/etc/shells", - "pkg:unknown/etc/ssl/certs/ca-certificates.crt", - "pkg:unknown/etc/ssl/ct_log_list.cnf", - "pkg:unknown/etc/ssl/ct_log_list.cnf.dist", - "pkg:unknown/etc/ssl/misc/CA.pl", - "pkg:unknown/etc/ssl/misc/tsget.pl", - "pkg:unknown/etc/ssl/openssl.cnf", - "pkg:unknown/etc/ssl/openssl.cnf.dist", - "pkg:unknown/etc/sysctl.conf", - "pkg:unknown/etc/udhcpd.conf", - "pkg:unknown/lib/apk/db/installed", - "pkg:unknown/lib/ld-musl-x86_64.so.1", - "pkg:unknown/lib/libapk.so.3.12.0", - "pkg:unknown/lib/libcrypto.so.3", - "pkg:unknown/lib/libssl.so.3", - "pkg:unknown/lib/libz.so.1.2.13", - "pkg:unknown/lib/sysctl.d/00-alpine.conf", - "pkg:unknown/sbin/apk", - "pkg:unknown/sbin/ldconfig", - "pkg:unknown/usr/bin/getconf", - "pkg:unknown/usr/bin/getent", - "pkg:unknown/usr/bin/iconv", - "pkg:unknown/usr/bin/ldd", - "pkg:unknown/usr/bin/scanelf", - "pkg:unknown/usr/bin/ssl_client", - "pkg:unknown/usr/lib/engines-3/afalg.so", - "pkg:unknown/usr/lib/engines-3/capi.so", - "pkg:unknown/usr/lib/engines-3/loader_attic.so", - "pkg:unknown/usr/lib/engines-3/padlock.so", - "pkg:unknown/usr/lib/ossl-modules/legacy.so", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-4a6a0840.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-5243ef4b.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-524d27bb.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-5261cecb.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-58199dcc.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-58cbb476.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-58e4f17d.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-5e69ca50.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-60ac2099.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-6165ee59.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-61666e3f.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616a9724.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616abc23.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616ac3bc.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616adfeb.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616ae350.rsa.pub", - "pkg:unknown/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616db30d.rsa.pub", - "pkg:unknown/usr/share/udhcpc/default.script", + "pkg:generic/alpine@3.17.0", + "pkg:generic/bin/busybox", + "pkg:generic/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-4a6a0840.rsa.pub", + "pkg:generic/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-5243ef4b.rsa.pub", + "pkg:generic/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-5261cecb.rsa.pub", + "pkg:generic/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-6165ee59.rsa.pub", + "pkg:generic/etc/apk/keys/alpine-devel%40lists.alpinelinux.org-61666e3f.rsa.pub", + "pkg:generic/etc/crontabs/root", + "pkg:generic/etc/fstab", + "pkg:generic/etc/group", + "pkg:generic/etc/hostname", + "pkg:generic/etc/hosts", + "pkg:generic/etc/inittab", + "pkg:generic/etc/logrotate.d/acpid", + "pkg:generic/etc/modprobe.d/aliases.conf", + "pkg:generic/etc/modprobe.d/blacklist.conf", + "pkg:generic/etc/modprobe.d/i386.conf", + "pkg:generic/etc/modprobe.d/kms.conf", + "pkg:generic/etc/modules", + "pkg:generic/etc/motd", + "pkg:generic/etc/network/if-up.d/dad", + "pkg:generic/etc/nsswitch.conf", + "pkg:generic/etc/passwd", + "pkg:generic/etc/profile", + "pkg:generic/etc/profile.d/README", + "pkg:generic/etc/profile.d/color_prompt.sh.disabled", + "pkg:generic/etc/profile.d/locale.sh", + "pkg:generic/etc/protocols", + "pkg:generic/etc/securetty", + "pkg:generic/etc/services", + "pkg:generic/etc/shadow", + "pkg:generic/etc/shells", + "pkg:generic/etc/ssl/certs/ca-certificates.crt", + "pkg:generic/etc/ssl/ct_log_list.cnf", + "pkg:generic/etc/ssl/ct_log_list.cnf.dist", + "pkg:generic/etc/ssl/misc/CA.pl", + "pkg:generic/etc/ssl/misc/tsget.pl", + "pkg:generic/etc/ssl/openssl.cnf", + "pkg:generic/etc/ssl/openssl.cnf.dist", + "pkg:generic/etc/sysctl.conf", + "pkg:generic/etc/udhcpd.conf", + "pkg:generic/lib/apk/db/installed", + "pkg:generic/lib/ld-musl-x86_64.so.1", + "pkg:generic/lib/libapk.so.3.12.0", + "pkg:generic/lib/libcrypto.so.3", + "pkg:generic/lib/libssl.so.3", + "pkg:generic/lib/libz.so.1.2.13", + "pkg:generic/lib/sysctl.d/00-alpine.conf", + "pkg:generic/sbin/apk", + "pkg:generic/sbin/ldconfig", + "pkg:generic/usr/bin/getconf", + "pkg:generic/usr/bin/getent", + "pkg:generic/usr/bin/iconv", + "pkg:generic/usr/bin/ldd", + "pkg:generic/usr/bin/scanelf", + "pkg:generic/usr/bin/ssl_client", + "pkg:generic/usr/lib/engines-3/afalg.so", + "pkg:generic/usr/lib/engines-3/capi.so", + "pkg:generic/usr/lib/engines-3/loader_attic.so", + "pkg:generic/usr/lib/engines-3/padlock.so", + "pkg:generic/usr/lib/ossl-modules/legacy.so", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-4a6a0840.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-5243ef4b.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-524d27bb.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-5261cecb.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-58199dcc.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-58cbb476.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-58e4f17d.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-5e69ca50.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-60ac2099.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-6165ee59.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-61666e3f.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616a9724.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616abc23.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616ac3bc.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616adfeb.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616ae350.rsa.pub", + "pkg:generic/usr/share/apk/keys/alpine-devel%40lists.alpinelinux.org-616db30d.rsa.pub", + "pkg:generic/usr/share/udhcpc/default.script", ], }, ### CycloneDX cdxgen @@ -282,7 +282,7 @@ "pkg:apk/alpine/scanelf@1.3.5-r1?arch=x86_64&distro=3.17.0&origin=pax-utils", "pkg:apk/alpine/ssl_client@1.35.0-r29?arch=x86_64&distro=3.17.0&origin=busybox", "pkg:apk/alpine/zlib@1.2.13-r0?arch=x86_64&distro=3.17.0&origin=zlib", - "pkg:unknown/main@0", + "pkg:generic/main@0", ], }, # Example file from osv-scanner documentation: @@ -310,21 +310,21 @@ "vulnerabilities": 0, "purls": [ "pkg:swid/Company/sbom.company.com/DockerImage@1.0.0?tag_id=60e3f440-f9a8-449e-b516-da3049700fff", - "pkg:unknown/alpine-baselayout-data@3.4.0-r0", - "pkg:unknown/alpine-baselayout@3.4.0-r0", - "pkg:unknown/alpine-keys@2.4-r1", - "pkg:unknown/apk-tools@2.12.10-r1", - "pkg:unknown/busybox-binsh@1.35.0-r29", - "pkg:unknown/busybox@1.35.0-r29", - "pkg:unknown/ca-certificates-bundle@20220614-r2", - "pkg:unknown/libc-utils@0.7.2-r3", - "pkg:unknown/libcrypto3@3.0.7-r0", - "pkg:unknown/libssl3@3.0.7-r0", - "pkg:unknown/musl-utils@1.2.3-r4", - "pkg:unknown/musl@1.2.3-r4", - "pkg:unknown/scanelf@1.3.5-r1", - "pkg:unknown/ssl_client@1.35.0-r29", - "pkg:unknown/zlib@1.2.13-r0", + "pkg:generic/alpine-baselayout-data@3.4.0-r0", + "pkg:generic/alpine-baselayout@3.4.0-r0", + "pkg:generic/alpine-keys@2.4-r1", + "pkg:generic/apk-tools@2.12.10-r1", + "pkg:generic/busybox-binsh@1.35.0-r29", + "pkg:generic/busybox@1.35.0-r29", + "pkg:generic/ca-certificates-bundle@20220614-r2", + "pkg:generic/libc-utils@0.7.2-r3", + "pkg:generic/libcrypto3@3.0.7-r0", + "pkg:generic/libssl3@3.0.7-r0", + "pkg:generic/musl-utils@1.2.3-r4", + "pkg:generic/musl@1.2.3-r4", + "pkg:generic/scanelf@1.3.5-r1", + "pkg:generic/ssl_client@1.35.0-r29", + "pkg:generic/zlib@1.2.13-r0", ], }, ### Trivy @@ -352,7 +352,7 @@ "pkg:apk/alpine/scanelf@1.3.5-r1?arch=x86_64&distro=3.17.0", "pkg:apk/alpine/ssl_client@1.35.0-r29?arch=x86_64&distro=3.17.0", "pkg:apk/alpine/zlib@1.2.13-r0?arch=x86_64&distro=3.17.0", - "pkg:unknown/alpine@3.17.0", + "pkg:generic/alpine@3.17.0", ], }, } diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index e4a5b4cb7d..692e0cc940 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -3246,8 +3246,8 @@ def test_scanpipe_discovered_package_model_create_from_data_missing_type(self): package = DiscoveredPackage.create_from_data(project1, incomplete_data) self.assertEqual(project1, package.project) - self.assertEqual("pkg:unknown/debian/adduser@3.118?arch=all", str(package)) - self.assertEqual("unknown", package.type) + self.assertEqual("pkg:generic/debian/adduser@3.118?arch=all", str(package)) + self.assertEqual("generic", package.type) @skipIf(connection.vendor == "sqlite", "No max_length constraints on SQLite.") def test_scanpipe_discovered_dependency_model_create_from_data(self):