Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3827,7 +3827,7 @@ def create_from_data(cls, project, package_data):
return

if not package_data.get("type"):
package_data["type"] = "unknown"
package_data["type"] = "generic"

qualifiers = package_data.get("qualifiers")
if qualifiers:
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/pipes/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def compare_purls(project, expected_purls):
- Lines starting with '+' are unexpected in the project.
"""
sorted_project_purls = get_unique_project_purls(project)
diff_result = difflib.ndiff(sorted_project_purls, expected_purls)
diff_result = difflib.ndiff(sorted_project_purls, sorted(expected_purls))

# Keep only lines that are diffs (- or +)
filtered_diff = [line for line in diff_result if line.startswith(("-", "+"))]
Expand Down
65 changes: 59 additions & 6 deletions scanpipe/pipes/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,27 +311,77 @@ def convert_spdx_expression(license_expression_spdx):
return get_license_detections_and_expression(license_expression_spdx)[1]


def build_spdx_purl(spdx_package):
    """
    Return a ``(package_url_dict, inferred)`` tuple for the SPDX package.

    ``package_url_dict`` is the mapping produced by
    ``PackageURL.to_dict(encode=True)``, or an empty dict when no PURL can be
    resolved. ``inferred`` is True when the PURL was generated or rewritten
    by this function, and False when it is used as declared or is missing.

    Resolution order:
    1. The first "purl" external reference with a locator is parsed; it is
       returned untouched unless its type is "unknown".
    2. A declared "unknown" PURL is rewritten with the "generic" type, keeping
       its namespace, name, version, qualifiers, and subpath.
    3. Without a usable declared PURL, a generic PURL is built from the
       package name and version.
    """
    for ref in spdx_package.external_refs:
        # Only external references that carry an actual PURL are of interest.
        if ref.type != "purl" or not ref.locator:
            continue

        declared = PackageURL.from_string(ref.locator)

        # A meaningful declared type is used as-is.
        if declared.type and declared.type != "unknown":
            return declared.to_dict(encode=True), False

        # "unknown" is upgraded to "generic". Every other component is carried
        # over so the rewrite does not discard identifying data.
        if declared.name:
            generic = PackageURL(
                type="generic",
                namespace=declared.namespace,
                name=declared.name,
                version=declared.version,
                qualifiers=declared.qualifiers,
                subpath=declared.subpath,
            )
            return generic.to_dict(encode=True), True

    # No usable declared PURL: fall back to the package attributes.
    name = (spdx_package.name or "").strip()
    version = (spdx_package.version or "").strip()

    if name:
        generic = PackageURL(type="generic", name=name, version=version or None)
        return generic.to_dict(encode=True), True

    return {}, False


def spdx_package_to_package_data(spdx_package):
"""Convert the provided spdx_package into package_data."""
package_url_dict = {}
# Store the original "SPDXID" as package_uid for dependencies resolution.
package_uid = spdx_package.spdx_id

for ref in spdx_package.external_refs:
if ref.type == "purl":
purl = ref.locator
package_url_dict = PackageURL.from_string(purl).to_dict(encode=True)
# Resolve declared or fallback PURL
package_url_dict, inferred = build_spdx_purl(spdx_package)

# Collect checksums
checksum_data = {
checksum.algorithm.lower(): checksum.value
for checksum in spdx_package.checksums
}

# License handling
declared_license_expression_spdx = spdx_package.license_concluded
declared_expression = ""
if declared_license_expression_spdx:
declared_expression = convert_spdx_expression(declared_license_expression_spdx)

# Structured identity metadata
identity = {
"source": "inferred" if inferred else "declared",
"origin": {
"download_location": spdx_package.download_location,
"homepage": spdx_package.homepage,
},
}

package_data = {
"package_uid": package_uid,
"name": spdx_package.name,
Expand All @@ -345,6 +395,9 @@ def spdx_package_to_package_data(spdx_package):
"filename": spdx_package.filename,
"description": spdx_package.description,
"release_date": spdx_package.release_date,
"extra_data": {
"identity": identity,
},
**package_url_dict,
**checksum_data,
}
Expand Down
58 changes: 58 additions & 0 deletions scanpipe/tests/pipes/test_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ def test_scanpipe_pipes_resolve_spdx_package_to_package_data(self):
"qualifiers": "arch=all",
"md5": "76cf50f29e47676962645632737365a7",
}
expected["extra_data"] = {
"identity": {
"source": "declared",
"origin": {
"download_location": "https://download.url/package.zip",
"homepage": "https://packages.debian.org",
},
}
}
self.assertEqual(expected, package_data)

def test_scanpipe_pipes_spdx_relationship_to_dependency_data(self):
Expand All @@ -250,6 +259,55 @@ def test_scanpipe_pipes_resolve_spdx_packages(self):
packages_data = resolve.resolve_spdx_packages(input_location)
self.assertEqual(4, len(packages_data))

def test_scanpipe_resolve_spdx_package_generates_generic_purl_when_missing(self):
    """
    SPDX package without externalRefs should generate
    a deterministic generic PURL and mark it as inferred.
    """
    # Local imports keep this test self-contained.
    import tempfile
    from pathlib import Path

    spdx_content = {
        "spdxVersion": "SPDX-2.3",
        "SPDXID": "SPDXRef-DOCUMENT",
        "name": "test-doc",
        "dataLicense": "CC0-1.0",
        "documentNamespace": "http://example.com/spdx/test",
        "creationInfo": {
            "created": "2024-01-01T00:00:00Z",
            "creators": ["Tool: pytest"],
        },
        "packages": [
            {
                "name": "examplepkg",
                "SPDXID": "SPDXRef-Package-examplepkg",
                "versionInfo": "1.0.0",
                "downloadLocation": "NOASSERTION",
                "licenseConcluded": "MIT",
                "licenseDeclared": "MIT",
                "copyrightText": "NOASSERTION",
            }
        ],
        "relationships": [],
    }

    # Write the scratch document to a throwaway directory rather than the
    # checked-in test data tree; the directory is removed on exit even when
    # the resolution raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        test_file = Path(temp_dir) / "temp_test.spdx.json"
        test_file.write_text(json.dumps(spdx_content))
        packages = resolve.resolve_spdx_packages(test_file)

    self.assertEqual(1, len(packages))
    package = packages[0]

    self.assertEqual("generic", package.get("type"))
    self.assertEqual("examplepkg", package.get("name"))
    self.assertEqual("1.0.0", package.get("version"))

    self.assertEqual(
        "inferred",
        package.get("extra_data", {}).get("identity", {}).get("source"),
    )

def test_scanpipe_pipes_resolve_spdx_dependencies(self):
input_location = self.data / "spdx" / "SPDXJSONExample-v2.3.spdx.json"
dependencies_data = resolve.resolve_spdx_dependencies(input_location)
Expand Down
6 changes: 3 additions & 3 deletions scanpipe/tests/test_integrations_ort.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"dependencies": 25,
"vulnerabilities": 10,
"purls": [
"pkg:unknown/alpine@3.17.0",
"pkg:generic/alpine@3.17.0",
],
},
Expand Down Expand Up @@ -141,7 +141,7 @@
"pkg:maven/pkg4-grp/pkg4@0.0.1",
"pkg:maven/pkg6-grp/pkg6@0.0.1",
"pkg:maven/pkg7-grp/pkg7@0.0.1",
"pkg:unknown/proj1@0.0.1",
"pkg:generic/proj1@0.0.1",
],
},
"ort-reporter-spdx-2.3/synthetic-scan-result-expected-output.spdx.json": {
Expand All @@ -158,7 +158,7 @@
"pkg:maven/pkg4-grp/pkg4@0.0.1",
"pkg:maven/pkg6-grp/pkg6@0.0.1",
"pkg:maven/pkg7-grp/pkg7@0.0.1",
"pkg:unknown/proj1@0.0.1",
"pkg:generic/proj1@0.0.1",
],
},
}
Expand Down
Loading