Skip to content

Commit c12280b

Browse files
committed
Store advisories in security_advisories cluster
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent c1eb792 commit c12280b

File tree

7 files changed

+100
-108
lines changed

7 files changed

+100
-108
lines changed

vulnerabilities/pipelines/exporters/federate_vulnerabilities.py

Lines changed: 46 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def steps(cls):
3636
cls.check_federatedcode_eligibility,
3737
cls.create_federatedcode_working_dir,
3838
cls.fetch_federation_config,
39-
cls.clone_vulnerabilities_repo,
40-
cls.publish_package_vulnerabilities,
39+
cls.clone_federation_repository,
40+
cls.publish_package_related_advisories,
4141
cls.publish_advisories,
4242
cls.delete_working_dir,
4343
)
@@ -56,54 +56,44 @@ def fetch_federation_config(self):
5656
name="aboutcode-data",
5757
remote_root_url="https://github.com/aboutcode-data",
5858
)
59-
self.data_cluster = data_federation.get_cluster("purls")
59+
self.data_cluster = data_federation.get_cluster("security_advisories")
6060

61-
def clone_vulnerabilities_repo(self):
61+
def clone_federation_repository(self):
6262
self.repo = federatedcode.clone_repository(
6363
repo_url=settings.FEDERATEDCODE_VULNERABILITIES_REPO,
64-
clone_path=self.working_path / "vulnerabilities-data",
64+
clone_path=self.working_path / "advisories-data",
6565
logger=self.log,
6666
)
6767

68-
def publish_package_vulnerabilities(self):
69-
"""Publish package vulnerabilities to FederatedCode"""
68+
def publish_package_related_advisories(self):
69+
"""Publish package advisories relations to FederatedCode"""
7070
repo_path = Path(self.repo.working_dir)
7171
commit_count = 1
7272
batch_size = 2000
73-
chunk_size = 1000
73+
chunk_size = 500
7474
files_to_commit = set()
7575

7676
distinct_packages_count = (
77-
PackageV2.objects.values("type", "namespace", "name")
78-
.distinct("type", "namespace", "name")
77+
PackageV2.objects.values("type", "namespace", "name", "version")
78+
.distinct("type", "namespace", "name", "version")
7979
.count()
8080
)
8181
package_qs = package_prefetched_qs()
8282
grouped_packages = itertools.groupby(
8383
package_qs.iterator(chunk_size=chunk_size),
84-
key=attrgetter("type", "namespace", "name"),
84+
key=attrgetter("type", "namespace", "name", "version"),
8585
)
8686

87-
self.log(f"Exporting vulnerabilities for {distinct_packages_count} packages.")
87+
self.log(f"Exporting advisory relation for {distinct_packages_count} packages.")
8888
progress = LoopProgress(
8989
total_iterations=distinct_packages_count,
9090
progress_step=5,
9191
logger=self.log,
9292
)
9393
for _, packages in progress.iter(grouped_packages):
94-
package_urls, package_vulnerabilities = get_package_vulnerabilities(packages)
95-
purl = package_urls[0]
96-
package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(purl=purl)
97-
package_vulnerability_path = datafile_path.replace("/purls.yml", "/vulnerabilities.yml")
98-
package_vulnerability_path = f"packages/{package_repo}/{package_vulnerability_path}"
99-
package_path = f"packages/{package_repo}/{datafile_path}"
100-
101-
write_file(
102-
repo_path=repo_path,
103-
file_path=package_path,
104-
data=package_urls,
105-
)
106-
files_to_commit.add(package_path)
94+
purl, package_vulnerabilities = get_package_related_advisory(packages)
95+
package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(purl)
96+
package_vulnerability_path = f"packages/{package_repo}/{datafile_path}"
10797

10898
write_file(
10999
repo_path=repo_path,
@@ -114,7 +104,7 @@ def publish_package_vulnerabilities(self):
114104

115105
if len(files_to_commit) > batch_size:
116106
if federatedcode.commit_and_push_changes(
117-
commit_message=self.commit_message("package vulnerabilities", commit_count),
107+
commit_message=self.commit_message("package advisory relations", commit_count),
118108
repo=self.repo,
119109
files_to_commit=files_to_commit,
120110
logger=self.log,
@@ -125,7 +115,7 @@ def publish_package_vulnerabilities(self):
125115
if files_to_commit:
126116
federatedcode.commit_and_push_changes(
127117
commit_message=self.commit_message(
128-
"package vulnerabilities",
118+
"package advisory relations",
129119
commit_count,
130120
commit_count,
131121
),
@@ -134,7 +124,7 @@ def publish_package_vulnerabilities(self):
134124
logger=self.log,
135125
)
136126

137-
self.log(f"Federated {distinct_packages_count} package vulnerabilities.")
127+
self.log(f"Federated {distinct_packages_count} package advisories.")
138128

139129
def publish_advisories(self):
140130
"""Publish advisory to FederatedCode"""
@@ -146,15 +136,15 @@ def publish_advisories(self):
146136
advisory_qs = advisory_prefetched_qs()
147137
advisory_count = advisory_qs.count()
148138

149-
self.log(f"Exporting vulnerabilities for {advisory_count} advisory.")
139+
self.log(f"Exporting {advisory_count} advisory.")
150140
progress = LoopProgress(
151141
total_iterations=advisory_count,
152142
progress_step=5,
153143
logger=self.log,
154144
)
155145
for advisory in progress.iter(advisory_qs.iterator(chunk_size=chunk_size)):
156146
advisory_data = serialize_advisory(advisory)
157-
adv_file = f"vulnerabilities/{advisory.avid}.yml"
147+
adv_file = f"advisories/{advisory.avid}.yml"
158148
write_file(
159149
repo_path=repo_path,
160150
file_path=adv_file,
@@ -184,7 +174,7 @@ def publish_advisories(self):
184174
logger=self.log,
185175
)
186176

187-
self.log(f"Successfully federated {advisory_count} vulnerabilities.")
177+
self.log(f"Successfully federated {advisory_count} advisories.")
188178

189179
def delete_working_dir(self):
190180
"""Remove temporary working dir."""
@@ -200,7 +190,7 @@ def commit_message(
200190
commit_count,
201191
total_commit_count="many",
202192
):
203-
"""Commit message for pushing Package vulnerability."""
193+
"""Commit message for pushing package vulnerability."""
204194
return federatedcode.commit_message(
205195
item_type=item_type,
206196
commit_count=commit_count,
@@ -211,30 +201,48 @@ def commit_message(
211201
def package_prefetched_qs():
212202
return (
213203
PackageV2.objects.order_by("type", "namespace", "name", "version")
214-
.only("id", "package_url", "type", "namespace", "name", "version")
204+
.only("package_url", "type", "namespace", "name", "version")
215205
.prefetch_related(
216206
Prefetch(
217207
"affected_in_impacts",
218-
queryset=ImpactedPackage.objects.only("id", "advisory_id").prefetch_related(
208+
queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
219209
Prefetch(
220210
"advisory",
221-
queryset=AdvisoryV2.objects.only("id", "avid"),
211+
queryset=AdvisoryV2.objects.only("avid"),
222212
)
223213
),
224214
),
225215
Prefetch(
226216
"fixed_in_impacts",
227-
queryset=ImpactedPackage.objects.only("id", "advisory_id").prefetch_related(
217+
queryset=ImpactedPackage.objects.only("advisory_id").prefetch_related(
228218
Prefetch(
229219
"advisory",
230-
queryset=AdvisoryV2.objects.only("id", "avid"),
220+
queryset=AdvisoryV2.objects.only("avid"),
231221
)
232222
),
233223
),
234224
)
235225
)
236226

237227

228+
def get_package_related_advisory(packages):
    """
    Return a ``(purl, package_advisories)`` tuple for a group of ``packages``.

    ``packages`` is an iterable of package objects exposing ``package_url``
    and the ``affected_in_impacts`` / ``fixed_in_impacts`` relations, where
    each impact has an ``advisory`` with an ``avid``.

    ``package_advisories`` holds one mapping per package with its ``purl``
    and the sorted advisory ids it is affected by and it fixes. The returned
    ``purl`` is the package URL of the last package in the group.

    Raise a ValueError when ``packages`` is empty, since there is then no
    package URL to return.
    """
    package_advisories = []
    purl = None
    for package in packages:
        # Track the purl explicitly instead of relying on the loop variable
        # still being bound after the loop ends.
        purl = package.package_url
        package_advisories.append(
            {
                "purl": purl,
                # Sorted so that the exported data is stable between runs.
                "affected_by_advisories": sorted(
                    impact.advisory.avid for impact in package.affected_in_impacts.all()
                ),
                "fixing_advisories": sorted(
                    impact.advisory.avid for impact in package.fixed_in_impacts.all()
                ),
            }
        )

    if not package_advisories:
        raise ValueError("Cannot collect related advisories from an empty group of packages.")

    return purl, package_advisories
244+
245+
238246
def advisory_prefetched_qs():
239247
return AdvisoryV2.objects.prefetch_related(
240248
"impacted_packages",
@@ -245,29 +253,6 @@ def advisory_prefetched_qs():
245253
)
246254

247255

248-
def get_package_vulnerabilities(packages):
249-
"""Return list of PURLs and serialized package vulnerability"""
250-
package_urls = []
251-
package_vulnerabilities = []
252-
for package in packages:
253-
package_urls.append(package.package_url)
254-
package_vulnerabilities.append(serialize_package_vulnerability(package))
255-
return package_urls, package_vulnerabilities
256-
257-
258-
def serialize_package_vulnerability(package):
259-
affected_by_vulnerabilities = [
260-
impact.advisory.avid for impact in package.affected_in_impacts.all()
261-
]
262-
fixing_vulnerabilities = [impact.advisory.avid for impact in package.fixed_in_impacts.all()]
263-
264-
return {
265-
"purl": package.package_url,
266-
"affected_by_vulnerabilities": affected_by_vulnerabilities,
267-
"fixing_vulnerabilities": fixing_vulnerabilities,
268-
}
269-
270-
271256
def serialize_severity(sev):
272257
return {
273258
"score": sev.value,
@@ -288,7 +273,7 @@ def serialize_references(reference):
288273

289274
def serialize_advisory(advisory):
290275
"""Return a plain data mapping serialized from advisory object."""
291-
aliases = [a.alias for a in advisory.aliases.all()]
276+
aliases = sorted([a.alias for a in advisory.aliases.all()])
292277
severities = [serialize_severity(sev) for sev in advisory.severities.all()]
293278
weaknesses = [wkns.cwe for wkns in advisory.weaknesses.all()]
294279
references = [serialize_references(ref) for ref in advisory.references.all()]

vulnerabilities/tests/pipelines/exporters/test_federate_vulnerabilities.py

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class TestFederatePackageVulnerabilities(TestCase):
3737
def setUp(self):
3838
self.logger = TestLogger()
3939

40-
advisory = AdvisoryDataV2(
40+
advisory1 = AdvisoryDataV2(
4141
summary="Test advisory",
4242
aliases=["CVE-2025-0001"],
4343
references=[],
@@ -51,53 +51,65 @@ def setUp(self):
5151
introduced_by_commit_patches=[],
5252
fixed_by_commit_patches=[],
5353
),
54+
],
55+
patches=[],
56+
advisory_id="ADV-001",
57+
date_published=datetime.now() - timedelta(days=10),
58+
url="https://example.com/advisory/1",
59+
)
60+
advisory2 = AdvisoryDataV2(
61+
summary="Test advisory2",
62+
aliases=["CVE-2025-0002"],
63+
references=[],
64+
severities=[],
65+
weaknesses=[],
66+
affected_packages=[
5467
AffectedPackageV2(
5568
package=PackageURL.from_string("pkg:npm/foobar"),
56-
affected_version_range=VersionRange.from_string("vers:npm/<=3.2.3"),
57-
fixed_version_range=VersionRange.from_string("vers:npm/3.2.4"),
69+
affected_version_range=VersionRange.from_string("vers:npm/>=1.2.4"),
70+
fixed_version_range=VersionRange.from_string("vers:npm/2.0.0"),
5871
introduced_by_commit_patches=[],
5972
fixed_by_commit_patches=[],
6073
),
6174
],
6275
patches=[],
63-
advisory_id="ADV-123",
76+
advisory_id="ADV-002",
6477
date_published=datetime.now() - timedelta(days=10),
65-
url="https://example.com/advisory/1",
78+
url="https://example.com/advisory/2",
6679
)
6780
insert_advisory_v2(
68-
advisory=advisory,
81+
advisory=advisory1,
82+
pipeline_id="test_pipeline_v2",
83+
)
84+
insert_advisory_v2(
85+
advisory=advisory2,
6986
pipeline_id="test_pipeline_v2",
7087
)
7188

7289
@patch(
73-
"vulnerabilities.pipelines.exporters.federate_vulnerabilities.FederatePackageVulnerabilities.clone_vulnerabilities_repo"
90+
"vulnerabilities.pipelines.exporters.federate_vulnerabilities.FederatePackageVulnerabilities.clone_federation_repository"
7491
)
7592
@patch("vulnerabilities.pipes.federatedcode.commit_and_push_changes")
7693
@patch("vulnerabilities.pipes.federatedcode.check_federatedcode_configured_and_available")
7794
def test_vulnerabilities_federation_v2(self, mock_check_fed, mock_commit, mock_clone):
7895
mock_check_fed.return_value = None
7996
mock_commit.return_value = None
80-
mock_clone.__name__ = "clone_vulnerabilities_repo"
97+
mock_clone.__name__ = "clone_federation_repository"
8198

8299
working_dir = Path(tempfile.mkdtemp())
83-
print(working_dir)
84-
85100
pipeline = FederatePackageVulnerabilities()
86101
pipeline.repo = Repo.init(working_dir)
87102
pipeline.log = self.logger.write
88103
pipeline.execute()
89-
print(self.logger.getvalue())
90104

91-
result_purl_yml = next(working_dir.rglob("purls.yml"))
92-
result_vulnerabilities_yml = next(working_dir.rglob("vulnerabilities.yml"))
93-
result_advisory_yml = next(working_dir.rglob("ADV-123.yml"))
105+
result_advisories_yml = next(working_dir.rglob("1.2.4/advisories.yml"))
106+
result_advisory1_yml = next(working_dir.rglob("ADV-001.yml"))
107+
result_advisory2_yml = next(working_dir.rglob("ADV-002.yml"))
94108

95-
expected_purl_yml = TEST_DATA / "purls-expected.yml"
96-
expected_vulnerabilities_yml = TEST_DATA / "vulnerabilities-expected.yml"
97-
expected_advisory_yml = TEST_DATA / "ADV-123-expected.yml"
109+
expected_advisories_yml = TEST_DATA / "1.2.4" / "advisories-expected.yml"
110+
expected_advisory1_yml = TEST_DATA / "ADV-001-expected.yml"
111+
expected_advisory2_yml = TEST_DATA / "ADV-002-expected.yml"
98112

99-
util_tests.check_results_and_expected_files(result_purl_yml, expected_purl_yml)
100-
util_tests.check_results_and_expected_files(
101-
result_vulnerabilities_yml, expected_vulnerabilities_yml
102-
)
103-
util_tests.check_results_and_expected_files(result_advisory_yml, expected_advisory_yml)
113+
util_tests.check_results_and_expected_files(result_advisories_yml, expected_advisories_yml)
114+
util_tests.check_results_and_expected_files(result_advisory1_yml, expected_advisory1_yml)
115+
util_tests.check_results_and_expected_files(result_advisory2_yml, expected_advisory2_yml)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
- purl: pkg:npm/foobar@1.2.4
2+
affected_by_advisories:
3+
- test_pipeline_v2/ADV-002
4+
fixing_advisories:
5+
- test_pipeline_v2/ADV-001

vulnerabilities/tests/test_data/exporters/federate_vulnerabilities/ADV-123-expected.yml renamed to vulnerabilities/tests/test_data/exporters/federate_vulnerabilities/ADV-001-expected.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
advisory_id: ADV-123
2-
datasource_id: test_pipeline_v2/ADV-123
1+
advisory_id: ADV-001
2+
datasource_id: test_pipeline_v2/ADV-001
33
datasource_url: https://example.com/advisory/1
44
aliases:
55
- CVE-2025-0001
@@ -8,9 +8,6 @@ impacted_packages:
88
- purl: pkg:npm/foobar
99
affected_versions: vers:npm/<=1.2.3
1010
fixed_versions: vers:npm/1.2.4
11-
- purl: pkg:npm/foobar
12-
affected_versions: vers:npm/<=3.2.3
13-
fixed_versions: vers:npm/3.2.4
1411
severities: []
1512
weaknesses: []
1613
references: []
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
advisory_id: ADV-002
2+
datasource_id: test_pipeline_v2/ADV-002
3+
datasource_url: https://example.com/advisory/2
4+
aliases:
5+
- CVE-2025-0002
6+
summary: Test advisory2
7+
impacted_packages:
8+
- purl: pkg:npm/foobar
9+
affected_versions: vers:npm/>=1.2.4
10+
fixed_versions: vers:npm/2.0.0
11+
severities: []
12+
weaknesses: []
13+
references: []

vulnerabilities/tests/test_data/exporters/federate_vulnerabilities/purls-expected.yml

Lines changed: 0 additions & 4 deletions
This file was deleted.

vulnerabilities/tests/test_data/exporters/federate_vulnerabilities/vulnerabilities-expected.yml

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)