@@ -36,8 +36,8 @@ def steps(cls):
3636 cls .check_federatedcode_eligibility ,
3737 cls .create_federatedcode_working_dir ,
3838 cls .fetch_federation_config ,
39- cls .clone_vulnerabilities_repo ,
40- cls .publish_package_vulnerabilities ,
39+ cls .clone_federation_repository ,
40+ cls .publish_package_related_advisories ,
4141 cls .publish_advisories ,
4242 cls .delete_working_dir ,
4343 )
@@ -56,54 +56,44 @@ def fetch_federation_config(self):
5656 name = "aboutcode-data" ,
5757 remote_root_url = "https://github.com/aboutcode-data" ,
5858 )
59- self .data_cluster = data_federation .get_cluster ("purls " )
59+ self .data_cluster = data_federation .get_cluster ("security_advisories " )
6060
61- def clone_vulnerabilities_repo (self ):
61+ def clone_federation_repository (self ):
6262 self .repo = federatedcode .clone_repository (
6363 repo_url = settings .FEDERATEDCODE_VULNERABILITIES_REPO ,
64- clone_path = self .working_path / "vulnerabilities -data" ,
64+ clone_path = self .working_path / "advisories -data" ,
6565 logger = self .log ,
6666 )
6767
68- def publish_package_vulnerabilities (self ):
69- """Publish package vulnerabilities to FederatedCode"""
68+ def publish_package_related_advisories (self ):
69+ """Publish package advisories relations to FederatedCode"""
7070 repo_path = Path (self .repo .working_dir )
7171 commit_count = 1
7272 batch_size = 2000
73- chunk_size = 1000
73+ chunk_size = 500
7474 files_to_commit = set ()
7575
7676 distinct_packages_count = (
77- PackageV2 .objects .values ("type" , "namespace" , "name" )
78- .distinct ("type" , "namespace" , "name" )
77+ PackageV2 .objects .values ("type" , "namespace" , "name" , "version" )
78+ .distinct ("type" , "namespace" , "name" , "version" )
7979 .count ()
8080 )
8181 package_qs = package_prefetched_qs ()
8282 grouped_packages = itertools .groupby (
8383 package_qs .iterator (chunk_size = chunk_size ),
84- key = attrgetter ("type" , "namespace" , "name" ),
84+ key = attrgetter ("type" , "namespace" , "name" , "version" ),
8585 )
8686
87- self .log (f"Exporting vulnerabilities for { distinct_packages_count } packages." )
87+ self .log (f"Exporting advisory relation for { distinct_packages_count } packages." )
8888 progress = LoopProgress (
8989 total_iterations = distinct_packages_count ,
9090 progress_step = 5 ,
9191 logger = self .log ,
9292 )
9393 for _ , packages in progress .iter (grouped_packages ):
94- package_urls , package_vulnerabilities = get_package_vulnerabilities (packages )
95- purl = package_urls [0 ]
96- package_repo , datafile_path = self .data_cluster .get_datafile_repo_and_path (purl = purl )
97- package_vulnerability_path = datafile_path .replace ("/purls.yml" , "/vulnerabilities.yml" )
98- package_vulnerability_path = f"packages/{ package_repo } /{ package_vulnerability_path } "
99- package_path = f"packages/{ package_repo } /{ datafile_path } "
100-
101- write_file (
102- repo_path = repo_path ,
103- file_path = package_path ,
104- data = package_urls ,
105- )
106- files_to_commit .add (package_path )
94+ purl , package_vulnerabilities = get_package_related_advisory (packages )
95+ package_repo , datafile_path = self .data_cluster .get_datafile_repo_and_path (purl )
96+ package_vulnerability_path = f"packages/{ package_repo } /{ datafile_path } "
10797
10898 write_file (
10999 repo_path = repo_path ,
@@ -114,7 +104,7 @@ def publish_package_vulnerabilities(self):
114104
115105 if len (files_to_commit ) > batch_size :
116106 if federatedcode .commit_and_push_changes (
117- commit_message = self .commit_message ("package vulnerabilities " , commit_count ),
107+ commit_message = self .commit_message ("package advisory relations " , commit_count ),
118108 repo = self .repo ,
119109 files_to_commit = files_to_commit ,
120110 logger = self .log ,
@@ -125,7 +115,7 @@ def publish_package_vulnerabilities(self):
125115 if files_to_commit :
126116 federatedcode .commit_and_push_changes (
127117 commit_message = self .commit_message (
128- "package vulnerabilities " ,
118+ "package advisory relations " ,
129119 commit_count ,
130120 commit_count ,
131121 ),
@@ -134,7 +124,7 @@ def publish_package_vulnerabilities(self):
134124 logger = self .log ,
135125 )
136126
137- self .log (f"Federated { distinct_packages_count } package vulnerabilities ." )
127+ self .log (f"Federated { distinct_packages_count } package advisories ." )
138128
139129 def publish_advisories (self ):
140130 """Publish advisory to FederatedCode"""
@@ -146,15 +136,15 @@ def publish_advisories(self):
146136 advisory_qs = advisory_prefetched_qs ()
147137 advisory_count = advisory_qs .count ()
148138
149- self .log (f"Exporting vulnerabilities for { advisory_count } advisory." )
139+ self .log (f"Exporting { advisory_count } advisory." )
150140 progress = LoopProgress (
151141 total_iterations = advisory_count ,
152142 progress_step = 5 ,
153143 logger = self .log ,
154144 )
155145 for advisory in progress .iter (advisory_qs .iterator (chunk_size = chunk_size )):
156146 advisory_data = serialize_advisory (advisory )
157- adv_file = f"vulnerabilities /{ advisory .avid } .yml"
147+ adv_file = f"advisories /{ advisory .avid } .yml"
158148 write_file (
159149 repo_path = repo_path ,
160150 file_path = adv_file ,
@@ -184,7 +174,7 @@ def publish_advisories(self):
184174 logger = self .log ,
185175 )
186176
187- self .log (f"Successfully federated { advisory_count } vulnerabilities ." )
177+ self .log (f"Successfully federated { advisory_count } advisories ." )
188178
189179 def delete_working_dir (self ):
190180 """Remove temporary working dir."""
@@ -200,7 +190,7 @@ def commit_message(
200190 commit_count ,
201191 total_commit_count = "many" ,
202192 ):
203- """Commit message for pushing Package vulnerability."""
193+ """Commit message for pushing package vulnerability."""
204194 return federatedcode .commit_message (
205195 item_type = item_type ,
206196 commit_count = commit_count ,
@@ -211,30 +201,48 @@ def commit_message(
def package_prefetched_qs():
    """
    Return the PackageV2 queryset used for exporting package advisory relations.

    Rows are ordered by type, namespace, name and version, and only the
    package_url and those four identity columns are requested. The
    ``affected_in_impacts`` and ``fixed_in_impacts`` relations are prefetched
    together with the ``avid`` of each impact's advisory.
    """

    def impacts_prefetch(relation_name):
        # Each relation gets its own freshly built querysets; nothing is shared
        # between the "affected" and "fixed" prefetches.
        advisory_prefetch = Prefetch(
            "advisory",
            queryset=AdvisoryV2.objects.only("avid"),
        )
        impacts = ImpactedPackage.objects.only("advisory_id").prefetch_related(
            advisory_prefetch
        )
        return Prefetch(relation_name, queryset=impacts)

    ordered_packages = PackageV2.objects.order_by("type", "namespace", "name", "version")
    slim_packages = ordered_packages.only(
        "package_url", "type", "namespace", "name", "version"
    )
    return slim_packages.prefetch_related(
        impacts_prefetch("affected_in_impacts"),
        impacts_prefetch("fixed_in_impacts"),
    )
236226
237227
def get_package_related_advisory(packages):
    """
    Return a ``(purl, relations)`` tuple for a group of packages.

    ``packages`` is an iterable of package objects exposing ``package_url``
    plus ``affected_in_impacts`` and ``fixed_in_impacts`` managers whose
    ``all()`` yields impacts carrying an ``advisory`` with an ``avid``.

    ``relations`` is a list with one mapping per package holding its
    ``purl`` and the sorted advisory ids under ``affected_by_advisories``
    and ``fixing_advisories``.

    ``purl`` is the ``package_url`` of the last package iterated, or ``None``
    when ``packages`` is empty.
    """
    # Track the purl explicitly instead of reading the loop variable after the
    # loop, which is unbound when ``packages`` yields nothing.
    purl = None
    package_vulnerabilities = []
    for package in packages:
        purl = package.package_url
        affected_by_advisories = sorted(
            impact.advisory.avid for impact in package.affected_in_impacts.all()
        )
        fixing_advisories = sorted(
            impact.advisory.avid for impact in package.fixed_in_impacts.all()
        )
        package_vulnerabilities.append(
            {
                "purl": purl,
                "affected_by_advisories": affected_by_advisories,
                "fixing_advisories": fixing_advisories,
            }
        )

    return purl, package_vulnerabilities
244+
245+
238246def advisory_prefetched_qs ():
239247 return AdvisoryV2 .objects .prefetch_related (
240248 "impacted_packages" ,
@@ -245,29 +253,6 @@ def advisory_prefetched_qs():
245253 )
246254
247255
248- def get_package_vulnerabilities (packages ):
249- """Return list of PURLs and serialized package vulnerability"""
250- package_urls = []
251- package_vulnerabilities = []
252- for package in packages :
253- package_urls .append (package .package_url )
254- package_vulnerabilities .append (serialize_package_vulnerability (package ))
255- return package_urls , package_vulnerabilities
256-
257-
258- def serialize_package_vulnerability (package ):
259- affected_by_vulnerabilities = [
260- impact .advisory .avid for impact in package .affected_in_impacts .all ()
261- ]
262- fixing_vulnerabilities = [impact .advisory .avid for impact in package .fixed_in_impacts .all ()]
263-
264- return {
265- "purl" : package .package_url ,
266- "affected_by_vulnerabilities" : affected_by_vulnerabilities ,
267- "fixing_vulnerabilities" : fixing_vulnerabilities ,
268- }
269-
270-
271256def serialize_severity (sev ):
272257 return {
273258 "score" : sev .value ,
@@ -288,7 +273,7 @@ def serialize_references(reference):
288273
289274def serialize_advisory (advisory ):
290275 """Return a plain data mapping serialized from advisory object."""
291- aliases = [a .alias for a in advisory .aliases .all ()]
276+ aliases = sorted ( [a .alias for a in advisory .aliases .all ()])
292277 severities = [serialize_severity (sev ) for sev in advisory .severities .all ()]
293278 weaknesses = [wkns .cwe for wkns in advisory .weaknesses .all ()]
294279 references = [serialize_references (ref ) for ref in advisory .references .all ()]
0 commit comments