From 6583958523d9f6f7aba3bf904e60c0660ad231e1 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Mon, 23 Feb 2026 16:37:59 +0200 Subject: [PATCH 1/2] Create an improver to collect patch texts wherever patch URL is present but text is missing Signed-off-by: ziad hany --- vulnerabilities/improvers/__init__.py | 2 + .../pipelines/v2_improvers/fetch_patch_url.py | 83 +++++++++++++++++++ .../v2_improvers/test_fetch_patch_url.py | 58 +++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py create mode 100644 vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 97c18e6f9..8c3ed83b0 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -33,6 +33,7 @@ from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 from vulnerabilities.pipelines.v2_improvers import relate_severities from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2 +from vulnerabilities.pipelines.v2_improvers import fetch_patch_url as fetch_patch_url_v2 from vulnerabilities.utils import create_registry IMPROVERS_REGISTRY = create_registry( @@ -71,6 +72,7 @@ compute_version_rank_v2.ComputeVersionRankPipeline, compute_advisory_todo_v2.ComputeToDo, unfurl_version_range_v2.UnfurlVersionRangePipeline, + fetch_patch_url_v2.FetchPatchURLImproverPipeline, compute_advisory_todo.ComputeToDo, collect_ssvc_trees.CollectSSVCPipeline, relate_severities.RelateSeveritiesPipeline, diff --git a/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py b/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py new file mode 100644 index 000000000..3da8c4043 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py @@ -0,0 +1,83 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from vulnerabilities.models import PackageCommitPatch, Patch +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.utils import fetch_response + + +class FetchPatchURLImproverPipeline(VulnerableCodePipeline): + """FetchPatchURL Improver Pipeline""" + + pipeline_id = "fetch_patch_url" + precedence = 200 + + @classmethod + def steps(cls): + return ( + cls.collect_patch_text, + ) + + def fetch_patch_content(self, url): + """ + Fetches the text content of a patch from a URL. + """ + if not url: + return None + + self.log(f"Fetching `{url}`") + + response = fetch_response(url) + if response: + return response.text.replace("\x00", "") + + self.log(f"Skipping {url} due to fetch failure.") + return None + + def advisories_count(self) -> int: + return ( + PackageCommitPatch.objects.filter(patch_text__isnull=True).count() + + Patch.objects.filter(patch_text__isnull=True).count() + ) + + def collect_patch_text(self): + for pcp in PackageCommitPatch.objects.filter(patch_text__isnull=True): + patch_url = generate_patch_url(pcp.vcs_url, pcp.commit_hash) + content = self.fetch_patch_content(patch_url) + if not content: + continue + pcp.patch_text = content + pcp.save() + + for patch in Patch.objects.filter(patch_text__isnull=True): + content = self.fetch_patch_content(patch.patch_url) + if not content: + continue + + patch.patch_text = content + patch.save() + +def generate_patch_url(vcs_url, commit_hash): + """ + Generate patch URL from VCS URL and commit hash. + """ + if not vcs_url or not commit_hash: + return None + + vcs_url = vcs_url.rstrip("/") + + if vcs_url.startswith("https://github.com"): + return f"{vcs_url}/commit/{commit_hash}.patch" + elif vcs_url.startswith("https://gitlab.com"): + return f"{vcs_url}/-/commit/{commit_hash}.patch" + elif vcs_url.startswith("https://bitbucket.org"): + return f"{vcs_url}/-/commit/{commit_hash}/raw" + elif vcs_url.startswith("https://git.kernel.org"): + return f"{vcs_url}.git/patch/?id={commit_hash}" + return diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py b/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py new file mode 100644 index 000000000..8fcf9a92b --- /dev/null +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py @@ -0,0 +1,58 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest import mock +from unittest.mock import MagicMock + +import pytest + +from vulnerabilities.models import PackageCommitPatch, Patch +from vulnerabilities.pipelines.v2_improvers.fetch_patch_url import FetchPatchURLImproverPipeline + + +@pytest.mark.django_db +@mock.patch("vulnerabilities.utils.requests.get") +def test_collect_patch_text_success(mock_get): + res1 = MagicMock(status_code=200, text="diff --git a/file1") + res2 = MagicMock(status_code=200, text="diff --git a/file2") + mock_get.side_effect = [res1, res2] + + pcp = PackageCommitPatch.objects.create( + vcs_url="https://github.com/nexB/vulnerablecode", + commit_hash="abc1234", + patch_text=None + ) + + patch = Patch.objects.create( + patch_url="https://gitlab.com/nexB/vulnerablecode/-/commit/def5678.patch", + patch_text=None + ) + pipeline = FetchPatchURLImproverPipeline() + pipeline.collect_patch_text() + + pcp.refresh_from_db() + patch.refresh_from_db() + + assert pcp.patch_text == "diff --git a/file1" + assert patch.patch_text == "diff --git a/file2" + +@pytest.mark.django_db +@mock.patch("vulnerabilities.utils.requests.get") +def test_collect_patch_text_failure(mock_get): + mock_get.side_effect = Exception("Connection Error") + + pcp = PackageCommitPatch.objects.create( + vcs_url="https://github.com/nexB/vulnerablecode", + commit_hash="abc1234", + patch_text=None + ) + + pipeline = FetchPatchURLImproverPipeline() + pipeline.collect_patch_text() + assert pcp.patch_text is None \ No newline at end of file From 8723c4aa923619e41ef52f40a92f8faf02314ce1 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Tue, 3 Mar 2026 01:23:22 +0200 Subject: [PATCH 2/2] Move generate_patch_url function to utils Signed-off-by: ziad hany --- vulnerabilities/improvers/__init__.py | 2 +- .../pipelines/v2_improvers/fetch_patch_url.py | 31 ++++--------------- .../v2_improvers/test_fetch_patch_url.py | 17 +++++----- vulnerabilities/utils.py | 20 ++++++++++++ 4 files changed, 34 insertions(+), 36 deletions(-) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 8c3ed83b0..5e562f65d 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -30,10 +30,10 @@ from vulnerabilities.pipelines.v2_improvers import ( enhance_with_metasploit as enhance_with_metasploit_v2, ) +from vulnerabilities.pipelines.v2_improvers import fetch_patch_url as fetch_patch_url_v2 from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 from vulnerabilities.pipelines.v2_improvers import relate_severities from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2 -from vulnerabilities.pipelines.v2_improvers import fetch_patch_url as fetch_patch_url_v2 from vulnerabilities.utils import create_registry IMPROVERS_REGISTRY = create_registry( diff --git a/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py b/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py index 3da8c4043..5ade44030 100644 --- a/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py +++ b/vulnerabilities/pipelines/v2_improvers/fetch_patch_url.py @@ -7,9 +7,11 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from vulnerabilities.models import PackageCommitPatch, Patch +from vulnerabilities.models import PackageCommitPatch +from vulnerabilities.models import Patch from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.utils import fetch_response +from vulnerabilities.utils import generate_patch_url class FetchPatchURLImproverPipeline(VulnerableCodePipeline): @@ -20,9 +22,7 @@ class FetchPatchURLImproverPipeline(VulnerableCodePipeline): @classmethod def steps(cls): - return ( - cls.collect_patch_text, - ) + return (cls.collect_patch_text,) def fetch_patch_content(self, url): """ @@ -42,8 +42,8 @@ def fetch_patch_content(self, url): def advisories_count(self) -> int: return ( - PackageCommitPatch.objects.filter(patch_text__isnull=True).count() + - Patch.objects.filter(patch_text__isnull=True).count() + PackageCommitPatch.objects.filter(patch_text__isnull=True).count() + + Patch.objects.filter(patch_text__isnull=True).count() ) def collect_patch_text(self): @@ -62,22 +62,3 @@ def collect_patch_text(self): patch.patch_text = content patch.save() - -def generate_patch_url(vcs_url, commit_hash): - """ - Generate patch URL from VCS URL and commit hash. - """ - if not vcs_url or not commit_hash: - return None - - vcs_url = vcs_url.rstrip("/") - - if vcs_url.startswith("https://github.com"): - return f"{vcs_url}/commit/{commit_hash}.patch" - elif vcs_url.startswith("https://gitlab.com"): - return f"{vcs_url}/-/commit/{commit_hash}.patch" - elif vcs_url.startswith("https://bitbucket.org"): - return f"{vcs_url}/-/commit/{commit_hash}/raw" - elif vcs_url.startswith("https://git.kernel.org"): - return f"{vcs_url}.git/patch/?id={commit_hash}" - return diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py b/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py index 8fcf9a92b..421a88967 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_fetch_patch_url.py @@ -12,7 +12,8 @@ import pytest -from vulnerabilities.models import PackageCommitPatch, Patch +from vulnerabilities.models import PackageCommitPatch +from vulnerabilities.models import Patch from vulnerabilities.pipelines.v2_improvers.fetch_patch_url import FetchPatchURLImproverPipeline @@ -24,14 +25,11 @@ def test_collect_patch_text_success(mock_get): mock_get.side_effect = [res1, res2] pcp = PackageCommitPatch.objects.create( - vcs_url="https://github.com/nexB/vulnerablecode", - commit_hash="abc1234", - patch_text=None + vcs_url="https://github.com/nexB/vulnerablecode", commit_hash="abc1234", patch_text=None ) patch = Patch.objects.create( - patch_url="https://gitlab.com/nexB/vulnerablecode/-/commit/def5678.patch", - patch_text=None + patch_url="https://gitlab.com/nexB/vulnerablecode/-/commit/def5678.patch", patch_text=None ) pipeline = FetchPatchURLImproverPipeline() pipeline.collect_patch_text() @@ -42,17 +40,16 @@ def test_collect_patch_text_success(mock_get): assert pcp.patch_text == "diff --git a/file1" assert patch.patch_text == "diff --git a/file2" + @pytest.mark.django_db @mock.patch("vulnerabilities.utils.requests.get") def test_collect_patch_text_failure(mock_get): mock_get.side_effect = Exception("Connection Error") pcp = PackageCommitPatch.objects.create( - vcs_url="https://github.com/nexB/vulnerablecode", - commit_hash="abc1234", - patch_text=None + vcs_url="https://github.com/nexB/vulnerablecode", commit_hash="abc1234", patch_text=None ) pipeline = FetchPatchURLImproverPipeline() pipeline.collect_patch_text() - assert pcp.patch_text is None \ No newline at end of file + assert pcp.patch_text is None diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 0eb1d1258..d9691d822 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -867,3 +867,23 @@ def group_advisories_by_content(advisories): entry["secondary"].add(advisory) return grouped + + +def generate_patch_url(vcs_url, commit_hash): + """ + Generate patch URL from VCS URL and commit hash. + """ + if not vcs_url or not commit_hash: + return None + + vcs_url = vcs_url.rstrip("/") + + if vcs_url.startswith("https://github.com"): + return f"{vcs_url}/commit/{commit_hash}.patch" + elif vcs_url.startswith("https://gitlab.com"): + return f"{vcs_url}/-/commit/{commit_hash}.patch" + elif vcs_url.startswith("https://bitbucket.org"): + return f"{vcs_url}/-/commit/{commit_hash}/raw" + elif vcs_url.startswith("https://git.kernel.org"): + return f"{vcs_url}.git/patch/?id={commit_hash}" + return