From ab667dd2a4d5e153031c34395d6163c0b387543c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 11 Jun 2026 17:23:08 +0530 Subject: [PATCH 1/3] Fix enhancement pipelines Signed-off-by: Tushar Goel --- vulnerabilities/tests/test_utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/vulnerabilities/tests/test_utils.py b/vulnerabilities/tests/test_utils.py index d0623c408..b4b001581 100644 --- a/vulnerabilities/tests/test_utils.py +++ b/vulnerabilities/tests/test_utils.py @@ -10,10 +10,10 @@ from datetime import datetime from datetime import timedelta +import pytest from django.test import TestCase from fetchcode.package_versions import PackageVersion from packageurl import PackageURL -import pytest from univers.version_constraint import VersionConstraint from univers.version_range import GemVersionRange from univers.version_range import VersionRange @@ -25,15 +25,17 @@ from vulnerabilities.importer import PackageCommitPatchData from vulnerabilities.importer import PatchData from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.models import AdvisoryAlias, AdvisoryV2 +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import insert_advisory_v2 from vulnerabilities.references import XsaReferenceV2 from vulnerabilities.references import ZbxReferenceV2 from vulnerabilities.tests.pipelines import TestLogger -from vulnerabilities.utils import AffectedPackage, relate_aliases_with_advisories +from vulnerabilities.utils import AffectedPackage from vulnerabilities.utils import get_item from vulnerabilities.utils import get_severity_range from vulnerabilities.utils import nearest_patched_package +from vulnerabilities.utils import relate_aliases_with_advisories from vulnerabilities.utils import resolve_version_range from vulnerabilities.utils import split_markdown_front_matter @@ -332,9 +334,7 @@ def
test_handles_mixed_aliases_and_advisory_ids(): alias = AdvisoryAlias.objects.create(alias="CVE-2025-1") alias.advisories.add(alias_adv) - result = relate_aliases_with_advisories( - ["CVE-2025-1", "GHSA-2"] - ) + result = relate_aliases_with_advisories(["CVE-2025-1", "GHSA-2"]) assert result == {alias_adv, advisory_id_adv} @@ -411,8 +411,6 @@ def test_deduplicates_results(): alias1.advisories.add(advisory) alias2.advisories.add(advisory) - result = relate_aliases_with_advisories( - ["CVE-1", "CVE-2"] - ) + result = relate_aliases_with_advisories(["CVE-1", "CVE-2"]) assert result == {advisory} From 3b5ca880b6ff1235162ed9a618c07aea0e919349 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 11 Jun 2026 17:31:26 +0530 Subject: [PATCH 2/3] Fix tests Signed-off-by: Tushar Goel --- .../v2_improvers/test_enhance_with_exploitdb_v2.py | 2 ++ .../pipelines/v2_improvers/test_enhance_with_github_poc.py | 6 ++++++ .../pipelines/v2_improvers/test_enhance_with_kev_v2.py | 2 ++ .../v2_improvers/test_enhance_with_metasploit_v2.py | 2 ++ 4 files changed, 12 insertions(+) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py index 46d0a4092..3a10582bd 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_exploitdb_v2.py @@ -46,6 +46,8 @@ def test_exploit_db_improver(mock_get): unique_content_id="i3giu", url="https://test.com", date_collected=datetime.now(), + _all_impacts_unfurled_at=datetime.now(), + is_latest=True, ) alias = AdvisoryAlias.objects.create(alias="CVE-2009-3699") diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_github_poc.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_github_poc.py index ca1d94c42..951717cc8 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_github_poc.py 
+++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_github_poc.py @@ -42,6 +42,8 @@ def test_github_poc_db_improver(mock_fetch_via_vcs): url="https://test.com", date_collected=datetime.now(), pipeline_id="ghsa_pipeline_v2", + is_latest=True, + _all_impacts_unfurled_at=datetime.now(), ) adv2 = AdvisoryV2.objects.create( advisory_id="VCIO-123-1002", @@ -51,6 +53,8 @@ def test_github_poc_db_improver(mock_fetch_via_vcs): url="https://test.com", date_collected=datetime.now(), pipeline_id="ghsa_pipeline_v2", + is_latest=True, + _all_impacts_unfurled_at=datetime.now(), ) adv3 = AdvisoryV2.objects.create( advisory_id="VCIO-123-1003", @@ -60,6 +64,8 @@ def test_github_poc_db_improver(mock_fetch_via_vcs): url="https://test.com", date_collected=datetime.now(), pipeline_id="ghsa_pipeline_v2", + is_latest=True, + _all_impacts_unfurled_at=datetime.now(), ) alias1 = AdvisoryAlias.objects.create(alias="CVE-2022-0236") diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py index 8950f3b9d..0e441eb2e 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_kev_v2.py @@ -46,6 +46,8 @@ def test_kev_improver(mock_get): unique_content_id="i3giu", url="https://test.com", date_collected=datetime.now(), + is_latest=True, + _all_impacts_unfurled_at=datetime.now(), ) adv1.save() diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py index d5030a292..68c3373ec 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_enhance_with_metasploit_v2.py @@ -49,6 +49,8 @@ def test_metasploit_improver(mock_get): unique_content_id="i3giu", url="https://test.com", 
date_collected=datetime.now(), + _all_impacts_unfurled_at=datetime.now(), + is_latest=True, ) alias = AdvisoryAlias.objects.create(alias="CVE-2007-4387") From 4f114d603a3c0d2f2ea78604a591c9b952a29702 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 11 Jun 2026 20:35:31 +0530 Subject: [PATCH 3/3] Add exploits in bulk Signed-off-by: Tushar Goel --- ...0135_advisoryexploit_record_id_and_more.py | 30 ++++ vulnerabilities/models.py | 15 ++ .../v2_improvers/enhance_with_exploitdb.py | 146 ++++++++---------- .../v2_improvers/enhance_with_kev.py | 90 ++++++----- .../v2_improvers/enhance_with_metasploit.py | 145 +++++++++-------- vulnerabilities/utils.py | 31 ++++ 6 files changed, 280 insertions(+), 177 deletions(-) create mode 100644 vulnerabilities/migrations/0135_advisoryexploit_record_id_and_more.py diff --git a/vulnerabilities/migrations/0135_advisoryexploit_record_id_and_more.py b/vulnerabilities/migrations/0135_advisoryexploit_record_id_and_more.py new file mode 100644 index 000000000..c41b33f36 --- /dev/null +++ b/vulnerabilities/migrations/0135_advisoryexploit_record_id_and_more.py @@ -0,0 +1,30 @@ +# Generated by Django 5.2.11 on 2026-06-11 13:01 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0134_advisoryv2__all_impacts_unfurled_at_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="advisoryexploit", + name="record_id", + field=models.CharField( + blank=True, + help_text="The unique identifier for the exploit record in the original upstream data source, such as the CISA KEV ID or the exploitdb ID.", + max_length=255, + null=True, + ), + ), + migrations.AddConstraint( + model_name="advisoryexploit", + constraint=models.UniqueConstraint( + fields=("advisory", "data_source", "record_id"), + name="unique_advisory_exploit_source", + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 93253937a..a704bfb06 100644 --- 
a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3959,10 +3959,25 @@ class AdvisoryExploit(models.Model): help_text="The URL to the exploit as provided in the original upstream data source.", ) + record_id = models.CharField( + null=True, + blank=True, + max_length=255, + help_text="The unique identifier for the exploit record in the original upstream data source, such as the CISA KEV ID or the exploitdb ID.", + ) + @property def get_known_ransomware_campaign_use_type(self): return "Known" if self.known_ransomware_campaign_use else "Unknown" + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["advisory", "data_source", "record_id"], + name="unique_advisory_exploit_source", + ) + ] + class SSVC(models.Model): vector = models.CharField(max_length=255, help_text="The vector string representing the SSVC.") diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py index 07becd0df..561e1deff 100644 --- a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py @@ -15,14 +15,10 @@ import requests from aboutcode.pipeline import LoopProgress from dateutil import parser as dateparser -from django.db import DataError -from vulnerabilities.models import AdvisoryAlias from vulnerabilities.models import AdvisoryExploit -from vulnerabilities.models import AdvisoryReference -from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import relate_aliases_with_advisories +from vulnerabilities.utils import build_alias_to_advisory_map class ExploitDBImproverPipeline(VulnerableCodePipeline): @@ -66,86 +62,80 @@ def add_exploit(self): raw_data = list(csvreader) fetched_exploit_count = len(raw_data) - vulnerability_exploit_count = 0 self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit 
records") progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) - for row in progress.iter(raw_data): - vulnerability_exploit_count += add_vulnerability_exploit(row, self.log) - - self.log(f"Successfully added {vulnerability_exploit_count:,d} exploit-db advisory exploit") - - -def add_vulnerability_exploit(row, logger): - advisories = set() - - aliases = row["codes"].split(";") if row["codes"] else [] - - if not aliases: - return 0 + all_aliases = set() - advisories = relate_aliases_with_advisories(aliases) + for row in raw_data: + if row["codes"]: + all_aliases.update(row["codes"].split(";")) - if not advisories: - logger(f"No advisory found for aliases {aliases}") - return 0 + alias_to_advisories = build_alias_to_advisory_map(all_aliases) - date_added = parse_date(row["date_added"]) - source_date_published = parse_date(row["date_published"]) - source_date_updated = parse_date(row["date_updated"]) + exploits = [] + seen = set() - for advisory in advisories: - add_exploit_references(row["codes"], row["source_url"], row["file"], advisory, logger) - try: - AdvisoryExploit.objects.update_or_create( - advisory=advisory, - data_source="Exploit-DB", - defaults={ - "date_added": date_added, - "description": row["description"], - "known_ransomware_campaign_use": row["verified"], - "source_date_published": source_date_published, - "exploit_type": row["type"], - "platform": row["platform"], - "source_date_updated": source_date_updated, - "source_url": row["source_url"], - }, - ) - except DataError as e: - logger( - f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}", - level=logging.ERROR, - ) - return 1 - - -def add_exploit_references(ref_id, direct_url, path, adv, logger): - url_map = { - "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}", - "direct_url": direct_url, - } - - for key, url in url_map.items(): - if url: - try: - ref, created = 
AdvisoryReference.objects.update_or_create( - url=url, - defaults={ - "reference_id": ref_id, - "reference_type": AdvisoryReference.EXPLOIT, - }, - ) - - if created: - ref.advisories.add(adv) - ref.save() - logger(f"Created {ref} for {adv} with {key}={url}") - - except DataError as e: - logger( - f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}", - level=logging.ERROR, - ) + for row in progress.iter(raw_data): + aliases = row["codes"].split(";") if row["codes"] else [] + + if not aliases: + continue + + date_added = parse_date(row["date_added"]) + source_date_published = parse_date(row["date_published"]) + source_date_updated = parse_date(row["date_updated"]) + + for alias in aliases: + for advisory in alias_to_advisories.get(alias, ()): + + key = ( + advisory.id, + "Exploit-DB", + alias, + ) + + if key in seen: + continue + + seen.add(key) + + exploits.append( + AdvisoryExploit( + advisory=advisory, + record_id=alias, + data_source="Exploit-DB", + date_added=date_added, + description=row["description"], + known_ransomware_campaign_use=row["verified"], + source_date_published=source_date_published, + exploit_type=row["type"], + platform=row["platform"], + source_date_updated=source_date_updated, + source_url=row["source_url"], + ) + ) + + AdvisoryExploit.objects.bulk_create( + exploits, + update_conflicts=True, + unique_fields=[ + "advisory", + "data_source", + "record_id", + ], + update_fields=[ + "date_added", + "description", + "known_ransomware_campaign_use", + "source_date_published", + "exploit_type", + "platform", + "source_date_updated", + "source_url", + ], + batch_size=1000, + ) def parse_date(date_string): diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py index c62a41c91..cc7721acd 100644 --- a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py 
@@ -13,11 +13,9 @@ import requests from aboutcode.pipeline import LoopProgress -from vulnerabilities.models import AdvisoryAlias from vulnerabilities.models import AdvisoryExploit -from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import relate_aliases_with_advisories +from vulnerabilities.utils import build_alias_to_advisory_map class VulnerabilityKevPipeline(VulnerableCodePipeline): @@ -54,40 +52,56 @@ def fetch_exploits(self): def add_exploits(self): fetched_exploit_count = self.kev_data.get("count") self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") - - vulnerability_exploit_count = 0 progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) - - for record in progress.iter(self.kev_data.get("vulnerabilities", [])): - vulnerability_exploit_count += add_vulnerability_exploit( - kev_vul=record, - logger=self.log, - ) - - self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit") - - -def add_vulnerability_exploit(kev_vul, logger): - cve_id = kev_vul.get("cveID") - - if not cve_id: - return 0 - - advisories = relate_aliases_with_advisories([cve_id]) - - for advisory in advisories: - AdvisoryExploit.objects.update_or_create( - advisory=advisory, - data_source="KEV", - defaults={ - "description": kev_vul["shortDescription"], - "date_added": kev_vul["dateAdded"], - "required_action": kev_vul["requiredAction"], - "due_date": kev_vul["dueDate"], - "notes": kev_vul["notes"], - "known_ransomware_campaign_use": ( - True if kev_vul["knownRansomwareCampaignUse"] == "Known" else False - ), - }, + cve_ids = { + record["cveID"] for record in self.kev_data["vulnerabilities"] if record.get("cveID") + } + + cve_to_advisories = build_alias_to_advisory_map(cve_ids) + + exploits = [] + + advisories_seen_multiple_times = set() + + for record in progress.iter(self.kev_data["vulnerabilities"]): + cve_id = record.get("cveID") 
+ + if not cve_id: + continue + + for advisory in cve_to_advisories.get(cve_id, []): + if (advisory.avid, cve_id) in advisories_seen_multiple_times: + continue + advisories_seen_multiple_times.add((advisory.avid, cve_id)) + exploits.append( + AdvisoryExploit( + advisory=advisory, + record_id=cve_id, + data_source="KEV", + description=record["shortDescription"], + date_added=record["dateAdded"], + required_action=record["requiredAction"], + due_date=record["dueDate"], + notes=record["notes"], + known_ransomware_campaign_use=( + record["knownRansomwareCampaignUse"] == "Known" + ), + ) + ) + if not exploits: + return + + AdvisoryExploit.objects.bulk_create( + exploits, + update_conflicts=True, + unique_fields=["advisory", "data_source", "record_id"], + update_fields=[ + "description", + "date_added", + "required_action", + "due_date", + "notes", + "known_ransomware_campaign_use", + ], + batch_size=1000, ) - return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py index e50fbe308..bee3e47a7 100644 --- a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py @@ -15,11 +15,9 @@ from aboutcode.pipeline import LoopProgress from dateutil import parser as dateparser -from vulnerabilities.models import AdvisoryAlias from vulnerabilities.models import AdvisoryExploit -from vulnerabilities.models import AdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import relate_aliases_with_advisories +from vulnerabilities.utils import build_alias_to_advisory_map class MetasploitImproverPipeline(VulnerableCodePipeline): @@ -56,64 +54,89 @@ def fetch_exploits(self): def add_advisory_exploits(self): fetched_exploit_count = len(self.metasploit_data) self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") - - 
vulnerability_exploit_count = 0 progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) - for _, record in progress.iter(self.metasploit_data.items()): - vulnerability_exploit_count += add_advisory_exploit( - record=record, - logger=self.log, - ) - self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit") - - -def add_advisory_exploit(record, logger): - advisories = set() - references = record.get("references", []) - - interesting_references = [ - ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-") - ] - - if not interesting_references: - return 0 - - advisories = [] - - advisories = relate_aliases_with_advisories(interesting_references) - - if not advisories: - logger(f"No advisories found for aliases {interesting_references}") - return 0 - - description = record.get("description", "") - notes = record.get("notes", {}) - platform = record.get("platform") - - source_url = "" - if path := record.get("path"): - source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" - source_date_published = None - - if disclosure_date := record.get("disclosure_date"): - try: - source_date_published = dateparser.parse(disclosure_date).date() - except ValueError as e: - logger( - f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", - level=logging.ERROR, - ) - for advisory in advisories: - AdvisoryExploit.objects.update_or_create( - advisory=advisory, - data_source="Metasploit", - defaults={ - "description": description, - "notes": saneyaml.dump(notes), - "source_date_published": source_date_published, - "platform": platform, - "source_url": source_url, - }, + all_references = set() + + for record in self.metasploit_data.values(): + for ref in record.get("references", []): + if not ref.startswith("OSVDB") and not ref.startswith("URL-"): + all_references.add(ref) + + reference_to_advisories = build_alias_to_advisory_map(all_references) 
+ + exploits = [] + seen = set() + + for record in progress.iter(self.metasploit_data.values()): + description = record.get("description", "") + notes = record.get("notes", {}) + platform = record.get("platform") + + source_url = "" + if path := record.get("path"): + source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" + source_date_published = None + + if disclosure_date := record.get("disclosure_date"): + try: + source_date_published = dateparser.parse(disclosure_date).date() + except ValueError as e: + self.log( + f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + refs = [ + ref + for ref in record.get("references", []) + if not ref.startswith("OSVDB") and not ref.startswith("URL-") + ] + + record_id = record.get("path") + + if not record_id: + continue + + for ref in refs: + for advisory in reference_to_advisories.get(ref, ()): + + key = ( + advisory.id, + record_id, + ) + + if key in seen: + continue + + seen.add(key) + + exploits.append( + AdvisoryExploit( + advisory=advisory, + data_source="Metasploit", + record_id=record_id, + description=description, + notes=saneyaml.dump(notes), + source_date_published=source_date_published, + platform=platform, + source_url=source_url, + ) + ) + + AdvisoryExploit.objects.bulk_create( + exploits, + update_conflicts=True, + unique_fields=[ + "advisory", + "data_source", + "record_id", + ], + update_fields=[ + "description", + "notes", + "source_date_published", + "platform", + "source_url", + ], + batch_size=1000, ) - return 1 diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index fb6c728ea..ff229a5f1 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -1083,3 +1083,34 @@ def relate_aliases_with_advisories(aliases): advisories = set(alias_advisories) advisories.update(advisory_id_advisories) return advisories + + +def build_alias_to_advisory_map(aliases_strs): + from vulnerabilities.models 
import AdvisoryAlias + from vulnerabilities.models import AdvisoryV2 + + aliases_strs = set(aliases_strs) + alias_to_advisories = defaultdict(set) + + advisory_aliases = AdvisoryAlias.objects.filter(alias__in=aliases_strs).prefetch_related( + Prefetch( + "advisories", + queryset=AdvisoryV2.objects.filter( + is_latest=True, + _all_impacts_unfurled_at__isnull=False, + ), + to_attr="latest_advisories", + ) + ) + + for alias in advisory_aliases: + for advisory in alias.latest_advisories: + alias_to_advisories[alias.alias].add(advisory) + + for advisory in AdvisoryV2.objects.filter( + advisory_id__in=aliases_strs, + _all_impacts_unfurled_at__isnull=False, + is_latest=True, + ): + alias_to_advisories[advisory.advisory_id].add(advisory) + return alias_to_advisories