Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions docs/content/supported_tools/parsers/file/xygeni.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
---
title: "Xygeni"
toc_hide: true
---
### About Xygeni
[Xygeni](https://xygeni.io) is a Software Supply Chain Security platform whose
scanners produce JSON reports for code vulnerabilities (SAST), open-source
dependency vulnerabilities (SCA), hard-coded secrets, IaC flaws, web-application
vulnerabilities (DAST), CI/CD and SCM misconfigurations, and malicious or
suspect components.

This parser handles three Xygeni scan kinds in phase 1: **SAST**, **SCA**, and
**Secrets**. All three share a common `metadata` envelope; the parser
dispatches on `metadata.scanType`.

### Scan Types
| Scan type | `metadata.scanType` | Xygeni CLI command (typical) |
| ------------------------ | ------------------- | ---------------------------- |
| `Xygeni SAST Scan` | `sast` | `xygeni scan --scan-type=sast --format=json` |
| `Xygeni SCA Scan` | `deps` | `xygeni scan --scan-type=deps --format=json` |
| `Xygeni Secrets Scan` | `secrets` | `xygeni scan --scan-type=secrets --format=json` |

See the Xygeni documentation at <https://docs.xygeni.io> for installation and
the full set of CLI options.

### Acceptable JSON Format
All three scan types share the same envelope:

~~~
{
"metadata": {
"uuid": "...",
"timestamp": "2026-04-26T07:08:29Z",
"projectName": "...",
"scanType": "sast" | "deps" | "secrets",
"format": "<scanType>-xygeni",
"reportProperties": {
"tool.name": "Xygeni",
"tool.version": "..."
}
},
...
}
~~~

The kind-specific payload then follows:

- **SAST** — `vulnerabilities[]` — each entry carries `detector` (the rule id),
`severity`, `location.{filepath, beginLine, endLine, code}`, `cwe` /
`cwes[]`, `tags[]`, `explanation`, `uniqueHash`, `issueId`, and an optional
`codeFlows[]` block describing source / sink frames and the data path.
- **SCA** — `dependencies[]` — each dependency has `name`, `version`,
`ecosystem`, and a nested `vulnerabilities[]` of CVE/GHSA advisories with
`cve`, `cwes`, `fixedVersion`, `aliases`, `overallCvssScore`, `references`,
`description`, `uniqueHash`, `issueId`.
- **Secrets** — `secrets[]` — each entry has `type` (e.g.
`aws_access_key`), `detector`, `severity`, `location` (same shape as SAST),
`description`, `tags`, `uniqueHash`, `issueId`. The `secret` value and
`location.code` are already redacted by the Xygeni CLI before serialisation.

### Sample Scan Data
Sample Xygeni JSON reports can be found
[here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/xygeni).

### Deduplication

Every finding carries `unique_id_from_tool` (set from Xygeni's vendor-stable
`uniqueHash`) and `vuln_id_from_tool` (set from `issueId`). The deduplication
algorithm is configured per scan type:

| Scan type | Algorithm | Hash-code fields (fallback) |
| -------------------- | ---------------------------------- | -------------------------------------------------------------- |
| Xygeni SAST Scan | `unique_id_from_tool` | n/a |
| Xygeni SCA Scan | `unique_id_from_tool_or_hash_code` | `vulnerability_ids`, `component_name`, `component_version` |
| Xygeni Secrets Scan | `unique_id_from_tool` | n/a |

For SCA the hash-code fallback enables cross-tool deduplication: the same
CVE on the same package@version reported by Xygeni and by another SCA scanner
(Snyk, Trivy, etc.) collapses into a single Finding.
5 changes: 5 additions & 0 deletions dojo/settings/settings.dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,7 @@ def generate_url(scheme, double_slashes, user, password, host, port, path, param
"n0s1 Scanner": ["description"],
"IriusRisk Threats Scan": ["title", "component_name"],
"Orca Security Alerts": ["title", "component_name"],
"Xygeni SCA Scan": ["vulnerability_ids", "component_name", "component_version"],
"Qualys VMDR": ["title", "component_name", "vuln_id_from_tool"],
}

Expand Down Expand Up @@ -1168,6 +1169,7 @@ def generate_url(scheme, double_slashes, user, password, host, port, path, param
"Cyberwatch scan (Galeax)": True,
"OpenVAS Parser v2": True,
"OpenReports": True,
"Xygeni SCA Scan": True,
}

# List of fields that are known to be usable in hash_code computation
Expand Down Expand Up @@ -1363,6 +1365,9 @@ def generate_url(scheme, double_slashes, user, password, host, port, path, param
"OpenReports": DEDUPE_ALGO_HASH_CODE,
"IriusRisk Threats Scan": DEDUPE_ALGO_HASH_CODE,
"Orca Security Alerts": DEDUPE_ALGO_HASH_CODE,
"Xygeni SAST Scan": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL,
"Xygeni SCA Scan": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE,
"Xygeni Secrets Scan": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL,
"Qualys VMDR": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE,
}

Expand Down
Empty file added dojo/tools/xygeni/__init__.py
Empty file.
55 changes: 55 additions & 0 deletions dojo/tools/xygeni/_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Shared helpers for the Xygeni multi-scan-type parser."""

import re

# Lookup table from a lowercase severity name to the capitalised severity
# label. ``map_severity`` lowercases its input before consulting this table
# and falls back to "Info" for anything not listed here.
SEVERITY_MAP = {
"critical": "Critical",
"high": "High",
"medium": "Medium",
"low": "Low",
"info": "Info",
}

# Matches a whole token made of "CWE", an optional ":" or "-" separator, and
# one or more digits (case-insensitive); group 1 captures the digits.
_CWE_TAG_RE = re.compile(r"^CWE[:\-]?(\d+)$", re.IGNORECASE)


def map_severity(value):
    """
    Translate a Xygeni severity into the matching DefectDojo severity label.

    The lookup is case-insensitive. ``None`` and any value that is not a key
    of ``SEVERITY_MAP`` yield ``"Info"``.
    """
    fallback = "Info"
    if value is not None:
        return SEVERITY_MAP.get(str(value).lower(), fallback)
    return fallback


def parse_cwe(cwes=None, cwe=None, tags=None):
    """
    Resolve a CWE integer from any of the Xygeni representations.

    Preference order:
    1. The ``cwe`` field on the finding: an integer, a digit-only string
       (``"79"``) or a ``"CWE-N"`` style string.
    2. The first ``"CWE-N"`` entry in ``cwes``.
    3. The first ``"CWE:N"`` / ``"cwe:N"`` entry in ``tags``.

    Args:
        cwes: Optional iterable of CWE identifiers such as ``"CWE-79"``.
        cwe: Optional single CWE value (integer or string).
        tags: Optional iterable of tags, some of which may be CWE tags.

    Returns:
        The CWE number as an ``int``, or ``None`` when none can be resolved.

    """
    # ``bool`` is a subclass of ``int``; a JSON ``true`` must not become CWE 1.
    if isinstance(cwe, int) and not isinstance(cwe, bool):
        return cwe

    candidates = []
    if isinstance(cwe, str):
        text = cwe.strip()
        # ``isascii`` guards against non-ASCII digit characters that
        # ``isdigit`` accepts but ``int`` rejects.
        if text.isascii() and text.isdigit():
            return int(text)
        # Not a bare number: let it compete as a "CWE-N" token, ahead of
        # ``cwes`` and ``tags`` so the documented preference order holds.
        candidates.append(text)
    candidates.extend(cwes or [])
    candidates.extend(tags or [])

    for entry in candidates:
        # Surrounding whitespace would defeat the anchored pattern.
        match = _CWE_TAG_RE.match(str(entry).strip())
        if match:
            return int(match.group(1))
    return None


def extract_scan_type(data):
    """
    Read ``metadata.scanType`` from a parsed Xygeni report.

    Args:
        data: The JSON-decoded report root.

    Returns:
        The scan type with surrounding whitespace removed and lowercased
        (for example ``"sast"``).

    Raises:
        TypeError: If the report root or its ``metadata`` value is not a
            JSON object.
        ValueError: If ``metadata.scanType`` is missing or empty.

    """
    if not isinstance(data, dict):
        msg = "Xygeni report root must be a JSON object"
        raise TypeError(msg)

    # A missing or null ``metadata`` is handled like an empty one so that the
    # caller gets the "missing scanType" error below.
    metadata = data.get("metadata") or {}
    if not isinstance(metadata, dict):
        # Without this check a list or string here would surface as an
        # unrelated AttributeError on ``.get``.
        msg = "Xygeni report 'metadata' must be a JSON object"
        raise TypeError(msg)

    raw_scan_type = metadata.get("scanType")
    scan_type = str(raw_scan_type).strip().lower() if raw_scan_type else ""
    if not scan_type:
        msg = "Xygeni report is missing required 'metadata.scanType' field"
        raise ValueError(msg)
    return scan_type
67 changes: 67 additions & 0 deletions dojo/tools/xygeni/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Parser for Xygeni JSON reports.

Xygeni (https://xygeni.io) is a Software Supply Chain Security platform.
It emits a separate JSON report per scanner kind (SAST, SCA, secrets, IaC,
CI/CD misconfig, DAST, suspect dependencies, code tampering). All reports
share a common ``metadata`` envelope with a ``scanType`` discriminator.

Phase 1 of this parser handles SAST, SCA, and Secrets. Additional scan
types are dispatched the same way and can be added incrementally.
"""

import json
import logging

from dojo.tools.xygeni._common import extract_scan_type
from dojo.tools.xygeni.sast import parse_sast
from dojo.tools.xygeni.sca import parse_sca
from dojo.tools.xygeni.secrets import parse_secrets

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Scan-type names returned by ``XygeniParser.get_scan_types`` and compared
# against in ``XygeniParser.get_description_for_scan_types``.
SCAN_TYPE_SAST = "Xygeni SAST Scan"
SCAN_TYPE_SCA = "Xygeni SCA Scan"
SCAN_TYPE_SECRETS = "Xygeni Secrets Scan"

# Map from the ``metadata.scanType`` value emitted by the Xygeni CLI to the
# per-kind handler. Keys are lowercase, matching ``extract_scan_type``.
# Each handler is called as ``handler(data, test)``.
_HANDLERS = {
"sast": parse_sast,
"deps": parse_sca,
"secrets": parse_secrets,
}


class XygeniParser:

    """Parser for Xygeni JSON reports that routes each report by its ``metadata.scanType``."""

    def get_scan_types(self):
        """Return the scan-type names handled by this parser."""
        return [SCAN_TYPE_SAST, SCAN_TYPE_SCA, SCAN_TYPE_SECRETS]

    def get_label_for_scan_types(self, scan_type):
        """Return the label for ``scan_type``, which is the scan-type name itself."""
        return scan_type

    def get_description_for_scan_types(self, scan_type):
        """Return the description for ``scan_type``, or a generic one when it is not recognised."""
        known_descriptions = (
            (
                SCAN_TYPE_SAST,
                "Xygeni SAST JSON report (code vulnerabilities). Generated with 'xygeni scan --scan-type=sast'.",
            ),
            (
                SCAN_TYPE_SCA,
                "Xygeni SCA JSON report (open-source dependency vulnerabilities). Generated with 'xygeni scan --scan-type=deps'.",
            ),
            (
                SCAN_TYPE_SECRETS,
                "Xygeni Secrets JSON report (hard-coded secrets). Generated with 'xygeni scan --scan-type=secrets'.",
            ),
        )
        for name, description in known_descriptions:
            if scan_type == name:
                return description
        return "Xygeni JSON report."

    def get_findings(self, file, test):
        """
        Load a Xygeni JSON report from ``file`` and return the handler's findings.

        The handler is looked up in ``_HANDLERS`` using the value returned by
        ``extract_scan_type``. A ``ValueError`` is raised when no handler is
        registered for that value.
        """
        report = json.load(file)
        kind = extract_scan_type(report)
        if kind in _HANDLERS:
            logger.debug("Xygeni parser dispatching on scanType=%s", kind)
            return _HANDLERS[kind](report, test)
        msg = (
            f"Unsupported Xygeni scanType '{kind}'. "
            f"Phase 1 supports: {sorted(_HANDLERS)}."
        )
        raise ValueError(msg)
79 changes: 79 additions & 0 deletions dojo/tools/xygeni/sast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Parse Xygeni SAST reports into DefectDojo Findings."""

from dojo.models import Finding
from dojo.tools.xygeni._common import map_severity, parse_cwe


def parse_sast(data, test):
    """Build one Finding per entry of the report's top-level ``vulnerabilities`` array."""
    findings = []
    # A missing or null ``vulnerabilities`` key yields an empty result.
    for entry in data.get("vulnerabilities") or []:
        findings.append(_build_finding(entry, test))
    return findings


def _build_finding(vuln, test):
    """
    Build a Finding from one entry of a SAST report's ``vulnerabilities`` array.

    The description joins, in order and separated by blank lines, whichever
    of these are present: the ``explanation`` text, the ``location.code``
    snippet in a fenced block, and the rendered code flow. Source and sink
    fields are filled in afterwards by ``_apply_code_flow_fields``.
    """
    location = vuln.get("location") or {}
    code_flows = vuln.get("codeFlows") or []
    explanation = vuln.get("explanation")
    code = location.get("code")

    # Absent sections are represented by "" and filtered out before joining.
    sections = (
        str(explanation) if explanation else "",
        f"```\n{code}\n```" if code else "",
        _render_code_flows(code_flows),
    )
    description = "\n\n".join(section for section in sections if section)

    finding = Finding(
        test=test,
        title=str(vuln.get("detector") or "Xygeni SAST finding"),
        description=description,
        severity=map_severity(vuln.get("severity")),
        file_path=location.get("filepath"),
        line=location.get("beginLine"),
        cwe=parse_cwe(cwes=vuln.get("cwes"), cwe=vuln.get("cwe"), tags=vuln.get("tags")),
        static_finding=True,
        dynamic_finding=False,
        unique_id_from_tool=vuln.get("uniqueHash"),
        vuln_id_from_tool=vuln.get("issueId"),
    )

    _apply_code_flow_fields(finding, code_flows)
    return finding


def _render_code_flows(code_flows):
    """
    Render the first Xygeni code flow as a markdown block for Finding.description.

    Only ``code_flows[0]`` is rendered. Each of its frames becomes one bullet
    holding the frame kind (``step`` when absent), ``filepath:line`` and, when
    the frame carries a code snippet, that snippet as inline code.

    Args:
        code_flows: The ``codeFlows`` list of a SAST vulnerability (may be empty).

    Returns:
        The markdown text, or ``""`` when there is no flow or it has no frames.

    """
    if not code_flows:
        return ""

    bullets = []
    for frame in code_flows[0].get("frames") or []:
        kind = frame.get("kind") or "step"
        loc = frame.get("location") or {}
        # ``dict.get(key, default)`` only covers a missing key; an explicit
        # JSON null would otherwise be rendered as the text "None".
        filepath = loc.get("filepath") or "?"
        line = loc.get("beginLine")
        if line is None:
            line = "?"
        bullet = f"- **{kind}** {filepath}:{line}"
        snippet = (loc.get("code") or "").strip()
        # Skip the code span when there is no snippet: an empty pair of
        # backticks is not valid inline code and shows up literally.
        if snippet:
            bullet += f" — `{snippet}`"
        bullets.append(bullet)

    if not bullets:
        return ""
    return "\n".join(["**Data flow**", *bullets])


def _apply_code_flow_fields(finding, code_flows):
    """
    Copy source and sink details from the first code flow onto ``finding``.

    Only ``code_flows[0]`` is inspected. The first frame whose ``kind`` is
    ``"source"`` sets ``sast_source_file_path`` and ``sast_source_line``, plus
    ``sast_source_object`` when it has an ``injectionPoint``. The first frame
    whose ``kind`` is ``"sink"`` sets ``sast_sink_object`` to its
    ``injectionPoint``, falling back to the frame's ``location.code``.
    """
    if not code_flows:
        return

    frames = code_flows[0].get("frames") or []

    def first_frame(kind):
        # Return the first frame of the requested kind, or None.
        for frame in frames:
            if frame.get("kind") == kind:
                return frame
        return None

    source_frame = first_frame("source")
    sink_frame = first_frame("sink")

    if source_frame:
        source_location = source_frame.get("location") or {}
        finding.sast_source_file_path = source_location.get("filepath")
        finding.sast_source_line = source_location.get("beginLine")
        injection_point = source_frame.get("injectionPoint")
        if injection_point:
            finding.sast_source_object = injection_point

    if sink_frame:
        sink_object = sink_frame.get("injectionPoint")
        if not sink_object:
            sink_object = (sink_frame.get("location") or {}).get("code")
        finding.sast_sink_object = sink_object
77 changes: 77 additions & 0 deletions dojo/tools/xygeni/sca.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Parse Xygeni SCA (dependency-vulnerability) reports into DefectDojo Findings."""

from dojo.models import Finding
from dojo.tools.xygeni._common import map_severity, parse_cwe


def parse_sca(data, test):
    """
    Turn a Xygeni SCA JSON report into a flat list of Findings.

    Findings live one level down in this report: every item of
    ``dependencies[]`` may hold its own ``vulnerabilities[]`` array. One
    Finding is produced per nested vulnerability, in report order.
    """
    return [
        _build_finding(dependency, advisory, test)
        for dependency in data.get("dependencies") or []
        for advisory in dependency.get("vulnerabilities") or []
    ]


def _build_finding(dep, vuln, test):
    """
    Build a Finding for one vulnerability nested under a dependency.

    Args:
        dep: The dependency entry; its ``name`` and ``version`` become the
            component name and version.
        vuln: The vulnerability entry nested under ``dep``.
        test: The test the Finding is attached to.

    Returns:
        The populated Finding, with ``unsaved_vulnerability_ids`` set from the
        vulnerability's ``cve`` and ``aliases``.

    """
    component_name = dep.get("name")
    component_version = dep.get("version")

    title = str(vuln.get("cve") or vuln.get("id") or "Xygeni SCA finding")

    fixed_version = vuln.get("fixedVersion")
    mitigation = None
    if fixed_version and component_name:
        mitigation = f"Upgrade {component_name} to version {fixed_version} or later."
    elif fixed_version:
        mitigation = f"Upgrade to version {fixed_version} or later."

    references = "\n".join(str(r) for r in (vuln.get("references") or []) if r) or None

    finding = Finding(
        test=test,
        title=title,
        description=str(vuln.get("description") or ""),
        severity=map_severity(vuln.get("severity")),
        # Same CWE sources as the SAST builder; keys that are absent simply
        # contribute nothing.
        cwe=parse_cwe(cwes=vuln.get("cwes"), cwe=vuln.get("cwe"), tags=vuln.get("tags")),
        cvssv3_score=_normalize_cvss_score(vuln.get("overallCvssScore")),
        mitigation=mitigation,
        references=references,
        component_name=component_name,
        component_version=component_version,
        static_finding=True,
        dynamic_finding=False,
        unique_id_from_tool=vuln.get("uniqueHash"),
        vuln_id_from_tool=vuln.get("issueId"),
    )

    if vuln.get("cve"):
        finding.cve = vuln["cve"]

    finding.unsaved_vulnerability_ids = _collect_vulnerability_ids(vuln)
    return finding


def _normalize_cvss_score(value):
    """Return ``value`` as a float within 0-10, or ``None`` when it is not a usable CVSS score."""
    # ``bool`` is an ``int`` subclass; true/false is never a score.
    if value is None or isinstance(value, bool):
        return None
    try:
        score = float(value)
    except (TypeError, ValueError):
        # Non-numeric payloads (e.g. "n/a", a list) carry no score.
        return None
    # CVSS base scores are defined on 0.0-10.0. Negative values, anything
    # above 10, and NaN (which fails both comparisons) are dropped.
    if 0 <= score <= 10:
        return score
    return None


def _collect_vulnerability_ids(vuln):
    """
    List the identifiers of a Xygeni SCA vulnerability without repeats.

    The ``cve`` value comes first, followed by the ``aliases`` entries in
    their original order. Empty values are skipped and each identifier is
    converted to ``str``.
    """
    candidates = [vuln.get("cve"), *(vuln.get("aliases") or [])]
    # ``dict.fromkeys`` keeps the first occurrence of each key, in order.
    return list(dict.fromkeys(str(value) for value in candidates if value))
Loading
Loading