From ca8a6d178150cce3a3512c188b8698f395c3f14f Mon Sep 17 00:00:00 2001 From: David Mariyajebamalai Date: Sat, 2 May 2026 13:25:36 +0530 Subject: [PATCH 1/2] Handle XML ParseError in scan import API to prevent worker crash (fixes #14752) Translates xml.etree.ElementTree.ParseError into a DRF ValidationError to return a 400 Bad Request instead of a 500 error or worker termination. --- dojo/api_v2/serializers.py | 7 +++++++ unittests/scans/zap/malformed.xml | 1 + unittests/test_issue_14752.py | 27 +++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 unittests/scans/zap/malformed.xml create mode 100644 unittests/test_issue_14752.py diff --git a/dojo/api_v2/serializers.py b/dojo/api_v2/serializers.py index d1d36de473a..0a86feb0b5c 100644 --- a/dojo/api_v2/serializers.py +++ b/dojo/api_v2/serializers.py @@ -4,6 +4,7 @@ import logging import re import time +import xml.etree.ElementTree as ET from datetime import datetime import six @@ -2399,6 +2400,8 @@ def process_scan( raise Exception(se) except ValueError as ve: raise Exception(ve) + except ET.ParseError as e: + raise serializers.ValidationError(f"Malformed XML: {e}") def validate(self, data: dict) -> dict: scan_type = data.get("scan_type") @@ -2707,6 +2710,8 @@ def process_scan( raise Exception(se) except ValueError as ve: raise Exception(ve) + except ET.ParseError as e: + raise serializers.ValidationError(f"Malformed XML: {e}") def save(self, *, push_to_jira=False): # Go through the validate method @@ -2787,6 +2792,8 @@ def save(self): raise Exception(se) except ValueError as ve: raise Exception(ve) + except ET.ParseError as e: + raise serializers.ValidationError(f"Malformed XML: {e}") class LanguageTypeSerializer(serializers.ModelSerializer): diff --git a/unittests/scans/zap/malformed.xml b/unittests/scans/zap/malformed.xml new file mode 100644 index 00000000000..bbdf0d66937 --- /dev/null +++ b/unittests/scans/zap/malformed.xml @@ -0,0 +1 @@ + Date: Sat, 2 May 2026 13:43:59 +0530 Subject: 
[PATCH 2/2] Refactor XML ParseError handling and use defusedxml (fixes #14752) Switched to defusedxml for better security alignment and updated the error format to use a field-specific dictionary as recommended in DRF. Also moved the ParseError handler ahead of the SyntaxError/ValueError handlers: ParseError is a subclass of SyntaxError, so in its previous position the handler was unreachable and malformed XML was still re-raised as a generic Exception. The move also keeps the existing explanatory comment directly above the handlers it describes. --- dojo/api_v2/serializers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dojo/api_v2/serializers.py b/dojo/api_v2/serializers.py index 0a86feb0b5c..e73531e16f9 100644 --- a/dojo/api_v2/serializers.py +++ b/dojo/api_v2/serializers.py @@ -4,11 +4,11 @@ import logging import re import time -import xml.etree.ElementTree as ET from datetime import datetime import six import tagulous +from defusedxml import ElementTree as ET from django.conf import settings from django.contrib.auth.models import Group, Permission from django.contrib.auth.password_validation import validate_password @@ -2394,14 +2394,14 @@ def process_scan( duration = time.perf_counter() - start_time LargeScanSizeProductAnnouncement(response_data=data, duration=duration) ScanTypeProductAnnouncement(response_data=data, scan_type=context.get("scan_type")) + except ET.ParseError as e: + raise serializers.ValidationError({"file": f"Malformed XML: {e}"}) # convert to exception otherwise django rest framework will swallow them as 400 error # exceptions are already logged in the importer except SyntaxError as se: raise Exception(se) except ValueError as ve: raise Exception(ve) - except ET.ParseError as e: - raise serializers.ValidationError(f"Malformed XML: {e}") def validate(self, data: dict) -> dict: scan_type = data.get("scan_type") @@ -2704,14 +2704,14 @@ def process_scan( duration = time.perf_counter() - start_time LargeScanSizeProductAnnouncement(response_data=data, duration=duration) ScanTypeProductAnnouncement(response_data=data, scan_type=context.get("scan_type")) + except ET.ParseError as e: + raise serializers.ValidationError({"file": f"Malformed XML: {e}"}) # convert to exception 
otherwise django rest framework will swallow them as 400 error # exceptions are already logged in the importer except SyntaxError as se: raise Exception(se) except ValueError as ve: raise Exception(ve) - except ET.ParseError as e: - raise serializers.ValidationError(f"Malformed XML: {e}") def save(self, *, push_to_jira=False): # Go through the validate method @@ -2788,12 +2788,12 @@ def save(self): create_dojo_meta, origin="API", ) + except ET.ParseError as e: + raise serializers.ValidationError({"file": f"Malformed XML: {e}"}) except SyntaxError as se: raise Exception(se) except ValueError as ve: raise Exception(ve) - except ET.ParseError as e: - raise serializers.ValidationError(f"Malformed XML: {e}") class LanguageTypeSerializer(serializers.ModelSerializer):