From 58c17035911677d4d8b4a6691a20292f53d7c4db Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Fri, 19 Jun 2026 23:13:29 +0800 Subject: [PATCH] Python: reject non-base64 data URIs in detect_media_type_from_base64 --- python/packages/core/agent_framework/_types.py | 7 ++++++- python/packages/core/tests/core/test_types.py | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index 38b955bcb7..97ebd25288 100644 --- a/python/packages/core/agent_framework/_types.py +++ b/python/packages/core/agent_framework/_types.py @@ -121,7 +121,12 @@ def detect_media_type_from_base64( if data_uri is not None: if data is not None: raise ValueError("Provide exactly one of data_bytes, data_str, or data_uri.") - # Remove data URI prefix if present + # Strip the data URI prefix. Only base64-encoded payloads are + # supported here, so a URI without ";base64," (e.g. a URL-encoded + # "data:image/svg+xml,") gets the documented ValueError rather + # than an opaque IndexError from the split below. + if ";base64," not in data_uri: + raise ValueError("data_uri must be a base64-encoded data URI (e.g. 'data:image/png;base64,').") data_str = data_uri.split(";base64,", 1)[1] if data_str is not None: if data is not None: diff --git a/python/packages/core/tests/core/test_types.py b/python/packages/core/tests/core/test_types.py index 544edf4484..8d0108a8c8 100644 --- a/python/packages/core/tests/core/test_types.py +++ b/python/packages/core/tests/core/test_types.py @@ -208,6 +208,19 @@ def test_data_content_detect_image_format_from_base64(): detect_media_type_from_base64(data_str="data", data_uri="data:application/octet-stream;base64,AAA") +def test_detect_media_type_rejects_non_base64_data_uri(): + """A data URI without a ';base64,' segment must raise a clear ValueError. + + Data URIs may carry URL-encoded (non-base64) payloads, e.g. + "data:image/svg+xml,". detect_media_type_from_base64 only understands + base64 payloads, so such a URI should hit the documented ValueError instead + of leaking an IndexError out of the internal split. + """ + for uri in ("data:image/svg+xml,", "data:text/plain,Hello", "data:image/png,rawbytes"): + with pytest.raises(ValueError, match="base64"): + detect_media_type_from_base64(data_uri=uri) + + def test_data_content_create_data_uri_from_base64(): """Test the create_data_uri_from_base64 class method.""" # Test with PNG data