From 58c17035911677d4d8b4a6691a20292f53d7c4db Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Fri, 19 Jun 2026 23:13:29 +0800
Subject: [PATCH] Python: reject non-base64 data URIs in detect_media_type_from_base64
---
python/packages/core/agent_framework/_types.py | 7 ++++++-
python/packages/core/tests/core/test_types.py | 13 +++++++++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py
index 38b955bcb7..97ebd25288 100644
--- a/python/packages/core/agent_framework/_types.py
+++ b/python/packages/core/agent_framework/_types.py
@@ -121,7 +121,12 @@ def detect_media_type_from_base64(
if data_uri is not None:
if data is not None:
raise ValueError("Provide exactly one of data_bytes, data_str, or data_uri.")
- # Remove data URI prefix if present
+ # Strip the data URI prefix. Only base64-encoded payloads are
+ # supported here, so a URI without ";base64," (e.g. a URL-encoded
+ # "data:image/svg+xml,") gets the documented ValueError rather
+ # than an opaque IndexError from the split below.
+ if ";base64," not in data_uri:
+ raise ValueError("data_uri must be a base64-encoded data URI (e.g. 'data:image/png;base64,').")
data_str = data_uri.split(";base64,", 1)[1]
if data_str is not None:
if data is not None:
diff --git a/python/packages/core/tests/core/test_types.py b/python/packages/core/tests/core/test_types.py
index 544edf4484..8d0108a8c8 100644
--- a/python/packages/core/tests/core/test_types.py
+++ b/python/packages/core/tests/core/test_types.py
@@ -208,6 +208,19 @@ def test_data_content_detect_image_format_from_base64():
detect_media_type_from_base64(data_str="data", data_uri="data:application/octet-stream;base64,AAA")
+def test_detect_media_type_rejects_non_base64_data_uri():
+ """A data URI without a ';base64,' segment must raise a clear ValueError.
+
+ Data URIs may carry URL-encoded (non-base64) payloads, e.g.
+ "data:image/svg+xml,". detect_media_type_from_base64 only understands
+ base64 payloads, so such a URI should hit the documented ValueError instead
+ of leaking an IndexError out of the internal split.
+ """
+ for uri in ("data:image/svg+xml,", "data:text/plain,Hello", "data:image/png,rawbytes"):
+ with pytest.raises(ValueError, match="base64"):
+ detect_media_type_from_base64(data_uri=uri)
+
+
def test_data_content_create_data_uri_from_base64():
"""Test the create_data_uri_from_base64 class method."""
# Test with PNG data