From 456716f63cff1a0ad6a57e54e2f2ed0b0f225989 Mon Sep 17 00:00:00 2001 From: Ritesh Tripathi Date: Tue, 7 Apr 2026 09:44:38 +0000 Subject: [PATCH 1/2] fix: preserve full input context in span attributes for multi-message inputs --- CHANGELOG.md | 5 ++ python/fi_instrumentation/fi_types.py | 1 + .../openai/traceai_openai/_span_io_handler.py | 17 ++-- python/tests/test_framework_openai.py | 77 ++++++++++++++++++- 4 files changed, 91 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e26e151..5200e407 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Enhanced streaming support - Performance optimizations +### Python + +#### fi-instrumentation-otel (Core) +- 🐛 Fix issue where SpanAttributes.INPUT_VALUE was overwritten multiple times, causing loss of context for multi-message inputs (fixes #151) + --- ## [2025-07-08] diff --git a/python/fi_instrumentation/fi_types.py b/python/fi_instrumentation/fi_types.py index 1e2e6b8d..2027bd88 100644 --- a/python/fi_instrumentation/fi_types.py +++ b/python/fi_instrumentation/fi_types.py @@ -203,6 +203,7 @@ class SpanAttributes: OUTPUT_MIME_TYPE = "output.mime_type" INPUT_VALUE = "input.value" INPUT_MIME_TYPE = "input.mime_type" + INPUT_RAW = "input.raw" # Embeddings EMBEDDING_EMBEDDINGS = "embedding.embeddings" diff --git a/python/frameworks/openai/traceai_openai/_span_io_handler.py b/python/frameworks/openai/traceai_openai/_span_io_handler.py index 20c4ec47..aa48eb60 100644 --- a/python/frameworks/openai/traceai_openai/_span_io_handler.py +++ b/python/frameworks/openai/traceai_openai/_span_io_handler.py @@ -51,21 +51,22 @@ def _process_input_data(input_data: Any, span: _WithSpan) -> None: input_content.append(msg) eval_input.append(msg_content) if input_content: - input_value = json.dumps(input_content, ensure_ascii=False) - span.set_attribute(SpanAttributes.INPUT_VALUE, input_value) + input_raw = json.dumps(input_content, ensure_ascii=False) + span.set_attribute(SpanAttributes.INPUT_RAW, input_raw) if input_images: images_value = json.dumps(input_images, ensure_ascii=False) span.set_attribute(SpanAttributes.INPUT_IMAGES, images_value) if eval_input: eval_input_str = " \n ".join(map(str, eval_input)) span.set_attribute(SpanAttributes.INPUT_VALUE, eval_input_str) - if eval_input and len(eval_input) > 0: - span.set_attribute(SpanAttributes.INPUT_VALUE, eval_input[0]) else: - try: - input_str = json.dumps(input_data, ensure_ascii=False).strip() - except (TypeError, ValueError): - input_str = str(input_data).strip() + if isinstance(input_data, str): + input_str = input_data.strip() + else: + try: + input_str = json.dumps(input_data, ensure_ascii=False).strip() + except (TypeError, ValueError): + input_str = str(input_data).strip() span.set_attribute(SpanAttributes.INPUT_VALUE, input_str) diff --git a/python/tests/test_framework_openai.py b/python/tests/test_framework_openai.py index 5917a54f..c7b45b5d 100644 --- a/python/tests/test_framework_openai.py +++ b/python/tests/test_framework_openai.py @@ -290,4 +290,79 @@ def test_instrumentor_uninstrumentation(self): # Methods should be restored (back to functions) assert openai.OpenAI.request == original_request - assert openai.AsyncOpenAI.request == original_async_request \ No newline at end of file + assert openai.AsyncOpenAI.request == original_async_request + + +class TestSpanIOHandler: + """Test the span I/O handler functions.""" + + def test_process_input_data_multi_message(self): + """Test _process_input_data with multiple messages preserves full context.""" + from traceai_openai._span_io_handler import _process_input_data + from fi_instrumentation.fi_types import SpanAttributes + from unittest.mock import MagicMock + + # Create mock span + mock_span = MagicMock() + + # Test input with multiple messages + input_data = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ] + + _process_input_data(input_data, mock_span) + + # Verify INPUT_VALUE contains full joined text + expected_value = "You are a helpful assistant. \n Hello \n Hi there! \n How are you?" + mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_VALUE, expected_value) + + # Verify INPUT_RAW contains structured JSON + import json + expected_raw = json.dumps([ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ], ensure_ascii=False) + mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_RAW, expected_raw) + + def test_process_input_data_single_message(self): + """Test _process_input_data with single message for regression.""" + from traceai_openai._span_io_handler import _process_input_data + from fi_instrumentation.fi_types import SpanAttributes + from unittest.mock import MagicMock + + mock_span = MagicMock() + + input_data = [ + {"role": "user", "content": "Hello world"} + ] + + _process_input_data(input_data, mock_span) + + # Should still set INPUT_VALUE to the single message + expected_value = "Hello world" + mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_VALUE, expected_value) + + # And INPUT_RAW + import json + expected_raw = json.dumps([{"role": "user", "content": "Hello world"}], ensure_ascii=False) + mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_RAW, expected_raw) + + def test_process_input_data_non_list(self): + """Test _process_input_data with non-list input (string prompt).""" + from traceai_openai._span_io_handler import _process_input_data + from fi_instrumentation.fi_types import SpanAttributes + from unittest.mock import MagicMock + + mock_span = MagicMock() + + input_data = "What is the capital of France?" + + _process_input_data(input_data, mock_span) + + # Should set INPUT_VALUE to the string + mock_span.set_attribute.assert_called_once_with(SpanAttributes.INPUT_VALUE, "What is the capital of France?") \ No newline at end of file From 2d31b8f5cab1ea81d69c855cdac7b747d62c1654 Mon Sep 17 00:00:00 2001 From: Ritesh Tripathi Date: Thu, 9 Apr 2026 16:51:46 +0000 Subject: [PATCH 2/2] fix: remove INPUT_RAW and keep minimal input overwrite fix --- python/fi_instrumentation/fi_types.py | 1 - .../openai/traceai_openai/_span_io_handler.py | 3 --- python/tests/test_framework_openai.py | 15 --------------- 3 files changed, 19 deletions(-) diff --git a/python/fi_instrumentation/fi_types.py b/python/fi_instrumentation/fi_types.py index 2027bd88..1e2e6b8d 100644 --- a/python/fi_instrumentation/fi_types.py +++ b/python/fi_instrumentation/fi_types.py @@ -203,7 +203,6 @@ class SpanAttributes: OUTPUT_MIME_TYPE = "output.mime_type" INPUT_VALUE = "input.value" INPUT_MIME_TYPE = "input.mime_type" - INPUT_RAW = "input.raw" # Embeddings EMBEDDING_EMBEDDINGS = "embedding.embeddings" diff --git a/python/frameworks/openai/traceai_openai/_span_io_handler.py b/python/frameworks/openai/traceai_openai/_span_io_handler.py index aa48eb60..0ba95cf2 100644 --- a/python/frameworks/openai/traceai_openai/_span_io_handler.py +++ b/python/frameworks/openai/traceai_openai/_span_io_handler.py @@ -50,9 +50,6 @@ def _process_input_data(input_data: Any, span: _WithSpan) -> None: else: input_content.append(msg) eval_input.append(msg_content) - if input_content: - input_raw = json.dumps(input_content, ensure_ascii=False) - span.set_attribute(SpanAttributes.INPUT_RAW, input_raw) if input_images: images_value = json.dumps(input_images, ensure_ascii=False) span.set_attribute(SpanAttributes.INPUT_IMAGES, images_value) diff --git a/python/tests/test_framework_openai.py b/python/tests/test_framework_openai.py index c7b45b5d..79b35595 100644 --- a/python/tests/test_framework_openai.py +++ b/python/tests/test_framework_openai.py @@ -319,16 +319,6 @@ def test_process_input_data_multi_message(self): expected_value = "You are a helpful assistant. \n Hello \n Hi there! \n How are you?" mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_VALUE, expected_value) - # Verify INPUT_RAW contains structured JSON - import json - expected_raw = json.dumps([ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi there!"}, - {"role": "user", "content": "How are you?"} - ], ensure_ascii=False) - mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_RAW, expected_raw) - def test_process_input_data_single_message(self): """Test _process_input_data with single message for regression.""" from traceai_openai._span_io_handler import _process_input_data @@ -347,11 +337,6 @@ def test_process_input_data_single_message(self): expected_value = "Hello world" mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_VALUE, expected_value) - # And INPUT_RAW - import json - expected_raw = json.dumps([{"role": "user", "content": "Hello world"}], ensure_ascii=False) - mock_span.set_attribute.assert_any_call(SpanAttributes.INPUT_RAW, expected_raw) - def test_process_input_data_non_list(self): """Test _process_input_data with non-list input (string prompt).""" from traceai_openai._span_io_handler import _process_input_data