From dd77628c516616e15e31b90b6c06cad156bb5ada Mon Sep 17 00:00:00 2001 From: Michal Kulakowski Date: Wed, 22 Apr 2026 11:26:25 +0200 Subject: [PATCH 1/3] Fix responses --- demos/continuous_batching/README.md | 2 +- src/llm/apis/openai_responses.cpp | 293 ++++++++++++++++++++------ src/llm/http_llm_calculator.cc | 2 + src/llm/servable.cpp | 28 ++- src/test/http_openai_handler_test.cpp | 112 ++++++++++ 5 files changed, 371 insertions(+), 66 deletions(-) diff --git a/demos/continuous_batching/README.md b/demos/continuous_batching/README.md index 34e0c61ee3..8fc4514abc 100644 --- a/demos/continuous_batching/README.md +++ b/demos/continuous_batching/README.md @@ -337,7 +337,7 @@ client = OpenAI( ) stream = client.responses.create( - model="meta-llama/Meta-Llama-3-8B-Instruct", + model="Qwen3-30B-A3B-Instruct-2507-int4-ov", input="Say this is a test", stream=True, ) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index e5d63985e6..bacafd253b 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -45,6 +46,102 @@ namespace ovms { static constexpr const char* OUTPUT_ITEM_ID = "msg-0"; static constexpr const char* REASONING_ITEM_ID = "rs-0"; +static absl::StatusOr parseResponsesTextField(const rapidjson::Value& item, const char* itemType, const char* fieldName) { + auto it = item.FindMember(fieldName); + if (it == item.MemberEnd() || !it->value.IsString()) { + return absl::InvalidArgumentError(absl::StrCat(itemType, " requires a valid ", fieldName, " field")); + } + return std::string(it->value.GetString(), it->value.GetStringLength()); +} + +static absl::StatusOr serializeResponsesJsonField(const rapidjson::Value& item, const char* itemType, const char* fieldName) { + auto it = item.FindMember(fieldName); + if (it == item.MemberEnd() || it->value.IsNull()) { + return std::string("{}"); + } + if (it->value.IsString()) { + return std::string(it->value.GetString(), it->value.GetStringLength()); + } + if (!it->value.IsObject() && !it->value.IsArray()) { + return absl::InvalidArgumentError(absl::StrCat(itemType, " requires ", fieldName, " to be a string, object or array")); + } + + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + it->value.Accept(writer); + return std::string(buffer.GetString(), buffer.GetSize()); +} + +static absl::StatusOr parseResponsesContentValue(const rapidjson::Value& value, size_t messageIndex, + std::optional allowedLocalMediaPath, std::optional> allowedMediaDomains, + ImageHistory& imageHistory) { + if (value.IsString()) { + return std::string(value.GetString(), value.GetStringLength()); + } + + if (!value.IsArray()) { + return absl::InvalidArgumentError("input item content must be a string or array"); + } + if (value.GetArray().Size() == 0) { + return absl::InvalidArgumentError("Invalid message structure - content array is empty"); + } + + std::string contentText; + for (const auto& contentItem : value.GetArray()) { + if (!contentItem.IsObject()) { + return absl::InvalidArgumentError("input content items must be objects"); + } + auto contentObj = contentItem.GetObject(); + auto typeIt = contentObj.FindMember("type"); + if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) { + return absl::InvalidArgumentError("input content item type is missing or invalid"); + } + + const std::string type = typeIt->value.GetString(); + if (type == "input_text" || type == "output_text" || type == "text") { + auto textStatus = parseResponsesTextField(contentObj, type.c_str(), "text"); + if (!textStatus.ok()) { + return textStatus.status(); + } + contentText += textStatus.value(); + } else if (type == "refusal") { + auto refusalStatus = parseResponsesTextField(contentObj, "refusal", "refusal"); + if (!refusalStatus.ok()) { + return refusalStatus.status(); + } + contentText += refusalStatus.value(); + } else if (type == "input_image") { + std::string imageUrl; + auto imageUrlIt = contentObj.FindMember("image_url"); + if (imageUrlIt == contentObj.MemberEnd()) { + return absl::InvalidArgumentError("input_image requires image_url field"); + } + if (imageUrlIt->value.IsString()) { + imageUrl = imageUrlIt->value.GetString(); + } else if (imageUrlIt->value.IsObject()) { + auto imageUrlObj = imageUrlIt->value.GetObject(); + auto urlIt = imageUrlObj.FindMember("url"); + if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) { + return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid"); + } + imageUrl = urlIt->value.GetString(); + } else { + return absl::InvalidArgumentError("input_image.image_url must be a string or object"); + } + + auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); + if (!tensorResult.ok()) { + return tensorResult.status(); + } + imageHistory.push_back({messageIndex, tensorResult.value()}); + } else { + return absl::InvalidArgumentError("Unsupported content type. Supported types are input_text, output_text, text and input_image."); + } + } + + return contentText; +} + static std::string joinServerSideEvents(const std::vector& events) { if (events.empty()) { return ""; @@ -88,6 +185,7 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow return absl::InvalidArgumentError("Messages array cannot be empty"); } + std::map toolNamesByCallId; for (size_t i = 0; i < inputIt->value.GetArray().Size(); ++i) { auto& item = inputIt->value.GetArray()[i]; if (!item.IsObject()) { @@ -95,79 +193,98 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow } auto itemObj = item.GetObject(); - auto roleIt = itemObj.FindMember("role"); - if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) { - return absl::InvalidArgumentError("input item role is missing or invalid"); + std::string itemType = "message"; + auto typeIt = itemObj.FindMember("type"); + if (typeIt != itemObj.MemberEnd()) { + if (!typeIt->value.IsString()) { + return absl::InvalidArgumentError("input item type is invalid"); + } + itemType = typeIt->value.GetString(); } - request.chatHistory.push_back({}); - request.chatHistory.last()["role"] = roleIt->value.GetString(); + if (itemType == "function_call") { + auto nameStatus = parseResponsesTextField(itemObj, "function_call", "name"); + if (!nameStatus.ok()) { + return nameStatus.status(); + } + auto callIdStatus = parseResponsesTextField(itemObj, "function_call", "call_id"); + if (!callIdStatus.ok()) { + return callIdStatus.status(); + } + auto argumentsStatus = serializeResponsesJsonField(itemObj, "function_call", "arguments"); + if (!argumentsStatus.ok()) { + return argumentsStatus.status(); + } - auto contentIt = itemObj.FindMember("content"); - if (contentIt == itemObj.MemberEnd()) { - return absl::InvalidArgumentError("input item content is missing"); - } + ov::genai::JsonContainer toolCall = ov::genai::JsonContainer::object(); + toolCall["id"] = callIdStatus.value(); + toolCall["type"] = "function"; + toolCall["function"] = ov::genai::JsonContainer::object(); + toolCall["function"]["name"] = nameStatus.value(); + toolCall["function"]["arguments"] = argumentsStatus.value(); + + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = "assistant"; + request.chatHistory.last()["content"] = ""; + request.chatHistory.last()["tool_calls"] = ov::genai::JsonContainer::array(); + request.chatHistory.last()["tool_calls"].push_back(toolCall); - if (contentIt->value.IsString()) { - request.chatHistory.last()["content"] = contentIt->value.GetString(); + toolNamesByCallId[callIdStatus.value()] = nameStatus.value(); continue; } - if (!contentIt->value.IsArray()) { - return absl::InvalidArgumentError("input item content must be a string or array"); + if (itemType == "function_call_output") { + auto callIdStatus = parseResponsesTextField(itemObj, "function_call_output", "call_id"); + if (!callIdStatus.ok()) { + return callIdStatus.status(); + } + auto outputIt = itemObj.FindMember("output"); + if (outputIt == itemObj.MemberEnd()) { + return absl::InvalidArgumentError("function_call_output requires output field"); + } + auto contentStatus = parseResponsesContentValue(outputIt->value, i, allowedLocalMediaPath, allowedMediaDomains, request.imageHistory); + if (!contentStatus.ok()) { + return contentStatus.status(); + } + + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = "tool"; + request.chatHistory.last()["tool_call_id"] = callIdStatus.value(); + request.chatHistory.last()["content"] = contentStatus.value(); + auto toolNameIt = toolNamesByCallId.find(callIdStatus.value()); + if (toolNameIt != toolNamesByCallId.end()) { + request.chatHistory.last()["name"] = toolNameIt->second; + } + continue; } - if (contentIt->value.GetArray().Size() == 0) { - return absl::InvalidArgumentError("Invalid message structure - content array is empty"); + + if (itemType != "message") { + return absl::InvalidArgumentError(absl::StrCat("Unsupported input item type: ", itemType)); } - std::string contentText = ""; - for (auto& contentItem : contentIt->value.GetArray()) { - if (!contentItem.IsObject()) { - return absl::InvalidArgumentError("input content items must be objects"); - } - auto contentObj = contentItem.GetObject(); - auto typeIt = contentObj.FindMember("type"); - if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) { - return absl::InvalidArgumentError("input content item type is missing or invalid"); - } + auto roleIt = itemObj.FindMember("role"); + if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) { + return absl::InvalidArgumentError("input item role is missing or invalid"); + } - const std::string type = typeIt->value.GetString(); - if (type == "input_text") { - auto textIt = contentObj.FindMember("text"); - if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) { - return absl::InvalidArgumentError("input_text requires a valid text field"); - } - contentText = textIt->value.GetString(); - } else if (type == "input_image") { - std::string imageUrl; - auto imageUrlIt = contentObj.FindMember("image_url"); - if (imageUrlIt == contentObj.MemberEnd()) { - return absl::InvalidArgumentError("input_image requires image_url field"); - } - if (imageUrlIt->value.IsString()) { - imageUrl = imageUrlIt->value.GetString(); - } else if (imageUrlIt->value.IsObject()) { - auto imageUrlObj = imageUrlIt->value.GetObject(); - auto urlIt = imageUrlObj.FindMember("url"); - if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) { - return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid"); - } - imageUrl = urlIt->value.GetString(); - } else { - return absl::InvalidArgumentError("input_image.image_url must be a string or object"); - } + std::string role = roleIt->value.GetString(); + if (role == "developer") { + // OpenAI Responses may send developer role; normalize to system for template compatibility. + role = "system"; + } - auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); - if (!tensorResult.ok()) { - return tensorResult.status(); - } - request.imageHistory.push_back({i, tensorResult.value()}); - } else { - return absl::InvalidArgumentError("Unsupported content type. Supported types are input_text and input_image."); - } + auto contentIt = itemObj.FindMember("content"); + if (contentIt == itemObj.MemberEnd()) { + return absl::InvalidArgumentError("input item content is missing"); + } + auto contentStatus = parseResponsesContentValue(contentIt->value, i, allowedLocalMediaPath, allowedMediaDomains, request.imageHistory); + if (!contentStatus.ok()) { + return contentStatus.status(); } - request.chatHistory.last()["content"] = contentText; + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = role; + request.chatHistory.last()["content"] = contentStatus.value(); } } else { return absl::InvalidArgumentError("input is not a string or array"); @@ -239,14 +356,68 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional Value messagesArray(kArrayType); for (size_t i = 0; i < request.chatHistory.size(); ++i) { Value msgObj(kObjectType); - auto role = request.chatHistory[i]["role"].as_string(); + const auto& historyItem = request.chatHistory[i]; + auto role = historyItem["role"].as_string(); if (role.has_value()) { msgObj.AddMember("role", Value(role.value().c_str(), alloc), alloc); } - auto content = request.chatHistory[i]["content"].as_string(); + auto content = historyItem["content"].as_string(); if (content.has_value()) { msgObj.AddMember("content", Value(content.value().c_str(), alloc), alloc); } + if (historyItem.contains("tool_call_id")) { + auto toolCallId = historyItem["tool_call_id"].as_string(); + if (toolCallId.has_value()) { + msgObj.AddMember("tool_call_id", Value(toolCallId.value().c_str(), alloc), alloc); + } + } + if (historyItem.contains("name")) { + auto name = historyItem["name"].as_string(); + if (name.has_value()) { + msgObj.AddMember("name", Value(name.value().c_str(), alloc), alloc); + } + } + if (historyItem.contains("tool_calls") && historyItem["tool_calls"].is_array()) { + Value toolCallsArray(kArrayType); + const auto& toolCalls = historyItem["tool_calls"]; + for (size_t j = 0; j < toolCalls.size(); ++j) { + const auto& toolCall = toolCalls[j]; + Value toolCallObj(kObjectType); + + if (toolCall.contains("id")) { + auto id = toolCall["id"].as_string(); + if (id.has_value()) { + toolCallObj.AddMember("id", Value(id.value().c_str(), alloc), alloc); + } + } + if (toolCall.contains("type")) { + auto type = toolCall["type"].as_string(); + if (type.has_value()) { + toolCallObj.AddMember("type", Value(type.value().c_str(), alloc), alloc); + } + } + if (toolCall.contains("function") && toolCall["function"].is_object()) { + Value functionObj(kObjectType); + const auto& function = toolCall["function"]; + if (function.contains("name")) { + auto functionName = function["name"].as_string(); + if (functionName.has_value()) { + functionObj.AddMember("name", Value(functionName.value().c_str(), alloc), alloc); + } + } + if (function.contains("arguments")) { + auto arguments = function["arguments"].as_string(); + if (arguments.has_value()) { + functionObj.AddMember("arguments", Value(arguments.value().c_str(), alloc), alloc); + } + } + toolCallObj.AddMember("function", functionObj, alloc); + } + + toolCallsArray.PushBack(toolCallObj, alloc); + } + msgObj.AddMember("tool_calls", toolCallsArray, alloc); + } messagesArray.PushBack(msgObj, alloc); } processedDoc.AddMember("messages", messagesArray, alloc); diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index cadce01dd0..f69c5c317b 100644 --- a/src/llm/http_llm_calculator.cc +++ b/src/llm/http_llm_calculator.cc @@ -191,6 +191,8 @@ class HttpLLMCalculator : public CalculatorBase { } } catch (ov::AssertFailure& e) { return handleGenerationError(cc, e.what()); + } catch (const std::exception& e) { + return handleGenerationError(cc, e.what()); } catch (...) { return handleGenerationError(cc, "Response generation failed"); } diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index 3079a07603..755cd8b6ff 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -132,7 +132,13 @@ absl::Status GenAiServable::parseRequest(std::shared_ptrapiHandler->parseRequest(getProperties()->maxTokensLimit, getProperties()->bestOfLimit, getProperties()->maxModelLength, config.getServerSettings().allowedLocalMediaPath, config.getServerSettings().allowedMediaDomains); + absl::Status status; + try { + status = executionContext->apiHandler->parseRequest(getProperties()->maxTokensLimit, getProperties()->bestOfLimit, getProperties()->maxModelLength, config.getServerSettings().allowedLocalMediaPath, config.getServerSettings().allowedMediaDomains); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Exception while parsing request: {}", e.what()); + return absl::InvalidArgumentError(std::string("Exception while parsing request: ") + e.what()); + } if (!status.ok()) { SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Failed to parse request: {}", status.message()); return status; @@ -156,8 +162,13 @@ absl::Status GenAiServable::parseRequest(std::shared_ptrtoolParserName, getProperties()->enableToolGuidedGeneration, getProperties()->decodingMethod); - executionContext->generationConfigBuilder->parseConfigFromRequest(executionContext->apiHandler->getRequest()); - executionContext->generationConfigBuilder->adjustConfigForDecodingMethod(); + try { + executionContext->generationConfigBuilder->parseConfigFromRequest(executionContext->apiHandler->getRequest()); + executionContext->generationConfigBuilder->adjustConfigForDecodingMethod(); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Exception while parsing generation config: {}", e.what()); + return absl::InvalidArgumentError(std::string("Exception while parsing generation config: ") + e.what()); + } try { executionContext->generationConfigBuilder->validateStructuredOutputConfig(getProperties()->tokenizer); } catch (const std::exception& e) { @@ -219,8 +230,17 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrapiHandler->getChatHistory().size() > 0) { #if (PYTHON_DISABLE == 0) - bool success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, executionContext->apiHandler->getProcessedJson(), inputText); + bool success; + const std::string& processedJson = executionContext->apiHandler->getProcessedJson(); + if (processedJson.size() > 0) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Applying chat template with Responses processedJson (first 500 chars): {}", processedJson.substr(0, 500)); + success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, processedJson, inputText); + } else { + SPDLOG_LOGGER_WARN(llm_calculator_logger, "Responses processedJson is empty! Falling back to original body"); + success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, executionContext->payload.body, inputText); + } if (!success) { + SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Chat template application failed. Error: {}", inputText); return absl::Status(absl::StatusCode::kInvalidArgument, inputText); } #else diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index a4e6585af0..fad34af048 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -2748,6 +2748,118 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesToolChoiceNoneRemovesTools) EXPECT_EQ(apiHandler->getToolChoice(), "none"); } + TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesFunctionCallItemsStoredInChatHistory) { + std::string json = R"({ + "model": "llama", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "What is the weather like in Paris today?"}]}, + {"type": "function_call", "call_id": "call_123", "name": "get_weather", "arguments": {"location": "Paris"}}, + {"type": "function_call_output", "call_id": "call_123", "output": [{"type": "input_text", "text": "15 degrees Celsius"}]} + ] + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + std::shared_ptr apiHandler = + std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer); + ASSERT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()); + + ov::genai::ChatHistory& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + + EXPECT_EQ(history[0]["role"].get_string(), "user"); + EXPECT_EQ(history[0]["content"].get_string(), "What is the weather like in Paris today?"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), ""); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_123"); + EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"location\":\"Paris\"}"); + + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_123"); + EXPECT_EQ(history[2]["name"].get_string(), "get_weather"); + EXPECT_EQ(history[2]["content"].get_string(), "15 degrees Celsius"); + + #if (PYTHON_DISABLE == 0) + const std::string& processedJson = apiHandler->getProcessedJson(); + ASSERT_FALSE(processedJson.empty()); + rapidjson::Document processedDoc; + processedDoc.Parse(processedJson.c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("messages")); + ASSERT_TRUE(processedDoc["messages"].IsArray()); + ASSERT_EQ(processedDoc["messages"].Size(), 3u); + ASSERT_TRUE(processedDoc["messages"][1].HasMember("tool_calls")); + ASSERT_EQ(processedDoc["messages"][1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(processedDoc["messages"][1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + EXPECT_STREQ(processedDoc["messages"][2]["tool_call_id"].GetString(), "call_123"); + EXPECT_STREQ(processedDoc["messages"][2]["name"].GetString(), "get_weather"); + #else + EXPECT_TRUE(apiHandler->getProcessedJson().empty()) << "processedJson should be empty when Python is disabled"; + #endif + } + + TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesDeveloperRoleMappedToSystem) { + std::string json = R"({ + "model": "llama", + "input": [ + {"type": "message", "role": "developer", "content": [{"type": "input_text", "text": "You are helpful."}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "Hello"}]} + ] + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + std::shared_ptr apiHandler = + std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer); + ASSERT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()); + + ov::genai::ChatHistory& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[0]["role"].get_string(), "system"); + EXPECT_EQ(history[0]["content"].get_string(), "You are helpful."); + EXPECT_EQ(history[1]["role"].get_string(), "user"); + EXPECT_EQ(history[1]["content"].get_string(), "Hello"); + } + +TEST_F(HttpOpenAIHandlerParsingTest, ParsingResponsesConsecutiveDeveloperMessagesAreMerged) { + std::string json = R"({ + "model": "llama", + "input": [ + {"type": "message", "role": "developer", "content": [{"type": "input_text", "text": "Instruction A"}]}, + {"type": "message", "role": "developer", "content": [{"type": "input_text", "text": "Instruction B"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "Hello"}]} + ] + })"; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + std::shared_ptr apiHandler = + std::make_shared(doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), *tokenizer); + ASSERT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()); + + ov::genai::ChatHistory& history = apiHandler->getChatHistory(); + // After simplification, consecutive system/developer messages are NOT merged; + // each message is preserved separately to let the chat template handle role sequences. + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[0]["role"].get_string(), "system"); + EXPECT_EQ(history[0]["content"].get_string(), "Instruction A"); + EXPECT_EQ(history[1]["role"].get_string(), "system"); + EXPECT_EQ(history[1]["content"].get_string(), "Instruction B"); + EXPECT_EQ(history[2]["role"].get_string(), "user"); + EXPECT_EQ(history[2]["content"].get_string(), "Hello"); +} + // Provide get_weather2 but take none TEST_F(HttpOpenAIHandlerParsingTest, ParseRequestWithTools_Provided1_ChoiceNone) { std::string providedTools = R"( From 66624283552e07e9baf9dde0acab313368407475 Mon Sep 17 00:00:00 2001 From: Michal Kulakowski Date: Wed, 22 Apr 2026 11:41:45 +0200 Subject: [PATCH 2/3] fix --- src/llm/apis/openai_responses.cpp | 4 ---- src/llm/servable.cpp | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index bacafd253b..078d0373ed 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -268,10 +268,6 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow } std::string role = roleIt->value.GetString(); - if (role == "developer") { - // OpenAI Responses may send developer role; normalize to system for template compatibility. - role = "system"; - } auto contentIt = itemObj.FindMember("content"); if (contentIt == itemObj.MemberEnd()) { diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index 755cd8b6ff..d69a6b410f 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -137,7 +137,7 @@ absl::Status GenAiServable::parseRequest(std::shared_ptrapiHandler->parseRequest(getProperties()->maxTokensLimit, getProperties()->bestOfLimit, getProperties()->maxModelLength, config.getServerSettings().allowedLocalMediaPath, config.getServerSettings().allowedMediaDomains); } catch (const std::exception& e) { SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Exception while parsing request: {}", e.what()); - return absl::InvalidArgumentError(std::string("Exception while parsing request: ") + e.what()); + return absl::InvalidArgumentError("Exception while parsing request"); } if (!status.ok()) { SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Failed to parse request: {}", status.message()); @@ -167,7 +167,7 @@ absl::Status GenAiServable::parseRequest(std::shared_ptrgenerationConfigBuilder->adjustConfigForDecodingMethod(); } catch (const std::exception& e) { SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Exception while parsing generation config: {}", e.what()); - return absl::InvalidArgumentError(std::string("Exception while parsing generation config: ") + e.what()); + return absl::InvalidArgumentError("Exception while parsing generation config"); } try { executionContext->generationConfigBuilder->validateStructuredOutputConfig(getProperties()->tokenizer); From 99735a9a212859ed57bcd9974f0960a571044df1 Mon Sep 17 00:00:00 2001 From: mkulakow Date: Wed, 29 Apr 2026 08:59:55 +0200 Subject: [PATCH 3/3] fix --- src/llm/apis/openai_responses.cpp | 281 +++++++++++++++++++++++++----- 1 file changed, 242 insertions(+), 39 deletions(-) diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp index 078d0373ed..281e7c60bb 100644 --- a/src/llm/apis/openai_responses.cpp +++ b/src/llm/apis/openai_responses.cpp @@ -186,6 +186,56 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow } std::map toolNamesByCallId; + // Pre-scan: collect function_call items by call_id so we can emit each + // assistant{tool_calls:[call]} message immediately followed by its matching + // tool message. The gpt-oss/Harmony chat template renders only tool_calls[0] + // per assistant message and requires each tool message to be adjacent to its + // assistant tool-call message. + std::map pendingToolCalls; + std::vector pendingToolCallOrder; + for (size_t k = 0; k < inputIt->value.GetArray().Size(); ++k) { + auto& scanItem = inputIt->value.GetArray()[k]; + if (!scanItem.IsObject()) { + continue; + } + auto scanObj = scanItem.GetObject(); + auto scanTypeIt = scanObj.FindMember("type"); + if (scanTypeIt == scanObj.MemberEnd() || !scanTypeIt->value.IsString() || + std::string(scanTypeIt->value.GetString()) != "function_call") { + continue; + } + auto nameStatus = parseResponsesTextField(scanObj, "function_call", "name"); + if (!nameStatus.ok()) { + return nameStatus.status(); + } + auto callIdStatus = parseResponsesTextField(scanObj, "function_call", "call_id"); + if (!callIdStatus.ok()) { + return callIdStatus.status(); + } + auto argumentsStatus = serializeResponsesJsonField(scanObj, "function_call", "arguments"); + if (!argumentsStatus.ok()) { + return argumentsStatus.status(); + } + ov::genai::JsonContainer toolCall = ov::genai::JsonContainer::object(); + toolCall["id"] = callIdStatus.value(); + toolCall["type"] = "function"; + toolCall["function"] = ov::genai::JsonContainer::object(); + toolCall["function"]["name"] = nameStatus.value(); + toolCall["function"]["arguments"] = argumentsStatus.value(); + if (pendingToolCalls.emplace(callIdStatus.value(), toolCall).second) { + pendingToolCallOrder.push_back(callIdStatus.value()); + } + toolNamesByCallId[callIdStatus.value()] = nameStatus.value(); + } + + auto emitAssistantWithSingleToolCall = [&](const ov::genai::JsonContainer& toolCall) { + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = "assistant"; + request.chatHistory.last()["content"] = ""; + request.chatHistory.last()["tool_calls"] = ov::genai::JsonContainer::array(); + request.chatHistory.last()["tool_calls"].push_back(toolCall); + }; + for (size_t i = 0; i < inputIt->value.GetArray().Size(); ++i) { auto& item = inputIt->value.GetArray()[i]; if (!item.IsObject()) { @@ -203,33 +253,9 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow } if (itemType == "function_call") { - auto nameStatus = parseResponsesTextField(itemObj, "function_call", "name"); - if (!nameStatus.ok()) { - return nameStatus.status(); - } - auto callIdStatus = parseResponsesTextField(itemObj, "function_call", "call_id"); - if (!callIdStatus.ok()) { - return callIdStatus.status(); - } - auto argumentsStatus = serializeResponsesJsonField(itemObj, "function_call", "arguments"); - if (!argumentsStatus.ok()) { - return argumentsStatus.status(); - } - - ov::genai::JsonContainer toolCall = ov::genai::JsonContainer::object(); - toolCall["id"] = callIdStatus.value(); - toolCall["type"] = "function"; - toolCall["function"] = ov::genai::JsonContainer::object(); - toolCall["function"]["name"] = nameStatus.value(); - toolCall["function"]["arguments"] = argumentsStatus.value(); - - request.chatHistory.push_back({}); - request.chatHistory.last()["role"] = "assistant"; - request.chatHistory.last()["content"] = ""; - request.chatHistory.last()["tool_calls"] = ov::genai::JsonContainer::array(); - request.chatHistory.last()["tool_calls"].push_back(toolCall); - - toolNamesByCallId[callIdStatus.value()] = nameStatus.value(); + // Already collected during pre-scan; emission is deferred until the + // matching function_call_output is seen so each assistant tool-call + // message is immediately followed by its tool result. continue; } @@ -247,13 +273,74 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow return contentStatus.status(); } + std::string toolName; + auto explicitNameIt = itemObj.FindMember("name"); + if (explicitNameIt != itemObj.MemberEnd() && explicitNameIt->value.IsString()) { + toolName = explicitNameIt->value.GetString(); + } else { + auto toolNameIt = toolNamesByCallId.find(callIdStatus.value()); + if (toolNameIt != toolNamesByCallId.end()) { + toolName = toolNameIt->second; + } + } + + // Emit the matching assistant tool-call message immediately before + // the tool message, so the chat template's adjacency requirement holds. + auto pendingIt = pendingToolCalls.find(callIdStatus.value()); + if (pendingIt != pendingToolCalls.end()) { + emitAssistantWithSingleToolCall(pendingIt->second); + pendingToolCalls.erase(pendingIt); + } + request.chatHistory.push_back({}); request.chatHistory.last()["role"] = "tool"; request.chatHistory.last()["tool_call_id"] = callIdStatus.value(); request.chatHistory.last()["content"] = contentStatus.value(); - auto toolNameIt = toolNamesByCallId.find(callIdStatus.value()); - if (toolNameIt != toolNamesByCallId.end()) { - request.chatHistory.last()["name"] = toolNameIt->second; + if (!toolName.empty()) { + request.chatHistory.last()["name"] = toolName; + } + continue; + } + + if (itemType == "reasoning") { + std::string reasoningContent; + auto summaryIt = itemObj.FindMember("summary"); + if (summaryIt != itemObj.MemberEnd() && summaryIt->value.IsArray()) { + for (const auto& summaryItem : summaryIt->value.GetArray()) { + if (!summaryItem.IsObject()) { + continue; + } + auto textIt = summaryItem.GetObject().FindMember("text"); + if (textIt != summaryItem.GetObject().MemberEnd() && textIt->value.IsString()) { + if (!reasoningContent.empty()) { + reasoningContent += "\n"; + } + reasoningContent += std::string(textIt->value.GetString(), textIt->value.GetStringLength()); + } + } + } + + if (!reasoningContent.empty()) { + // Merge reasoning into the preceding assistant message if one exists. + // This preserves the adjacency between an assistant tool-call message + // and the subsequent tool message, which the chat template requires. + if (!request.chatHistory.empty() && request.chatHistory.last().contains("role")) { + auto lastRole = request.chatHistory.last()["role"].as_string(); + if (lastRole.has_value() && lastRole.value() == "assistant") { + std::string existing; + if (request.chatHistory.last().contains("content")) { + auto c = request.chatHistory.last()["content"].as_string(); + if (c.has_value()) { + existing = c.value(); + } + } + request.chatHistory.last()["content"] = existing.empty() ? reasoningContent : (existing + "\n" + reasoningContent); + continue; + } + } + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = "assistant"; + request.chatHistory.last()["content"] = reasoningContent; } continue; } @@ -269,18 +356,91 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow std::string role = roleIt->value.GetString(); + std::string content; auto contentIt = itemObj.FindMember("content"); - if (contentIt == itemObj.MemberEnd()) { - return absl::InvalidArgumentError("input item content is missing"); + if (contentIt != itemObj.MemberEnd()) { + auto contentStatus = parseResponsesContentValue(contentIt->value, i, allowedLocalMediaPath, allowedMediaDomains, request.imageHistory); + if (!contentStatus.ok()) { + return contentStatus.status(); + } + content = contentStatus.value(); } - auto contentStatus = parseResponsesContentValue(contentIt->value, i, allowedLocalMediaPath, allowedMediaDomains, request.imageHistory); - if (!contentStatus.ok()) { - return contentStatus.status(); + + if (role == "assistant") { + auto toolCallsIt = itemObj.FindMember("tool_calls"); + if (toolCallsIt != itemObj.MemberEnd() && toolCallsIt->value.IsArray()) { + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = "assistant"; + request.chatHistory.last()["content"] = content; + request.chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsIt->value); + + for (const auto& toolCall : toolCallsIt->value.GetArray()) { + if (!toolCall.IsObject()) { + continue; + } + auto idIt = toolCall.GetObject().FindMember("id"); + if (idIt != toolCall.GetObject().MemberEnd() && idIt->value.IsString()) { + auto functionIt = toolCall.GetObject().FindMember("function"); + if (functionIt != toolCall.GetObject().MemberEnd() && functionIt->value.IsObject()) { + auto nameIt = functionIt->value.GetObject().FindMember("name"); + if (nameIt != functionIt->value.GetObject().MemberEnd() && nameIt->value.IsString()) { + toolNamesByCallId[idIt->value.GetString()] = nameIt->value.GetString(); + } + } + } + } + continue; + } + } + + if (role == "tool") { + std::string callId; + auto toolCallIdIt = itemObj.FindMember("tool_call_id"); + if (toolCallIdIt != itemObj.MemberEnd() && toolCallIdIt->value.IsString()) { + callId = toolCallIdIt->value.GetString(); + } else { + auto callIdIt = itemObj.FindMember("call_id"); + if (callIdIt != itemObj.MemberEnd() && callIdIt->value.IsString()) { + callId = callIdIt->value.GetString(); + } + } + + if (!callId.empty()) { + std::string toolName; + auto nameIt = itemObj.FindMember("name"); + if (nameIt != itemObj.MemberEnd() && nameIt->value.IsString()) { + toolName = nameIt->value.GetString(); + } else { + auto toolNameIt = toolNamesByCallId.find(callId); + if (toolNameIt != toolNamesByCallId.end()) { + toolName = toolNameIt->second; + } + } + + request.chatHistory.push_back({}); + request.chatHistory.last()["role"] = "tool"; + request.chatHistory.last()["tool_call_id"] = callId; + request.chatHistory.last()["content"] = content; + if (!toolName.empty()) { + request.chatHistory.last()["name"] = toolName; + } + continue; + } } request.chatHistory.push_back({}); request.chatHistory.last()["role"] = role; - request.chatHistory.last()["content"] = contentStatus.value(); + request.chatHistory.last()["content"] = content; + } + + // Any function_calls without a matching function_call_output (e.g. the model + // just made tool calls and there are no results yet) are emitted at the end + // in original order, each as its own assistant message. + for (const auto& callId : pendingToolCallOrder) { + auto pendingIt = pendingToolCalls.find(callId); + if (pendingIt != pendingToolCalls.end()) { + emitAssistantWithSingleToolCall(pendingIt->second); + } } } else { return absl::InvalidArgumentError("input is not a string or array"); @@ -418,11 +578,54 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional } processedDoc.AddMember("messages", messagesArray, alloc); - // Copy tools from original doc if present + // Copy tools from original doc if present. Normalize flat Responses tools + // ({"type":"function","name":...,"parameters":...}) into wrapped form + // ({"type":"function","function":{...}}) for compatibility with + // Jinja templates that reference tool.function.*. auto toolsIt = doc.FindMember("tools"); if (toolsIt != doc.MemberEnd() && !toolsIt->value.IsNull()) { - Value toolsCopy(toolsIt->value, alloc); - processedDoc.AddMember("tools", toolsCopy, alloc); + if (toolsIt->value.IsArray()) { + Value normalizedTools(kArrayType); + for (const auto& tool : toolsIt->value.GetArray()) { + if (!tool.IsObject()) { + normalizedTools.PushBack(Value(tool, alloc), alloc); + continue; + } + + auto toolObj = tool.GetObject(); + auto functionIt = toolObj.FindMember("function"); + if (functionIt != toolObj.MemberEnd() && functionIt->value.IsObject()) { + normalizedTools.PushBack(Value(tool, alloc), alloc); + continue; + } + + auto typeIt = toolObj.FindMember("type"); + if (typeIt != toolObj.MemberEnd() && typeIt->value.IsString() && std::string(typeIt->value.GetString()) == "function") { + Value wrappedTool(kObjectType); + wrappedTool.AddMember("type", Value("function", alloc), alloc); + + Value functionObj(kObjectType); + for (auto memberIt = toolObj.MemberBegin(); memberIt != toolObj.MemberEnd(); ++memberIt) { + const std::string key = memberIt->name.GetString(); + if (key == "type") { + continue; + } + Value memberKey(memberIt->name, alloc); + Value memberValue(memberIt->value, alloc); + functionObj.AddMember(memberKey, memberValue, alloc); + } + + wrappedTool.AddMember("function", functionObj, alloc); + normalizedTools.PushBack(wrappedTool, alloc); + } else { + normalizedTools.PushBack(Value(tool, alloc), alloc); + } + } + processedDoc.AddMember("tools", normalizedTools, alloc); + } else { + Value toolsCopy(toolsIt->value, alloc); + processedDoc.AddMember("tools", toolsCopy, alloc); + } } // Copy chat_template_kwargs from original doc if present