Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/llm/apis/openai_completions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,16 @@ absl::Status OpenAIChatCompletionsHandler::parseCommonPart(std::optional<uint32_
return absl::InvalidArgumentError("model missing in request");
}

// lora_adapter: string; optional
// Extension, select a LoRA adapter to use for this request by name
it = doc.FindMember("lora_adapter");
if (it != doc.MemberEnd() && !it->value.IsNull()) {
    if (!it->value.IsString())
        return absl::InvalidArgumentError("lora_adapter is not a string");
    // Trace at debug level; a valid adapter selection is not an error condition.
    SPDLOG_DEBUG("Found LoRA adapter in request: {}", it->value.GetString());
    request.loraAdapter = it->value.GetString();
}

// ignore_eos: bool; optional - defaults to false
// Extension, unsupported by OpenAI API, however supported by vLLM and CB lib
it = doc.FindMember("ignore_eos");
Expand Down
3 changes: 3 additions & 0 deletions src/llm/apis/openai_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ struct OpenAIChatCompletionsRequest {

std::optional<uint32_t> maxModelLength;

// LoRA adapter selection
std::optional<std::string> loraAdapter{std::nullopt};

// Guided generation specific
// String representation of response format object
std::optional<std::string> responseFormat{std::nullopt};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ struct LLMExecutor {
// Logs pipeline-level metrics for the continuous-batching pipeline:
// total requests, currently scheduled requests, and KV-cache usage.
void printMetrics() {
    ov::genai::PipelineMetrics metrics = pipe->get_metrics();
    SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache {};",
        metrics.requests, metrics.scheduled_requests, metrics.cache_usage);
}
};
#pragma GCC diagnostic pop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <string>
#include <vector>
#include <openvino/genai/cache_eviction.hpp>
#include <openvino/genai/lora_adapter.hpp>
#include <openvino/genai/sparse_attention.hpp>
#include <openvino/genai/continuous_batching_pipeline.hpp>
#include <openvino/openvino.hpp>
Expand Down Expand Up @@ -198,6 +199,37 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}

if (nodeOptions.lora_adapter_size() > 0) {
SPDLOG_INFO("LoRA adapters will be applied to the model. Number of adapters: {}", nodeOptions.lora_adapter_size());
for (int i = 0; i < nodeOptions.lora_adapter_size(); ++i) {
SPDLOG_INFO("Processing LoRA adapter number {} with model path: {} alpha: {}", i, nodeOptions.lora_adapter(i).model_path(), nodeOptions.lora_adapter(i).alpha());
const auto& loraAdapterOption = nodeOptions.lora_adapter(i);
auto fsLoraPath = std::filesystem::path(loraAdapterOption.model_path());
std::string loraPath;
if (fsLoraPath.is_relative()) {
loraPath = (std::filesystem::path(graphPath) / fsLoraPath).string();
} else {
loraPath = fsLoraPath.string();
}
try {
ov::genai::Adapter adapter(loraPath);
properties->adapterConfig.add(adapter, loraAdapterOption.alpha());
std::string adapterName = loraAdapterOption.has_name()
? loraAdapterOption.name()
: std::filesystem::path(loraPath).stem().string();
properties->adaptersByName.emplace(adapterName, adapter);
SPDLOG_INFO("Registered LoRA adapter '{}' from path: {}", adapterName, loraPath);
} catch (const std::exception& e) {
SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {} exception: {}", loraPath, e.what());
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
} catch (...) {
SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {}", loraPath);
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}
}
properties->pluginConfig.insert(ov::genai::adapters(properties->adapterConfig));
}

status = JsonParser::parsePluginConfig(nodeOptions.plugin_config(), properties->pluginConfig);
if (!status.ok()) {
SPDLOG_ERROR("Error during llm node plugin_config option parsing to JSON: {}", nodeOptions.plugin_config());
Expand Down
6 changes: 6 additions & 0 deletions src/llm/language_model/legacy/servable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ absl::Status LegacyServable::parseRequest(std::shared_ptr<GenAiServableExecution
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool guided generation will not be applied due to JSON schema validation failure: {}", e.what());
legacyExecutionContext->generationConfigBuilder->unsetStructuredOutputConfig();
}

auto adapterStatus = applyLoraAdapter(executionContext);
if (!adapterStatus.ok()) {
return adapterStatus;
}

return absl::OkStatus();
}

Expand Down
30 changes: 30 additions & 0 deletions src/llm/language_model/legacy/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <vector>

#include "openvino/genai/llm_pipeline.hpp"
#include <openvino/genai/lora_adapter.hpp>
#include <openvino/openvino.hpp>
#include <spdlog/spdlog.h>

Expand Down Expand Up @@ -76,6 +77,35 @@ Status LegacyServableInitializer::initialize(std::shared_ptr<GenAiServable>& ser
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}

if (nodeOptions.lora_adapter_size() > 0) {
for (int i = 0; i < nodeOptions.lora_adapter_size(); ++i) {
const auto& loraAdapterOption = nodeOptions.lora_adapter(i);
auto fsLoraPath = std::filesystem::path(loraAdapterOption.model_path());
std::string loraPath;
if (fsLoraPath.is_relative()) {
loraPath = (std::filesystem::path(graphPath) / fsLoraPath).string();
} else {
loraPath = fsLoraPath.string();
}
try {
ov::genai::Adapter adapter(loraPath);
properties->adapterConfig.add(adapter, loraAdapterOption.alpha());
std::string adapterName = loraAdapterOption.has_name()
? loraAdapterOption.name()
: std::filesystem::path(loraPath).stem().string();
properties->adaptersByName.emplace(adapterName, adapter);
SPDLOG_INFO("Registered LoRA adapter '{}' from path: {}", adapterName, loraPath);
} catch (const std::exception& e) {
SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {} exception: {}", loraPath, e.what());
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
} catch (...) {
SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {}", loraPath);
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}
}
properties->pluginConfig.insert(ov::genai::adapters(properties->adapterConfig));
}

status = JsonParser::parsePluginConfig(nodeOptions.plugin_config(), properties->pluginConfig);
if (!status.ok()) {
SPDLOG_ERROR("Error during llm node plugin_config option parsing to JSON: {}", nodeOptions.plugin_config());
Expand Down
8 changes: 8 additions & 0 deletions src/llm/llm_calculator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ message LLMCalculatorOptions {
optional LLMCalculatorOptions ext = 113473750;
}

// Configuration of a single LoRA adapter to load for the servable.
message LoraAdapter {
// Path to the adapter model. Relative paths are resolved against the graph directory.
required string model_path = 1;
// Blending coefficient applied to the adapter weights when it is registered.
optional float alpha = 2 [default = 1];
// Name used to select this adapter per request via the "lora_adapter" field;
// when omitted, the file stem of model_path is used as the name.
optional string name = 3;
}

message KVCrushConfig {
enum AnchorPointMode {
RANDOM = 0;
Expand Down Expand Up @@ -135,4 +141,6 @@ message LLMCalculatorOptions {
optional bool enable_tool_guided_generation = 23 [default = false];

optional SparseAttentionConfig sparse_attention_config = 24;

repeated LoraAdapter lora_adapter = 25;
}
22 changes: 22 additions & 0 deletions src/llm/servable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,28 @@ absl::Status GenAiServable::parseRequest(std::shared_ptr<GenAiServableExecutionC
executionContext->generationConfigBuilder->unsetStructuredOutputConfig();
}

auto adapterStatus = applyLoraAdapter(executionContext);
if (!adapterStatus.ok()) {
return adapterStatus;
}

return absl::OkStatus();
}

// Applies the per-request LoRA adapter (request field "lora_adapter"), if one was requested.
// Looks the adapter up by name in the servable properties and attaches it — with the alpha
// configured at initialization time — to this request's generation config.
// Returns InvalidArgumentError when the requested adapter name is not registered;
// OkStatus when no adapter was requested or it was applied successfully.
absl::Status GenAiServable::applyLoraAdapter(std::shared_ptr<GenAiServableExecutionContext>& executionContext) {
    const auto& request = executionContext->apiHandler->getRequest();
    if (!request.loraAdapter.has_value()) {
        return absl::OkStatus();  // no adapter requested; nothing to do
    }
    auto props = getProperties();
    auto it = props->adaptersByName.find(request.loraAdapter.value());
    if (it == props->adaptersByName.end()) {
        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Unknown LoRA adapter requested: {}", request.loraAdapter.value());
        return absl::InvalidArgumentError("Unknown LoRA adapter: " + request.loraAdapter.value());
    }
    // Use the alpha registered for this adapter instead of a hard-coded value.
    float alpha = props->adapterConfig.get_alpha(it->second);
    executionContext->generationConfigBuilder->getConfig().adapters =
        ov::genai::AdapterConfig(it->second, alpha);
    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Applied LoRA adapter: {} with alpha: {}", request.loraAdapter.value(), alpha);
    return absl::OkStatus();
}

Expand Down
11 changes: 11 additions & 0 deletions src/llm/servable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <unordered_map>
#include <vector>

#include <openvino/genai/lora_adapter.hpp>

#pragma warning(push)
#pragma warning(disable : 4251 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246)
#pragma GCC diagnostic push
Expand Down Expand Up @@ -111,6 +113,9 @@ struct GenAiServableProperties {
ov::genai::Tokenizer tokenizer;
// Specific pipeline properties
bool eagle3Mode = false;
// LoRA adapter support
ov::genai::AdapterConfig adapterConfig;
std::unordered_map<std::string, ov::genai::Adapter> adaptersByName;

#if (PYTHON_DISABLE == 0)
PyJinjaTemplateProcessor templateProcessor;
Expand Down Expand Up @@ -157,6 +162,12 @@ class GenAiServable {
*/
virtual absl::Status parseRequest(std::shared_ptr<GenAiServableExecutionContext>& executionContext);

protected:
// Sets per-request LoRA adapter on generationConfigBuilder if lora_adapter is specified in the request
absl::Status applyLoraAdapter(std::shared_ptr<GenAiServableExecutionContext>& executionContext);

public:

/*
prepareInputs method implementation MUST fill executionContext inputIds field.
Base implementation applies chat template to the payload body and encodes it with tokenizer.
Expand Down
25 changes: 24 additions & 1 deletion src/llm/visual_language_model/legacy/legacy_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,30 @@ void VisualLanguageModelLegacyExecutor::processRequest() {
} else {
SPDLOG_LOGGER_TRACE(llm_executor_logger, "Generation started");
try {
requestExecutionContext->results = pipe->generate(requestExecutionContext->inputText, requestExecutionContext->inputImages, requestExecutionContext->generationConfigBuilder->getConfig(), requestExecutionContext->textStreamer);
// Per-request LoRA adapter selection (if any) has already been attached to the
// generation config during request parsing.
requestExecutionContext->results = pipe->generate(
    requestExecutionContext->inputText,
    requestExecutionContext->inputImages,
    requestExecutionContext->generationConfigBuilder->getConfig(),
    requestExecutionContext->textStreamer);
} catch (std::exception& e) {
requestExecutionContext->success = false;
SPDLOG_LOGGER_ERROR(llm_executor_logger, "VLM pipeline generation failed: {}.", e.what());
Expand Down
6 changes: 6 additions & 0 deletions src/llm/visual_language_model/legacy/servable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ absl::Status VisualLanguageModelLegacyServable::parseRequest(std::shared_ptr<Gen
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool guided generation will not be applied due to JSON schema validation failure: {}", e.what());
legacyExecutionContext->generationConfigBuilder->unsetStructuredOutputConfig();
}

auto adapterStatus = applyLoraAdapter(executionContext);
if (!adapterStatus.ok()) {
return adapterStatus;
}

return absl::OkStatus();
}

Expand Down
30 changes: 30 additions & 0 deletions src/llm/visual_language_model/legacy/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <string>
#include <vector>

#include <openvino/genai/lora_adapter.hpp>
#include <openvino/genai/visual_language/pipeline.hpp>
#include <openvino/openvino.hpp>
#include <spdlog/spdlog.h>
Expand Down Expand Up @@ -75,6 +76,35 @@ Status VisualLanguageModelLegacyServableInitializer::initialize(std::shared_ptr<
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}

if (nodeOptions.lora_adapter_size() > 0) {
for (int i = 0; i < nodeOptions.lora_adapter_size(); ++i) {
const auto& loraAdapterOption = nodeOptions.lora_adapter(i);
auto fsLoraPath = std::filesystem::path(loraAdapterOption.model_path());
std::string loraPath;
if (fsLoraPath.is_relative()) {
loraPath = (std::filesystem::path(graphPath) / fsLoraPath).string();
} else {
loraPath = fsLoraPath.string();
}
try {
ov::genai::Adapter adapter(loraPath);
properties->adapterConfig.add(adapter, loraAdapterOption.alpha());
std::string adapterName = loraAdapterOption.has_name()
? loraAdapterOption.name()
: std::filesystem::path(loraPath).stem().string();
properties->adaptersByName.emplace(adapterName, adapter);
SPDLOG_INFO("Registered LoRA adapter '{}' from path: {}", adapterName, loraPath);
} catch (const std::exception& e) {
SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {} exception: {}", loraPath, e.what());
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
} catch (...) {
SPDLOG_ERROR("Error during LoRA adapter initialization for model_path: {}", loraPath);
return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED;
}
}
properties->pluginConfig.insert(ov::genai::adapters(properties->adapterConfig));
}

status = JsonParser::parsePluginConfig(nodeOptions.plugin_config(), properties->pluginConfig);
if (!status.ok()) {
SPDLOG_ERROR("Error during llm node plugin_config option parsing to JSON: {}", nodeOptions.plugin_config());
Expand Down