diff --git a/.dockerignore b/.dockerignore index ab4fdc7ab2..bc085a874a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,7 +1,23 @@ .venv .venv-style **/.venv +.pytest_cache +.devcontainer +.vscode +.vs +.idea +.gdb_history out +bazel-bin +bazel-model_server/ +bazel-openvino-model-server/ +bazel-out +bazel-ovms +bazel-ovms-c +bazel-testlogs demos/continuous_batching demos/embeddings -demos/common/export_models/models \ No newline at end of file +demos/common/export_models/models +*.log +*.img +models diff --git a/.gitignore b/.gitignore index 62dca37b89..9db061a1e9 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ __pycache__/ report.json trace.json bazel-bin +bazel-model_server/ +bazel-openvino-model-server/ bazel-out bazel-ovms bazel-ovms-c @@ -28,8 +30,6 @@ tags src/test/llm_testing node_modules/ yarn.* -bazel-openvino-model-server/ -bazel-model_server/ out .user.bazelrc *.log diff --git a/demos/image_generation/cat.png b/demos/image_generation/cat.png new file mode 100644 index 0000000000..a344d4aeb8 Binary files /dev/null and b/demos/image_generation/cat.png differ diff --git a/demos/image_generation/cat_mask.png b/demos/image_generation/cat_mask.png new file mode 100644 index 0000000000..9c88ef5ad2 Binary files /dev/null and b/demos/image_generation/cat_mask.png differ diff --git a/demos/image_generation/cat_mask_inverted.png b/demos/image_generation/cat_mask_inverted.png new file mode 100644 index 0000000000..2146ee11ae Binary files /dev/null and b/demos/image_generation/cat_mask_inverted.png differ diff --git a/src/http_frontend/multi_part_parser_drogon_impl.cpp b/src/http_frontend/multi_part_parser_drogon_impl.cpp index 7282535df4..17d05967e2 100644 --- a/src/http_frontend/multi_part_parser_drogon_impl.cpp +++ b/src/http_frontend/multi_part_parser_drogon_impl.cpp @@ -50,6 +50,17 @@ std::string_view DrogonMultiPartParser::getFileContentByFieldName(const std::str return it->second.fileContent(); } +std::vector DrogonMultiPartParser::getFilesArrayByFieldName(const std::string& name) const { + const std::vector& files = this->parser->getFiles(); + std::vector result; + for (const drogon::HttpFile& file : files) { + if (file.getItemName() == name) { + result.push_back(file.fileContent()); + } + } + return result; +} + std::set DrogonMultiPartParser::getAllFieldNames() const { std::set fieldNames; auto fileMap = this->parser->getFilesMap(); diff --git a/src/http_frontend/multi_part_parser_drogon_impl.hpp b/src/http_frontend/multi_part_parser_drogon_impl.hpp index a37de000f0..7741b82946 100644 --- a/src/http_frontend/multi_part_parser_drogon_impl.hpp +++ b/src/http_frontend/multi_part_parser_drogon_impl.hpp @@ -47,6 +47,7 @@ class DrogonMultiPartParser : public MultiPartParser { std::string getFieldByName(const std::string& name) const override; std::vector getArrayFieldByName(const std::string& name) const override; std::string_view getFileContentByFieldName(const std::string& name) const override; + std::vector getFilesArrayByFieldName(const std::string& name) const override; std::set getAllFieldNames() const override; }; diff --git a/src/image_gen/http_image_gen_calculator.cc b/src/image_gen/http_image_gen_calculator.cc index 8aa5912fe9..77eea57181 100644 --- a/src/image_gen/http_image_gen_calculator.cc +++ b/src/image_gen/http_image_gen_calculator.cc @@ -49,6 +49,7 @@ static bool progress_bar(size_t step, size_t num_steps, ov::Tensor&) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Image Generation Step: {}/{}", step + 1, num_steps); return false; } + // written out separately to avoid msvc crashing when using try-catch in process method ... static absl::Status generateTensor(ov::genai::Text2ImagePipeline& request, const std::string& prompt, ov::AnyMap& requestOptions, @@ -94,6 +95,28 @@ static absl::Status generateTensorImg2Img(ov::genai::Image2ImagePipeline& reques return absl::OkStatus(); } // written out separately to avoid msvc crashing when using try-catch in process method ... +static absl::Status generateTensorInpainting(ov::genai::InpaintingPipeline& request, + const std::string& prompt, ov::Tensor image, ov::Tensor mask, ov::AnyMap& requestOptions, + std::unique_ptr& images) { + try { + requestOptions.insert(ov::genai::callback(progress_bar)); + images = std::make_unique(request.generate(prompt, image, mask, requestOptions)); + auto dims = images->get_shape(); + std::stringstream ss; + for (const auto& dim : dims) { + ss << dim << " "; + } + ss << " element type: " << images->get_element_type().get_type_name(); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator generated inpainting tensor: {}", ss.str()); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(llm_calculator_logger, "ImageGenCalculator Inpainting Error: {}", e.what()); + return absl::InternalError("Error during inpainting generation"); + } catch (...) { + return absl::InternalError("Unknown error during inpainting generation"); + } + return absl::OkStatus(); +} +// written out separately to avoid msvc crashing when using try-catch in process method ... static absl::Status makeTensorFromString(const std::string& filePayload, ov::Tensor& imageTensor) { try { imageTensor = loadImageStbiFromMemory(filePayload); @@ -140,10 +163,12 @@ class ImageGenCalculator : public CalculatorBase { auto pipe = it->second; auto payload = cc->Inputs().Tag(INPUT_TAG_NAME).Get(); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Request URI: {}", cc->NodeName(), payload.uri); std::unique_ptr images; // output if (absl::StartsWith(payload.uri, "/v3/images/generations")) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Routed to image generations path", cc->NodeName()); if (payload.parsedJson->HasParseError()) return absl::InvalidArgumentError("Failed to parse JSON"); @@ -154,13 +179,15 @@ class ImageGenCalculator : public CalculatorBase { SET_OR_RETURN(std::string, prompt, getPromptField(*payload.parsedJson)); SET_OR_RETURN(ov::AnyMap, requestOptions, getImageGenerationRequestOptions(*payload.parsedJson, pipe->args)); - ov::genai::Text2ImagePipeline request = pipe->text2ImagePipeline->clone(); - - auto status = generateTensor(request, prompt, requestOptions, images); + // single request assumption - use pipeline instance directly + if (!pipe->text2ImagePipeline) + return absl::FailedPreconditionError("Text-to-image pipeline is not available for this model"); + auto status = generateTensor(*pipe->text2ImagePipeline, prompt, requestOptions, images); if (!status.ok()) { return status; } } else if (absl::StartsWith(payload.uri, "/v3/images/edits")) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Routed to image edits path", cc->NodeName()); if (payload.multipartParser->hasParseError()) return absl::InvalidArgumentError("Failed to parse multipart data"); @@ -176,8 +203,29 @@ class ImageGenCalculator : public CalculatorBase { SET_OR_RETURN(ov::AnyMap, requestOptions, getImageEditRequestOptions(*payload.multipartParser, pipe->args)); - ov::genai::Image2ImagePipeline request = pipe->image2ImagePipeline->clone(); - status = generateTensorImg2Img(request, prompt, imageTensor, requestOptions, images); + SET_OR_RETURN(std::optional, mask, getFileFromPayload(*payload.multipartParser, "mask")); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Mask present: {}", cc->NodeName(), mask.has_value() && !mask.value().empty()); + + if (mask.has_value() && !mask.value().empty()) { + if (!pipe->inpaintingPipeline) + return absl::FailedPreconditionError("Inpainting pipeline is not available for this model"); + // Inpainting path — uses the pre-built InpaintingPipeline that was loaded from disk + // during initialization. Do NOT derive InpaintingPipeline from Image2ImagePipeline + // at request time — that derivation direction causes a SEGFAULT in GenAI. + ov::Tensor maskTensor; + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: decoding mask tensor", cc->NodeName()); + status = makeTensorFromString(std::string(mask.value()), maskTensor); + if (!status.ok()) { + return status; + } + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: mask tensor decoded, invoking generate()", cc->NodeName()); + status = generateTensorInpainting(*pipe->inpaintingPipeline, prompt, imageTensor, maskTensor, requestOptions, images); + } else { + if (!pipe->image2ImagePipeline) + return absl::FailedPreconditionError("Image-to-image pipeline is not available for this model"); + // image-to-image path - single pipeline instance, no clone needed + status = generateTensorImg2Img(*pipe->image2ImagePipeline, prompt, imageTensor, requestOptions, images); + } if (!status.ok()) { return status; } diff --git a/src/image_gen/imagegenutils.cpp b/src/image_gen/imagegenutils.cpp index 4a11e3e80c..d50471784b 100644 --- a/src/image_gen/imagegenutils.cpp +++ b/src/image_gen/imagegenutils.cpp @@ -410,6 +410,7 @@ std::variant getImageGenerationRequestOptions(const ra static std::set acceptedFields{ "prompt", "prompt_2", "prompt_3", "image", + "mask", "negative_prompt", "negative_prompt_2", "negative_prompt_3", "size", "height", "width", "n", "num_images_per_prompt", @@ -441,7 +442,7 @@ std::variant getImageEditRequestOptions(const ovms::Mu // prompt REQUIRED // image string (REQUIRED) or array (NOT SUPPORTED) // background REJECTED string NO optional default=auto - // mask file NO + // mask file DONE // model string NO optional default=dall-e-2 // n optional default=1 ----> num_images_per_prompt // output_compression REJECTED int NO optional default=100 @@ -527,6 +528,7 @@ std::variant getImageEditRequestOptions(const ovms::Mu static std::set acceptedFields{ "prompt", "prompt_2", "prompt_3", "image", + "mask", "negative_prompt", "negative_prompt_2", "negative_prompt_3", "size", "height", "width", "n", "num_images_per_prompt", diff --git a/src/image_gen/pipelines.cpp b/src/image_gen/pipelines.cpp index 54c7ac1e1b..d55bbce375 100644 --- a/src/image_gen/pipelines.cpp +++ b/src/image_gen/pipelines.cpp @@ -17,11 +17,44 @@ #include -#include "../logging.hpp" -#include "../stringutils.hpp" +#include +#include +#include + +#include "src/logging.hpp" +#include "src/stringutils.hpp" namespace ovms { +// Reshape and compile a pipeline that was loaded from disk. +// Derived (weight-sharing) pipelines inherit the compiled state from the parent and skip this. +template +static void reshapeAndCompile(PipelineT& pipeline, + const ImageGenPipelineArgs& args, + const std::vector& device) { + if (args.staticReshapeSettings.has_value() && args.staticReshapeSettings.value().resolution.size() == 1) { + auto numImagesPerPrompt = args.staticReshapeSettings.value().numImagesPerPrompt.value_or(ov::genai::ImageGenerationConfig().num_images_per_prompt); + auto guidanceScale = args.staticReshapeSettings.value().guidanceScale.value_or(ov::genai::ImageGenerationConfig().guidance_scale); + + SPDLOG_DEBUG("Image Generation Pipeline reshape to static {}x{} resolution, batch: {}, guidance scale: {}", + args.staticReshapeSettings.value().resolution[0].first, args.staticReshapeSettings.value().resolution[0].second, numImagesPerPrompt, guidanceScale); + + pipeline.reshape( + numImagesPerPrompt, + args.staticReshapeSettings.value().resolution[0].first, + args.staticReshapeSettings.value().resolution[0].second, + guidanceScale); + } + + if (device.size() == 1) { + SPDLOG_DEBUG("Image Generation Pipeline compiling to device: {}", device[0]); + pipeline.compile(device[0], args.pluginConfig); + } else { + SPDLOG_DEBUG("Image Generation Pipeline compiling to devices: text_encode={} denoise={} vae={}", device[0], device[1], device[2]); + pipeline.compile(device[0], device[1], device[2], args.pluginConfig); + } +} + ImageGenerationPipelines::ImageGenerationPipelines(const ImageGenPipelineArgs& args) : args(args) { std::vector device; @@ -33,30 +66,81 @@ ImageGenerationPipelines::ImageGenerationPipelines(const ImageGenPipelineArgs& a SPDLOG_DEBUG("Image Generation Pipelines weights loading from: {}", args.modelsPath); - image2ImagePipeline = std::make_unique(args.modelsPath); + // Pipeline construction strategy: + // Preferred chain (weight-sharing, single model load): + // INP(disk) → reshape+compile → I2I(INP) → T2I(I2I) + // + // Some models don't support all derivation directions (e.g. inpainting-specific + // models reject I2I(INP) with "Cannot create Image2ImagePipeline from InpaintingPipeline + // with inpainting model"). When derivation fails, fall back to loading from disk + // (separate model load + reshape+compile). We WARN on individual failures and only + // throw if no pipeline could be created at all. - if (args.staticReshapeSettings.has_value() && args.staticReshapeSettings.value().resolution.size() == 1) { - auto numImagesPerPrompt = args.staticReshapeSettings.value().numImagesPerPrompt.value_or(ov::genai::ImageGenerationConfig().num_images_per_prompt); - auto guidanceScale = args.staticReshapeSettings.value().guidanceScale.value_or(ov::genai::ImageGenerationConfig().guidance_scale); + // --- Step 1: InpaintingPipeline from disk --- + try { + inpaintingPipeline = std::make_unique(args.modelsPath); + reshapeAndCompile(*inpaintingPipeline, args, device); + SPDLOG_DEBUG("InpaintingPipeline created from disk"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to create InpaintingPipeline from disk: {}", e.what()); + inpaintingPipeline.reset(); + } - SPDLOG_DEBUG("Image Generation Pipelines will be reshaped to static {}x{} resolution, batch: {}, guidance scale: {}", - args.staticReshapeSettings.value().resolution[0].first, args.staticReshapeSettings.value().resolution[0].second, numImagesPerPrompt, guidanceScale); + // --- Step 2: Image2ImagePipeline — derive from INP, fallback to disk --- + if (inpaintingPipeline) { + try { + image2ImagePipeline = std::make_unique(*inpaintingPipeline); + SPDLOG_DEBUG("Image2ImagePipeline derived from InpaintingPipeline"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to derive Image2ImagePipeline from InpaintingPipeline: {}", e.what()); + } + } + if (!image2ImagePipeline) { + try { + image2ImagePipeline = std::make_unique(args.modelsPath); + reshapeAndCompile(*image2ImagePipeline, args, device); + SPDLOG_DEBUG("Image2ImagePipeline created from disk (fallback)"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to create Image2ImagePipeline from disk: {}", e.what()); + image2ImagePipeline.reset(); + } + } - image2ImagePipeline->reshape( - numImagesPerPrompt, - args.staticReshapeSettings.value().resolution[0].first, // at this point it should be validated for existence - args.staticReshapeSettings.value().resolution[0].second, // at this point it should be validated for existence - guidanceScale); + // --- Step 3: Text2ImagePipeline — derive from I2I or INP, fallback to disk --- + if (image2ImagePipeline) { + try { + text2ImagePipeline = std::make_unique(*image2ImagePipeline); + SPDLOG_DEBUG("Text2ImagePipeline derived from Image2ImagePipeline"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to derive Text2ImagePipeline from Image2ImagePipeline: {}", e.what()); + } + } + if (!text2ImagePipeline && inpaintingPipeline) { + try { + text2ImagePipeline = std::make_unique(*inpaintingPipeline); + SPDLOG_DEBUG("Text2ImagePipeline derived from InpaintingPipeline"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to derive Text2ImagePipeline from InpaintingPipeline: {}", e.what()); + } + } + if (!text2ImagePipeline) { + try { + text2ImagePipeline = std::make_unique(args.modelsPath); + reshapeAndCompile(*text2ImagePipeline, args, device); + SPDLOG_DEBUG("Text2ImagePipeline created from disk (fallback)"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to create Text2ImagePipeline from disk: {}", e.what()); + text2ImagePipeline.reset(); + } } - if (device.size() == 1) { - SPDLOG_DEBUG("Image Generation Pipelines compiling to devices: text_encode={} denoise={} vae={}", device[0], device[0], device[0]); - image2ImagePipeline->compile(device[0], args.pluginConfig); - } else { - SPDLOG_DEBUG("Image Generation Pipelines compiling to devices: text_encode={} denoise={} vae={}", device[0], device[1], device[2]); - image2ImagePipeline->compile(device[0], device[1], device[2], args.pluginConfig); + if (!inpaintingPipeline && !image2ImagePipeline && !text2ImagePipeline) { + throw std::runtime_error("Failed to create any image generation pipeline from: " + args.modelsPath); } - text2ImagePipeline = std::make_unique(*image2ImagePipeline); + SPDLOG_INFO("Image Generation Pipelines ready — T2I: {} | I2I: {} | INP: {}", + text2ImagePipeline ? "OK" : "N/A", + image2ImagePipeline ? "OK" : "N/A", + inpaintingPipeline ? "OK" : "N/A"); } } // namespace ovms diff --git a/src/image_gen/pipelines.hpp b/src/image_gen/pipelines.hpp index 7c83bdc2e9..b8c56f0197 100644 --- a/src/image_gen/pipelines.hpp +++ b/src/image_gen/pipelines.hpp @@ -18,8 +18,9 @@ #include #include -#include #include +#include +#include #include "imagegenpipelineargs.hpp" @@ -27,6 +28,7 @@ namespace ovms { struct ImageGenerationPipelines { std::unique_ptr image2ImagePipeline; std::unique_ptr text2ImagePipeline; + std::unique_ptr inpaintingPipeline; ImageGenPipelineArgs args; ImageGenerationPipelines() = delete; diff --git a/src/multi_part_parser.hpp b/src/multi_part_parser.hpp index 19f28f51ba..d2b367e0c2 100644 --- a/src/multi_part_parser.hpp +++ b/src/multi_part_parser.hpp @@ -38,6 +38,9 @@ class MultiPartParser { // Returns empty string if file is not found. virtual std::string_view getFileContentByFieldName(const std::string& name) const = 0; + // API for MP calculators to get all file contents for a given array field name (e.g. "image[]"). + virtual std::vector getFilesArrayByFieldName(const std::string& name) const = 0; + // API for MP calculators to get all field names. virtual std::set getAllFieldNames() const = 0; }; diff --git a/src/pull_module/optimum_export.cpp b/src/pull_module/optimum_export.cpp index 2c8fde8426..b5b2326e13 100644 --- a/src/pull_module/optimum_export.cpp +++ b/src/pull_module/optimum_export.cpp @@ -107,6 +107,9 @@ std::string OptimumDownloader::getExportCmdImageGeneration() { oss << this->OPTIMUM_CLI_EXPORT_COMMAND; oss << "--model " << this->sourceModel; oss << " --weight-format " << this->exportSettings.precision; + if (this->exportSettings.extraQuantizationParams.has_value()) { + oss << " " << this->exportSettings.extraQuantizationParams.value(); + } // TODO FIXME check if its not needed to propagate to other exports oss << " " << this->downloadPath; // clang-format on diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b29bbee326..5274bb38ff 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -401,6 +401,19 @@ TEST_F(TestOptimumDownloaderSetup, RerankExportCmd) { TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmd) { inHfSettings.task = ovms::IMAGE_GENERATION_GRAPH; std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); + std::string expectedCmd = "optimum-cli export openvino --model model/name --weight-format fp64 --param --param value \\path\\to\\Download\\model\\name"; + std::string expectedCmd2 = ""; +#ifdef __linux__ + std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/'); +#endif + ASSERT_EQ(optimumDownloader->getExportCmd(), expectedCmd); + ASSERT_EQ(optimumDownloader->getConvertCmd(), expectedCmd2); +} + +TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmdNoExtraParams) { + inHfSettings.task = ovms::IMAGE_GENERATION_GRAPH; + inHfSettings.exportSettings.extraQuantizationParams = std::nullopt; + std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); std::string expectedCmd = "optimum-cli export openvino --model model/name --weight-format fp64 \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = ""; #ifdef __linux__ diff --git a/src/test/test_http_utils.hpp b/src/test/test_http_utils.hpp index 83138b3b5f..62ca393c40 100644 --- a/src/test/test_http_utils.hpp +++ b/src/test/test_http_utils.hpp @@ -49,6 +49,7 @@ class MockedMultiPartParser final : public ovms::MultiPartParser { MOCK_METHOD(std::vector, getArrayFieldByName, (const std::string&), (const override)); MOCK_METHOD(std::string, getFieldByName, (const std::string&), (const override)); MOCK_METHOD(std::string_view, getFileContentByFieldName, (const std::string&), (const override)); + MOCK_METHOD(std::vector, getFilesArrayByFieldName, (const std::string&), (const override)); MOCK_METHOD(std::set, getAllFieldNames, (), (const, override)); }; diff --git a/src/test/text2image_test.cpp b/src/test/text2image_test.cpp index 0f7916ac60..2dbbaf6c2f 100644 --- a/src/test/text2image_test.cpp +++ b/src/test/text2image_test.cpp @@ -48,6 +48,7 @@ class MockedMultiPartParser final : public ovms::MultiPartParser { MOCK_METHOD(std::vector, getArrayFieldByName, (const std::string& name), (const, override)); MOCK_METHOD(std::string, getFieldByName, (const std::string& name), (const, override)); MOCK_METHOD(std::string_view, getFileContentByFieldName, (const std::string& name), (const, override)); + MOCK_METHOD(std::vector, getFilesArrayByFieldName, (const std::string& name), (const, override)); MOCK_METHOD(std::set, getAllFieldNames, (), (const, override)); };