Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,23 @@
.venv
.venv-style
**/.venv
.pytest_cache
.devcontainer
.vscode
.vs
.idea
.gdb_history
out
bazel-bin
bazel-model_server/
bazel-openvino-model-server/
bazel-out
bazel-ovms
bazel-ovms-c
bazel-testlogs
demos/continuous_batching
demos/embeddings
demos/common/export_models/models
*.log
*.img
models
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ __pycache__/
report.json
trace.json
bazel-bin
bazel-model_server/
bazel-openvino-model-server/
bazel-out
bazel-ovms
bazel-ovms-c
Expand All @@ -28,8 +30,6 @@ tags
src/test/llm_testing
node_modules/
yarn.*
bazel-openvino-model-server/
bazel-model_server/
out
.user.bazelrc
*.log
Expand Down
Binary file added demos/image_generation/cat.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demos/image_generation/cat_mask.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added demos/image_generation/cat_mask_inverted.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 11 additions & 0 deletions src/http_frontend/multi_part_parser_drogon_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,17 @@ std::string_view DrogonMultiPartParser::getFileContentByFieldName(const std::str
return it->second.fileContent();
}

// Collects the contents of every uploaded file whose multipart field name
// matches `name` (e.g. repeated "image[]" entries), in upload order.
// Returns an empty vector when no file carries that field name.
std::vector<std::string_view> DrogonMultiPartParser::getFilesArrayByFieldName(const std::string& name) const {
    std::vector<std::string_view> contents;
    for (const drogon::HttpFile& uploadedFile : this->parser->getFiles()) {
        if (uploadedFile.getItemName() != name) {
            continue;
        }
        contents.push_back(uploadedFile.fileContent());
    }
    return contents;
}

std::set<std::string> DrogonMultiPartParser::getAllFieldNames() const {
std::set<std::string> fieldNames;
auto fileMap = this->parser->getFilesMap();
Expand Down
1 change: 1 addition & 0 deletions src/http_frontend/multi_part_parser_drogon_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class DrogonMultiPartParser : public MultiPartParser {
std::string getFieldByName(const std::string& name) const override;
std::vector<std::string> getArrayFieldByName(const std::string& name) const override;
std::string_view getFileContentByFieldName(const std::string& name) const override;
std::vector<std::string_view> getFilesArrayByFieldName(const std::string& name) const override;
std::set<std::string> getAllFieldNames() const override;
};

Expand Down
58 changes: 53 additions & 5 deletions src/image_gen/http_image_gen_calculator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ static bool progress_bar(size_t step, size_t num_steps, ov::Tensor&) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Image Generation Step: {}/{}", step + 1, num_steps);
return false;
}

// written out separately to avoid msvc crashing when using try-catch in process method ...
static absl::Status generateTensor(ov::genai::Text2ImagePipeline& request,
const std::string& prompt, ov::AnyMap& requestOptions,
Expand Down Expand Up @@ -94,6 +95,28 @@ static absl::Status generateTensorImg2Img(ov::genai::Image2ImagePipeline& reques
return absl::OkStatus();
}
// written out separately to avoid msvc crashing when using try-catch in process method ...
static absl::Status generateTensorInpainting(ov::genai::InpaintingPipeline& request,
    const std::string& prompt, ov::Tensor image, ov::Tensor mask, ov::AnyMap& requestOptions,
    std::unique_ptr<ov::Tensor>& images) {
    try {
        // Attach the per-step progress callback before kicking off generation.
        requestOptions.insert(ov::genai::callback(progress_bar));
        images = std::make_unique<ov::Tensor>(request.generate(prompt, image, mask, requestOptions));
        // Describe the produced tensor (shape + element type) for debug logging.
        std::stringstream shapeDescription;
        for (const auto& dimension : images->get_shape()) {
            shapeDescription << dimension << " ";
        }
        shapeDescription << " element type: " << images->get_element_type().get_type_name();
        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator generated inpainting tensor: {}", shapeDescription.str());
    } catch (const std::exception& e) {
        SPDLOG_LOGGER_ERROR(llm_calculator_logger, "ImageGenCalculator Inpainting Error: {}", e.what());
        return absl::InternalError("Error during inpainting generation");
    } catch (...) {
        return absl::InternalError("Unknown error during inpainting generation");
    }
    return absl::OkStatus();
}
// written out separately to avoid msvc crashing when using try-catch in process method ...
static absl::Status makeTensorFromString(const std::string& filePayload, ov::Tensor& imageTensor) {
try {
imageTensor = loadImageStbiFromMemory(filePayload);
Expand Down Expand Up @@ -140,10 +163,12 @@ class ImageGenCalculator : public CalculatorBase {
auto pipe = it->second;

auto payload = cc->Inputs().Tag(INPUT_TAG_NAME).Get<ovms::HttpPayload>();
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Request URI: {}", cc->NodeName(), payload.uri);

std::unique_ptr<ov::Tensor> images; // output

if (absl::StartsWith(payload.uri, "/v3/images/generations")) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Routed to image generations path", cc->NodeName());
if (payload.parsedJson->HasParseError())
return absl::InvalidArgumentError("Failed to parse JSON");

Expand All @@ -154,13 +179,15 @@ class ImageGenCalculator : public CalculatorBase {
SET_OR_RETURN(std::string, prompt, getPromptField(*payload.parsedJson));
SET_OR_RETURN(ov::AnyMap, requestOptions, getImageGenerationRequestOptions(*payload.parsedJson, pipe->args));

ov::genai::Text2ImagePipeline request = pipe->text2ImagePipeline->clone();

auto status = generateTensor(request, prompt, requestOptions, images);
// single request assumption - use pipeline instance directly
if (!pipe->text2ImagePipeline)
return absl::FailedPreconditionError("Text-to-image pipeline is not available for this model");
auto status = generateTensor(*pipe->text2ImagePipeline, prompt, requestOptions, images);
if (!status.ok()) {
return status;
}
} else if (absl::StartsWith(payload.uri, "/v3/images/edits")) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Routed to image edits path", cc->NodeName());
if (payload.multipartParser->hasParseError())
return absl::InvalidArgumentError("Failed to parse multipart data");

Expand All @@ -176,8 +203,29 @@ class ImageGenCalculator : public CalculatorBase {

SET_OR_RETURN(ov::AnyMap, requestOptions, getImageEditRequestOptions(*payload.multipartParser, pipe->args));

ov::genai::Image2ImagePipeline request = pipe->image2ImagePipeline->clone();
status = generateTensorImg2Img(request, prompt, imageTensor, requestOptions, images);
SET_OR_RETURN(std::optional<std::string_view>, mask, getFileFromPayload(*payload.multipartParser, "mask"));
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Mask present: {}", cc->NodeName(), mask.has_value() && !mask.value().empty());

if (mask.has_value() && !mask.value().empty()) {
if (!pipe->inpaintingPipeline)
return absl::FailedPreconditionError("Inpainting pipeline is not available for this model");
// Inpainting path — uses the pre-built InpaintingPipeline that was loaded from disk
// during initialization. Do NOT derive InpaintingPipeline from Image2ImagePipeline
// at request time — that derivation direction causes a SEGFAULT in GenAI.
ov::Tensor maskTensor;
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: decoding mask tensor", cc->NodeName());
status = makeTensorFromString(std::string(mask.value()), maskTensor);
if (!status.ok()) {
return status;
}
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: mask tensor decoded, invoking generate()", cc->NodeName());
status = generateTensorInpainting(*pipe->inpaintingPipeline, prompt, imageTensor, maskTensor, requestOptions, images);
} else {
if (!pipe->image2ImagePipeline)
return absl::FailedPreconditionError("Image-to-image pipeline is not available for this model");
// image-to-image path - single pipeline instance, no clone needed
status = generateTensorImg2Img(*pipe->image2ImagePipeline, prompt, imageTensor, requestOptions, images);
}
if (!status.ok()) {
return status;
}
Expand Down
4 changes: 3 additions & 1 deletion src/image_gen/imagegenutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ std::variant<absl::Status, ov::AnyMap> getImageGenerationRequestOptions(const ra
static std::set<std::string> acceptedFields{
"prompt", "prompt_2", "prompt_3",
"image",
"mask",
"negative_prompt", "negative_prompt_2", "negative_prompt_3",
"size", "height", "width",
"n", "num_images_per_prompt",
Expand Down Expand Up @@ -441,7 +442,7 @@ std::variant<absl::Status, ov::AnyMap> getImageEditRequestOptions(const ovms::Mu
// prompt REQUIRED
// image string (REQUIRED) or array (NOT SUPPORTED)
// background REJECTED string NO optional default=auto
// mask file NO
// mask file DONE
// model string NO optional default=dall-e-2
// n optional default=1 ----> num_images_per_prompt
// output_compression REJECTED int NO optional default=100
Expand Down Expand Up @@ -527,6 +528,7 @@ std::variant<absl::Status, ov::AnyMap> getImageEditRequestOptions(const ovms::Mu
static std::set<std::string> acceptedFields{
"prompt", "prompt_2", "prompt_3",
"image",
"mask",
"negative_prompt", "negative_prompt_2", "negative_prompt_3",
"size", "height", "width",
"n", "num_images_per_prompt",
Expand Down
124 changes: 104 additions & 20 deletions src/image_gen/pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,44 @@

#include <vector>

#include "../logging.hpp"
#include "../stringutils.hpp"
#include <openvino/genai/image_generation/inpainting_pipeline.hpp>
#include <openvino/genai/image_generation/text2image_pipeline.hpp>
#include <openvino/genai/image_generation/image2image_pipeline.hpp>

#include "src/logging.hpp"
#include "src/stringutils.hpp"

namespace ovms {

// Reshape and compile a pipeline that was loaded from disk.
// Derived (weight-sharing) pipelines inherit the compiled state from the parent and skip this.
template <typename PipelineT>
static void reshapeAndCompile(PipelineT& pipeline,
const ImageGenPipelineArgs& args,
const std::vector<std::string>& device) {
if (args.staticReshapeSettings.has_value() && args.staticReshapeSettings.value().resolution.size() == 1) {
auto numImagesPerPrompt = args.staticReshapeSettings.value().numImagesPerPrompt.value_or(ov::genai::ImageGenerationConfig().num_images_per_prompt);
auto guidanceScale = args.staticReshapeSettings.value().guidanceScale.value_or(ov::genai::ImageGenerationConfig().guidance_scale);

SPDLOG_DEBUG("Image Generation Pipeline reshape to static {}x{} resolution, batch: {}, guidance scale: {}",
args.staticReshapeSettings.value().resolution[0].first, args.staticReshapeSettings.value().resolution[0].second, numImagesPerPrompt, guidanceScale);

pipeline.reshape(
numImagesPerPrompt,
args.staticReshapeSettings.value().resolution[0].first,
args.staticReshapeSettings.value().resolution[0].second,
guidanceScale);
}

if (device.size() == 1) {
SPDLOG_DEBUG("Image Generation Pipeline compiling to device: {}", device[0]);
pipeline.compile(device[0], args.pluginConfig);
} else {
SPDLOG_DEBUG("Image Generation Pipeline compiling to devices: text_encode={} denoise={} vae={}", device[0], device[1], device[2]);
pipeline.compile(device[0], device[1], device[2], args.pluginConfig);
}
}

ImageGenerationPipelines::ImageGenerationPipelines(const ImageGenPipelineArgs& args) :
args(args) {
std::vector<std::string> device;
Expand All @@ -33,30 +66,81 @@ ImageGenerationPipelines::ImageGenerationPipelines(const ImageGenPipelineArgs& a

SPDLOG_DEBUG("Image Generation Pipelines weights loading from: {}", args.modelsPath);

image2ImagePipeline = std::make_unique<ov::genai::Image2ImagePipeline>(args.modelsPath);
// Pipeline construction strategy:
// Preferred chain (weight-sharing, single model load):
// INP(disk) → reshape+compile → I2I(INP) → T2I(I2I)
//
// Some models don't support all derivation directions (e.g. inpainting-specific
// models reject I2I(INP) with "Cannot create Image2ImagePipeline from InpaintingPipeline
// with inpainting model"). When derivation fails, fall back to loading from disk
// (separate model load + reshape+compile). We WARN on individual failures and only
// throw if no pipeline could be created at all.

if (args.staticReshapeSettings.has_value() && args.staticReshapeSettings.value().resolution.size() == 1) {
auto numImagesPerPrompt = args.staticReshapeSettings.value().numImagesPerPrompt.value_or(ov::genai::ImageGenerationConfig().num_images_per_prompt);
auto guidanceScale = args.staticReshapeSettings.value().guidanceScale.value_or(ov::genai::ImageGenerationConfig().guidance_scale);
// --- Step 1: InpaintingPipeline from disk ---
try {
inpaintingPipeline = std::make_unique<ov::genai::InpaintingPipeline>(args.modelsPath);
reshapeAndCompile(*inpaintingPipeline, args, device);
SPDLOG_DEBUG("InpaintingPipeline created from disk");
} catch (const std::exception& e) {
SPDLOG_WARN("Failed to create InpaintingPipeline from disk: {}", e.what());
inpaintingPipeline.reset();
}

SPDLOG_DEBUG("Image Generation Pipelines will be reshaped to static {}x{} resolution, batch: {}, guidance scale: {}",
args.staticReshapeSettings.value().resolution[0].first, args.staticReshapeSettings.value().resolution[0].second, numImagesPerPrompt, guidanceScale);
// --- Step 2: Image2ImagePipeline — derive from INP, fallback to disk ---
if (inpaintingPipeline) {
try {
image2ImagePipeline = std::make_unique<ov::genai::Image2ImagePipeline>(*inpaintingPipeline);
SPDLOG_DEBUG("Image2ImagePipeline derived from InpaintingPipeline");
} catch (const std::exception& e) {
SPDLOG_WARN("Failed to derive Image2ImagePipeline from InpaintingPipeline: {}", e.what());
}
}
if (!image2ImagePipeline) {
try {
image2ImagePipeline = std::make_unique<ov::genai::Image2ImagePipeline>(args.modelsPath);
reshapeAndCompile(*image2ImagePipeline, args, device);
SPDLOG_DEBUG("Image2ImagePipeline created from disk (fallback)");
} catch (const std::exception& e) {
SPDLOG_WARN("Failed to create Image2ImagePipeline from disk: {}", e.what());
image2ImagePipeline.reset();
}
}

image2ImagePipeline->reshape(
numImagesPerPrompt,
args.staticReshapeSettings.value().resolution[0].first, // at this point it should be validated for existence
args.staticReshapeSettings.value().resolution[0].second, // at this point it should be validated for existence
guidanceScale);
// --- Step 3: Text2ImagePipeline — derive from I2I or INP, fallback to disk ---
if (image2ImagePipeline) {
try {
text2ImagePipeline = std::make_unique<ov::genai::Text2ImagePipeline>(*image2ImagePipeline);
SPDLOG_DEBUG("Text2ImagePipeline derived from Image2ImagePipeline");
} catch (const std::exception& e) {
SPDLOG_WARN("Failed to derive Text2ImagePipeline from Image2ImagePipeline: {}", e.what());
}
}
if (!text2ImagePipeline && inpaintingPipeline) {
try {
text2ImagePipeline = std::make_unique<ov::genai::Text2ImagePipeline>(*inpaintingPipeline);
SPDLOG_DEBUG("Text2ImagePipeline derived from InpaintingPipeline");
} catch (const std::exception& e) {
SPDLOG_WARN("Failed to derive Text2ImagePipeline from InpaintingPipeline: {}", e.what());
}
}
if (!text2ImagePipeline) {
try {
text2ImagePipeline = std::make_unique<ov::genai::Text2ImagePipeline>(args.modelsPath);
reshapeAndCompile(*text2ImagePipeline, args, device);
SPDLOG_DEBUG("Text2ImagePipeline created from disk (fallback)");
} catch (const std::exception& e) {
SPDLOG_WARN("Failed to create Text2ImagePipeline from disk: {}", e.what());
text2ImagePipeline.reset();
}
}

if (device.size() == 1) {
SPDLOG_DEBUG("Image Generation Pipelines compiling to devices: text_encode={} denoise={} vae={}", device[0], device[0], device[0]);
image2ImagePipeline->compile(device[0], args.pluginConfig);
} else {
SPDLOG_DEBUG("Image Generation Pipelines compiling to devices: text_encode={} denoise={} vae={}", device[0], device[1], device[2]);
image2ImagePipeline->compile(device[0], device[1], device[2], args.pluginConfig);
if (!inpaintingPipeline && !image2ImagePipeline && !text2ImagePipeline) {
throw std::runtime_error("Failed to create any image generation pipeline from: " + args.modelsPath);
}

text2ImagePipeline = std::make_unique<ov::genai::Text2ImagePipeline>(*image2ImagePipeline);
SPDLOG_INFO("Image Generation Pipelines ready — T2I: {} | I2I: {} | INP: {}",
text2ImagePipeline ? "OK" : "N/A",
image2ImagePipeline ? "OK" : "N/A",
inpaintingPipeline ? "OK" : "N/A");
}
} // namespace ovms
4 changes: 3 additions & 1 deletion src/image_gen/pipelines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@
#include <memory>
#include <string>

#include <openvino/genai/image_generation/text2image_pipeline.hpp>
#include <openvino/genai/image_generation/image2image_pipeline.hpp>
#include <openvino/genai/image_generation/inpainting_pipeline.hpp>
#include <openvino/genai/image_generation/text2image_pipeline.hpp>

#include "imagegenpipelineargs.hpp"

namespace ovms {
struct ImageGenerationPipelines {
std::unique_ptr<ov::genai::Image2ImagePipeline> image2ImagePipeline;
std::unique_ptr<ov::genai::Text2ImagePipeline> text2ImagePipeline;
std::unique_ptr<ov::genai::InpaintingPipeline> inpaintingPipeline;
ImageGenPipelineArgs args;

ImageGenerationPipelines() = delete;
Expand Down
3 changes: 3 additions & 0 deletions src/multi_part_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ class MultiPartParser {
// Returns empty string if file is not found.
virtual std::string_view getFileContentByFieldName(const std::string& name) const = 0;

// API for MP calculators to get all file contents for a given array field name (e.g. "image[]").
virtual std::vector<std::string_view> getFilesArrayByFieldName(const std::string& name) const = 0;

// API for MP calculators to get all field names.
virtual std::set<std::string> getAllFieldNames() const = 0;
};
Expand Down
3 changes: 3 additions & 0 deletions src/pull_module/optimum_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ std::string OptimumDownloader::getExportCmdImageGeneration() {
oss << this->OPTIMUM_CLI_EXPORT_COMMAND;
oss << "--model " << this->sourceModel;
oss << " --weight-format " << this->exportSettings.precision;
if (this->exportSettings.extraQuantizationParams.has_value()) {
oss << " " << this->exportSettings.extraQuantizationParams.value();
    } // TODO: check whether these extra quantization params also need to be propagated to the other export commands
oss << " " << this->downloadPath;
// clang-format on

Expand Down
Loading