Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ Generation Options:
--extra-sample-args <string> extra sampler/scheduler args, key=value list. lcm supports noise_clip_std,
noise_scale_start, noise_scale_end; ltx2 supports max_shift, base_shift,
stretch, terminal; euler_ge supports gamma
--extra-tiling-args <string> extra VAE tiling args, key=value list. LTX video VAE supports
temporal_tile_frames (default: 4), temporal_tile_overlap (default: 1)
-H, --height <int> image height, in pixel space (default: 512)
-W, --width <int> image width, in pixel space (default: 512)
--steps <int> number of sample steps (default: 20)
Expand Down
20 changes: 18 additions & 2 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,10 @@ ArgOptions SDGenerationParams::get_options() {
"--extra-sample-args",
"extra sampler/scheduler args, key=value list. lcm supports noise_clip_std, noise_scale_start, noise_scale_end; ltx2 supports max_shift, base_shift, stretch, terminal; euler_ge supports gamma",
&extra_sample_args},
{"",
"--extra-tiling-args",
"extra VAE tiling args, key=value list. LTX video VAE supports temporal_tile_frames (default: 4), temporal_tile_overlap (default: 1)",
&extra_tiling_args},
};

options.int_options = {
Expand Down Expand Up @@ -1780,6 +1784,9 @@ bool SDGenerationParams::from_json_str(
if (tiling_json.contains("rel_size_y") && tiling_json["rel_size_y"].is_number()) {
vae_tiling_params.rel_size_y = tiling_json["rel_size_y"];
}
if (tiling_json.contains("extra_tiling_args") && tiling_json["extra_tiling_args"].is_string()) {
extra_tiling_args = tiling_json["extra_tiling_args"].get<std::string>();
}
}

if (!parse_lora_json_field(j, lora_path_resolver, lora_map, high_noise_lora_map)) {
Expand Down Expand Up @@ -2002,6 +2009,8 @@ bool SDGenerationParams::initialize_cache_params() {
}

bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
vae_tiling_params.extra_tiling_args = extra_tiling_args.empty() ? nullptr : extra_tiling_args.c_str();

if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1;
}
Expand Down Expand Up @@ -2188,6 +2197,7 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
sample_params.custom_sigmas_count = static_cast<int>(custom_sigmas.size());
sample_params.extra_sample_args = extra_sample_args.empty() ? nullptr : extra_sample_args.c_str();
high_noise_sample_params.extra_sample_args = high_noise_extra_sample_args.empty() ? nullptr : high_noise_extra_sample_args.c_str();
vae_tiling_params.extra_tiling_args = extra_tiling_args.empty() ? nullptr : extra_tiling_args.c_str();
cache_params.scm_mask = scm_mask.empty() ? nullptr : scm_mask.c_str();

sd_pm_params_t pm_params = {
Expand Down Expand Up @@ -2261,6 +2271,7 @@ sd_vid_gen_params_t SDGenerationParams::to_sd_vid_gen_params_t() {
sample_params.custom_sigmas_count = static_cast<int>(custom_sigmas.size());
sample_params.extra_sample_args = extra_sample_args.empty() ? nullptr : extra_sample_args.c_str();
high_noise_sample_params.extra_sample_args = high_noise_extra_sample_args.empty() ? nullptr : high_noise_extra_sample_args.c_str();
vae_tiling_params.extra_tiling_args = extra_tiling_args.empty() ? nullptr : extra_tiling_args.c_str();
cache_params.scm_mask = scm_mask.empty() ? nullptr : scm_mask.c_str();

params.loras = lora_vec.empty() ? nullptr : lora_vec.data();
Expand Down Expand Up @@ -2386,7 +2397,8 @@ std::string SDGenerationParams::to_string() const {
<< vae_tiling_params.tile_size_y << ", "
<< vae_tiling_params.target_overlap << ", "
<< vae_tiling_params.rel_size_x << ", "
<< vae_tiling_params.rel_size_y << " },\n"
<< vae_tiling_params.rel_size_y << ", "
<< "\"" << extra_tiling_args << "\" },\n"
<< "}";
return oss.str();
}
Expand Down Expand Up @@ -2565,14 +2577,18 @@ std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
};
}

if (gen_params.vae_tiling_params.enabled) {
if (gen_params.vae_tiling_params.enabled ||
gen_params.vae_tiling_params.temporal_tiling ||
!gen_params.extra_tiling_args.empty()) {
root["vae_tiling"] = {
{"enabled", gen_params.vae_tiling_params.enabled},
{"temporal_tiling", gen_params.vae_tiling_params.temporal_tiling},
{"tile_size_x", gen_params.vae_tiling_params.tile_size_x},
{"tile_size_y", gen_params.vae_tiling_params.tile_size_y},
{"target_overlap", gen_params.vae_tiling_params.target_overlap},
{"rel_size_x", gen_params.vae_tiling_params.rel_size_x},
{"rel_size_y", gen_params.vae_tiling_params.rel_size_y},
{"extra_tiling_args", gen_params.extra_tiling_args},
};
}

Expand Down
3 changes: 2 additions & 1 deletion examples/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ struct SDGenerationParams {
int video_frames = 1;
int fps = 16;
float vace_strength = 1.f;
sd_tiling_params_t vae_tiling_params = {false, false, 0, 0, 0.5f, 0.0f, 0.0f};
sd_tiling_params_t vae_tiling_params = {false, false, 0, 0, 0.5f, 0.0f, 0.0f, nullptr};
std::string extra_tiling_args;

std::string pm_id_images_dir;
std::string pm_id_embed_path;
Expand Down
3 changes: 3 additions & 0 deletions examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ Default Generation Options:
--extra-sample-args <string> extra sampler/scheduler args, key=value list. lcm supports noise_clip_std,
noise_scale_start, noise_scale_end; ltx2 supports max_shift, base_shift,
stretch, terminal; euler_ge supports gamma
--extra-tiling-args <string> extra VAE tiling args, key=value list. LTX video VAE supports
temporal_tile_frames (default: 4), temporal_tile_overlap (default: 1)
-H, --height <int> image height, in pixel space (default: 512)
-W, --width <int> image width, in pixel space (default: 512)
--steps <int> number of sample steps (default: 20)
Expand Down Expand Up @@ -264,6 +266,7 @@ Default Generation Options:
--disable-auto-resize-ref-image disable auto resize of ref images
--disable-image-metadata do not embed generation metadata on image files
--vae-tiling process vae in tiles to reduce memory usage
--temporal-tiling enable temporal tiling for LTX video VAE decode
--hires enable highres fix
-s, --seed RNG seed (default: 42, use random seed for < 0)
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m,
Expand Down
28 changes: 26 additions & 2 deletions examples/server/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -504,18 +504,22 @@ Shared default fields used by both `img_gen` and `vid_gen`:
| `sample_params.guidance.slg.scale` | `number` |
| `vae_tiling_params` | `object` |
| `vae_tiling_params.enabled` | `boolean` |
| `vae_tiling_params.temporal_tiling` | `boolean` |
| `vae_tiling_params.tile_size_x` | `integer` |
| `vae_tiling_params.tile_size_y` | `integer` |
| `vae_tiling_params.target_overlap` | `number` |
| `vae_tiling_params.rel_size_x` | `number` |
| `vae_tiling_params.rel_size_y` | `number` |
| `vae_tiling_params.extra_tiling_args` | `string` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
| `scm_policy_dynamic` | `boolean` |
| `output_format` | `string` |
| `output_compression` | `integer` |

`vae_tiling_params.extra_tiling_args` accepts a list of `key=value` pairs. For LTX video VAE temporal tiling, it supports the keys `temporal_tile_frames` (default: `4`) and `temporal_tile_overlap` (default: `1`).

`img_gen`-specific default fields:

| Field | Type |
Expand Down Expand Up @@ -692,11 +696,13 @@ Example:

"vae_tiling_params": {
"enabled": false,
"temporal_tiling": false,
"tile_size_x": 0,
"tile_size_y": 0,
"target_overlap": 0.5,
"rel_size_x": 0.0,
"rel_size_y": 0.0
"rel_size_y": 0.0,
"extra_tiling_args": ""
},

"cache_mode": "disabled",
Expand Down Expand Up @@ -804,6 +810,14 @@ Other native fields:
| `hires.custom_sigmas` | `array<number>` |
| `hires.upscale_tile_size` | `integer` |
| `vae_tiling_params` | `object` |
| `vae_tiling_params.enabled` | `boolean` |
| `vae_tiling_params.temporal_tiling` | `boolean` |
| `vae_tiling_params.tile_size_x` | `integer` |
| `vae_tiling_params.tile_size_y` | `integer` |
| `vae_tiling_params.target_overlap` | `number` |
| `vae_tiling_params.rel_size_x` | `number` |
| `vae_tiling_params.rel_size_y` | `number` |
| `vae_tiling_params.extra_tiling_args` | `string` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
Expand Down Expand Up @@ -1012,11 +1026,13 @@ Example:

"vae_tiling_params": {
"enabled": false,
"temporal_tiling": false,
"tile_size_x": 0,
"tile_size_y": 0,
"target_overlap": 0.5,
"rel_size_x": 0.0,
"rel_size_y": 0.0
"rel_size_y": 0.0,
"extra_tiling_args": ""
},

"cache_mode": "disabled",
Expand Down Expand Up @@ -1134,6 +1150,14 @@ Other native fields:
| Field | Type |
| --- | --- |
| `vae_tiling_params` | `object` |
| `vae_tiling_params.enabled` | `boolean` |
| `vae_tiling_params.temporal_tiling` | `boolean` |
| `vae_tiling_params.tile_size_x` | `integer` |
| `vae_tiling_params.tile_size_y` | `integer` |
| `vae_tiling_params.target_overlap` | `number` |
| `vae_tiling_params.rel_size_x` | `number` |
| `vae_tiling_params.rel_size_y` | `number` |
| `vae_tiling_params.extra_tiling_args` | `string` |
| `cache_mode` | `string` |
| `cache_option` | `string` |
| `scm_mask` | `string` |
Expand Down
2 changes: 2 additions & 0 deletions examples/server/routes_sdcpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@ static const char* capability_sample_method_name(enum sample_method_t sample_met
// Builds the JSON object describing the given VAE tiling parameters.
// A null `extra_tiling_args` pointer is reported as an empty string.
static json make_vae_tiling_json(const sd_tiling_params_t& params) {
    const char* extra_args = "";
    if (params.extra_tiling_args) {
        extra_args = params.extra_tiling_args;
    }

    json tiling = json::object();
    tiling["enabled"]           = params.enabled;
    tiling["temporal_tiling"]   = params.temporal_tiling;
    tiling["tile_size_x"]       = params.tile_size_x;
    tiling["tile_size_y"]       = params.tile_size_y;
    tiling["target_overlap"]    = params.target_overlap;
    tiling["rel_size_x"]        = params.rel_size_x;
    tiling["rel_size_y"]        = params.rel_size_y;
    tiling["extra_tiling_args"] = extra_args;
    return tiling;
}

Expand Down
1 change: 1 addition & 0 deletions include/stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ typedef struct {
float target_overlap;
float rel_size_x;
float rel_size_y;
const char* extra_tiling_args;
} sd_tiling_params_t;

typedef struct {
Expand Down
2 changes: 1 addition & 1 deletion src/ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3172,7 +3172,7 @@ class Conv2d_grouped : public UnaryBlock {
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map, const std::string prefix = "") override {
this->prefix = prefix;
enum ggml_type wtype = GGML_TYPE_F16;
params["weight"] = ggml_new_tensor_4d(ctx, wtype, kernel_size.second, kernel_size.first, in_channels / groups, out_channels);
params["weight"] = ggml_new_tensor_4d(ctx, wtype, kernel_size.second, kernel_size.first, in_channels / groups, out_channels);
if (bias) {
enum ggml_type wtype = GGML_TYPE_F32;
params["bias"] = ggml_new_tensor_1d(ctx, wtype, out_channels);
Expand Down
Loading
Loading