From 08f5b41956827c55e58638545f4b6913858c09f5 Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 10 Jun 2024 22:42:15 +0800 Subject: [PATCH] refector: make the sampling module more independent --- denoiser.hpp | 398 +++++++++++++++++++++++++++++++++++++++++++ stable-diffusion.cpp | 388 +---------------------------------------- 2 files changed, 401 insertions(+), 385 deletions(-) diff --git a/denoiser.hpp b/denoiser.hpp index 255167c2..6ecb618b 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -261,4 +261,402 @@ struct CompVisVDenoiser : public Denoiser { } }; +typedef std::function denoise_cb_t; + + +// k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t +void sample_k_diffusion(sample_method_t method, + denoise_cb_t model, + ggml_context* work_ctx, + ggml_tensor* x, + std::vector sigmas, + std::shared_ptr rng) { + size_t steps = sigmas.size() - 1; + // sample_euler_ancestral + switch (method) { + case EULER_A: { + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int i = 0; i < ggml_nelements(d); i++) { + vec_d[i] = (vec_x[i] - vec_denoised[i]) / sigma; + } + } + + // get_ancestral_step + float sigma_up = std::min(sigmas[i + 1], + std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); + float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); + + // Euler method + float dt = sigma_down - sigmas[i]; + // x = x + d * dt + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int i = 0; i < ggml_nelements(x); i++) { + vec_x[i] = vec_x[i] + vec_d[i] * dt; + } + } + + if (sigmas[i + 1] > 0) { + // x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + ggml_tensor_set_f32_randn(noise, rng); + // noise = load_tensor_from_file(work_ctx, "./rand" + std::to_string(i+1) + ".bin"); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int i = 0; i < ggml_nelements(x); i++) { + vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; + } + } + } + } + } break; + case EULER: // Implemented without any sigma churn + { + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(d); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigma; + } + } + + float dt = sigmas[i + 1] - sigma; + // x = x + d * dt + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } + } + } break; + case HEUN: { + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], -(i + 1)); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; + } + } + + float dt = sigmas[i + 1] - sigmas[i]; + if (sigmas[i + 1] == 0) { + // Euler step + // x = x + d * dt + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } else { + // Heun step + float* vec_d = (float*)d->data; + float* vec_d2 = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_x2 = (float*)x2->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x2[j] = vec_x[j] + vec_d[j] * dt; + } + + ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1); + float* vec_denoised = (float*)denoised->data; + for (int j = 0; j < ggml_nelements(x); j++) { + float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1]; + vec_d[j] = (vec_d[j] + d2) / 2; + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } + } + } break; + case DPM2: { + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; + } + } + + if (sigmas[i + 1] == 0) { + // Euler step + // x = x + d * dt + float dt = sigmas[i + 1] - sigmas[i]; + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } else { + // DPM-Solver-2 + float sigma_mid = exp(0.5f * (log(sigmas[i]) + log(sigmas[i + 1]))); + float dt_1 = sigma_mid - sigmas[i]; + float dt_2 = sigmas[i + 1] - sigmas[i]; + + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_x2 = (float*)x2->data; + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x2[j] = vec_x[j] + vec_d[j] * dt_1; + } + + ggml_tensor* denoised = model(x2, sigma_mid, i + 1); + float* vec_denoised = (float*)denoised->data; + for (int j = 0; j < ggml_nelements(x); j++) { + float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid; + vec_x[j] = vec_x[j] + d2 * dt_2; + } + } + } + + } break; + case DPMPP2S_A: { + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + + // get_ancestral_step + float sigma_up = std::min(sigmas[i + 1], + std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); + float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + auto sigma_fn = [](float t) -> float { return exp(-t); }; + + if (sigma_down == 0) { + // Euler step + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(d); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; + } + + // TODO: If sigma_down == 0, isn't this wrong? + // But + // https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py#L525 + // has this exactly the same way. + float dt = sigma_down - sigmas[i]; + for (int j = 0; j < ggml_nelements(d); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } else { + // DPM-Solver++(2S) + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigma_down); + float h = t_next - t; + float s = t + 0.5f * h; + + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_x2 = (float*)x2->data; + float* vec_denoised = (float*)denoised->data; + + // First half-step + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x2[j] = (sigma_fn(s) / sigma_fn(t)) * vec_x[j] - (exp(-h * 0.5f) - 1) * vec_denoised[j]; + } + + ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1); + + // Second half-step + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = (sigma_fn(t_next) / sigma_fn(t)) * vec_x[j] - (exp(-h) - 1) * vec_denoised[j]; + } + } + + // Noise addition + if (sigmas[i + 1] > 0) { + ggml_tensor_set_f32_randn(noise, rng); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int i = 0; i < ggml_nelements(x); i++) { + vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; + } + } + } + } + } break; + case DPMPP2M: // DPM++ (2M) from Karras et al (2022) + { + struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigmas[i + 1]); + float h = t_next - t; + float a = sigmas[i + 1] / sigmas[i]; + float b = exp(-h) - 1.f; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + float* vec_old_denoised = (float*)old_denoised->data; + + if (i == 0 || sigmas[i + 1] == 0) { + // Simpler step for the edge cases + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; + } + } else { + float h_last = t - t_fn(sigmas[i - 1]); + float r = h_last / h; + for (int j = 0; j < ggml_nelements(x); j++) { + float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; + vec_x[j] = a * vec_x[j] - b * denoised_d; + } + } + + // old_denoised = denoised + for (int j = 0; j < ggml_nelements(x); j++) { + vec_old_denoised[j] = vec_denoised[j]; + } + } + } break; + case DPMPP2Mv2: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 + { + struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigmas[i + 1]); + float h = t_next - t; + float a = sigmas[i + 1] / sigmas[i]; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + float* vec_old_denoised = (float*)old_denoised->data; + + if (i == 0 || sigmas[i + 1] == 0) { + // Simpler step for the edge cases + float b = exp(-h) - 1.f; + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; + } + } else { + float h_last = t - t_fn(sigmas[i - 1]); + float h_min = std::min(h_last, h); + float h_max = std::max(h_last, h); + float r = h_max / h_min; + float h_d = (h_max + h_min) / 2.f; + float b = exp(-h_d) - 1.f; + for (int j = 0; j < ggml_nelements(x); j++) { + float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; + vec_x[j] = a * vec_x[j] - b * denoised_d; + } + } + + // old_denoised = denoised + for (int j = 0; j < ggml_nelements(x); j++) { + vec_old_denoised[j] = vec_denoised[j]; + } + } + } break; + case LCM: // Latent Consistency Models + { + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1); + + // x = denoised + { + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_denoised[j]; + } + } + + if (sigmas[i + 1] > 0) { + // x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1]) + ggml_tensor_set_f32_randn(noise, rng); + // noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin"); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + sigmas[i + 1] * vec_noise[j]; + } + } + } + } + } break; + + default: + LOG_ERROR("Attempting to sample with nonexisting sample method %i", method); + abort(); + } +} + #endif // __DENOISER_HPP__ diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 74a7fe5a..8e439d2d 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -877,7 +877,7 @@ class StableDiffusionGGML { } struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x); - auto denoise = [&](ggml_tensor* input, float sigma, int step) { + auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* { if (step == 1) { pretty_progress(0, (int)steps, 0); } @@ -983,393 +983,11 @@ class StableDiffusionGGML { pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f); // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000); } + return denoised; }; - // sample_euler_ancestral - switch (method) { - case EULER_A: { - struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng); - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - - // denoise - denoise(x, sigma, i + 1); - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int i = 0; i < ggml_nelements(d); i++) { - vec_d[i] = (vec_x[i] - vec_denoised[i]) / sigma; - } - } - - // get_ancestral_step - float sigma_up = std::min(sigmas[i + 1], - std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); - float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); - - // Euler method - float dt = sigma_down - sigmas[i]; - // x = x + d * dt - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int i = 0; i < ggml_nelements(x); i++) { - vec_x[i] = vec_x[i] + vec_d[i] * dt; - } - } - - if (sigmas[i + 1] > 0) { - // x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up - ggml_tensor_set_f32_randn(noise, rng); - // noise = load_tensor_from_file(work_ctx, "./rand" + std::to_string(i+1) + ".bin"); - { - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int i = 0; i < ggml_nelements(x); i++) { - vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; - } - } - } - } - } break; - case EULER: // Implemented without any sigma churn - { - struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - - // denoise - denoise(x, sigma, i + 1); - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(d); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigma; - } - } - - float dt = sigmas[i + 1] - sigma; - // x = x + d * dt - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } - } - } break; - case HEUN: { - struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // denoise - denoise(x, sigmas[i], -(i + 1)); - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; - } - } - - float dt = sigmas[i + 1] - sigmas[i]; - if (sigmas[i + 1] == 0) { - // Euler step - // x = x + d * dt - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } else { - // Heun step - float* vec_d = (float*)d->data; - float* vec_d2 = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_x2 = (float*)x2->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x2[j] = vec_x[j] + vec_d[j] * dt; - } - - denoise(x2, sigmas[i + 1], i + 1); - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1]; - vec_d[j] = (vec_d[j] + d2) / 2; - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } - } - } break; - case DPM2: { - struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // denoise - denoise(x, sigmas[i], i + 1); - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; - } - } - - if (sigmas[i + 1] == 0) { - // Euler step - // x = x + d * dt - float dt = sigmas[i + 1] - sigmas[i]; - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } else { - // DPM-Solver-2 - float sigma_mid = exp(0.5f * (log(sigmas[i]) + log(sigmas[i + 1]))); - float dt_1 = sigma_mid - sigmas[i]; - float dt_2 = sigmas[i + 1] - sigmas[i]; - - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_x2 = (float*)x2->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x2[j] = vec_x[j] + vec_d[j] * dt_1; - } - - denoise(x2, sigma_mid, i + 1); - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid; - vec_x[j] = vec_x[j] + d2 * dt_2; - } - } - } - - } break; - case DPMPP2S_A: { - struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // denoise - denoise(x, sigmas[i], i + 1); - - // get_ancestral_step - float sigma_up = std::min(sigmas[i + 1], - std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); - float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); - auto t_fn = [](float sigma) -> float { return -log(sigma); }; - auto sigma_fn = [](float t) -> float { return exp(-t); }; - - if (sigma_down == 0) { - // Euler step - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(d); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; - } - - // TODO: If sigma_down == 0, isn't this wrong? - // But - // https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py#L525 - // has this exactly the same way. - float dt = sigma_down - sigmas[i]; - for (int j = 0; j < ggml_nelements(d); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } else { - // DPM-Solver++(2S) - float t = t_fn(sigmas[i]); - float t_next = t_fn(sigma_down); - float h = t_next - t; - float s = t + 0.5f * h; - - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_x2 = (float*)x2->data; - float* vec_denoised = (float*)denoised->data; - - // First half-step - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x2[j] = (sigma_fn(s) / sigma_fn(t)) * vec_x[j] - (exp(-h * 0.5f) - 1) * vec_denoised[j]; - } - - denoise(x2, sigmas[i + 1], i + 1); - - // Second half-step - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = (sigma_fn(t_next) / sigma_fn(t)) * vec_x[j] - (exp(-h) - 1) * vec_denoised[j]; - } - } - - // Noise addition - if (sigmas[i + 1] > 0) { - ggml_tensor_set_f32_randn(noise, rng); - { - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int i = 0; i < ggml_nelements(x); i++) { - vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; - } - } - } - } - } break; - case DPMPP2M: // DPM++ (2M) from Karras et al (2022) - { - struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); - - auto t_fn = [](float sigma) -> float { return -log(sigma); }; - - for (int i = 0; i < steps; i++) { - // denoise - denoise(x, sigmas[i], i + 1); - - float t = t_fn(sigmas[i]); - float t_next = t_fn(sigmas[i + 1]); - float h = t_next - t; - float a = sigmas[i + 1] / sigmas[i]; - float b = exp(-h) - 1.f; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - float* vec_old_denoised = (float*)old_denoised->data; - - if (i == 0 || sigmas[i + 1] == 0) { - // Simpler step for the edge cases - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; - } - } else { - float h_last = t - t_fn(sigmas[i - 1]); - float r = h_last / h; - for (int j = 0; j < ggml_nelements(x); j++) { - float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; - vec_x[j] = a * vec_x[j] - b * denoised_d; - } - } - - // old_denoised = denoised - for (int j = 0; j < ggml_nelements(x); j++) { - vec_old_denoised[j] = vec_denoised[j]; - } - } - } break; - case DPMPP2Mv2: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 - { - struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); - - auto t_fn = [](float sigma) -> float { return -log(sigma); }; - - for (int i = 0; i < steps; i++) { - // denoise - denoise(x, sigmas[i], i + 1); - - float t = t_fn(sigmas[i]); - float t_next = t_fn(sigmas[i + 1]); - float h = t_next - t; - float a = sigmas[i + 1] / sigmas[i]; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - float* vec_old_denoised = (float*)old_denoised->data; - - if (i == 0 || sigmas[i + 1] == 0) { - // Simpler step for the edge cases - float b = exp(-h) - 1.f; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; - } - } else { - float h_last = t - t_fn(sigmas[i - 1]); - float h_min = std::min(h_last, h); - float h_max = std::max(h_last, h); - float r = h_max / h_min; - float h_d = (h_max + h_min) / 2.f; - float b = exp(-h_d) - 1.f; - for (int j = 0; j < ggml_nelements(x); j++) { - float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; - vec_x[j] = a * vec_x[j] - b * denoised_d; - } - } - - // old_denoised = denoised - for (int j = 0; j < ggml_nelements(x); j++) { - vec_old_denoised[j] = vec_denoised[j]; - } - } - } break; - case LCM: // Latent Consistency Models - { - struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - - // denoise - denoise(x, sigma, i + 1); - - // x = denoised - { - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_denoised[j]; - } - } - - if (sigmas[i + 1] > 0) { - // x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1]) - ggml_tensor_set_f32_randn(noise, rng); - // noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin"); - { - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + sigmas[i + 1] * vec_noise[j]; - } - } - } - } - } break; - - default: - LOG_ERROR("Attempting to sample with nonexisting sample method %i", method); - abort(); - } if (control_net) { control_net->free_control_ctx(); control_net->free_compute_buffer();