diff --git a/README.md b/README.md
index b5920e63..8f4a5f3b 100644
--- a/README.md
+++ b/README.md
@@ -190,12 +190,13 @@ arguments:
   --rng {std_default, cuda}          RNG (default: cuda)
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
   -b, --batch-count COUNT            number of images to generate.
-  --schedule {discrete, karras}      Denoiser sigma schedule (default: discrete)
+  --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)
   --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
                                      <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
   --vae-tiling                       process vae in tiles to reduce memory usage
   --control-net-cpu                  keep controlnet in cpu (for low vram)
   --canny                            apply canny preprocessor (edge detection)
+  --color                            colors the logging tags according to level
   -v, --verbose                      print extra info
 ```
 
diff --git a/denoiser.hpp b/denoiser.hpp
index fd934540..255167c2 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -13,6 +13,7 @@ struct SigmaSchedule {
     float alphas_cumprod[TIMESTEPS];
     float sigmas[TIMESTEPS];
     float log_sigmas[TIMESTEPS];
+    int version = 0;
 
     virtual std::vector<float> get_sigmas(uint32_t n) = 0;
 
@@ -75,6 +76,184 @@ struct DiscreteSchedule : SigmaSchedule {
     }
 };
 
+/*
+ * Align Your Steps (AYS) sigma schedule: stretches a small table of reference
+ * noise levels to the requested step count with log-linear interpolation.
+ * https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
+ */
+struct AYSSchedule : SigmaSchedule {
+    /* interp and linear_interp adapted from dpilger26's NumCpp library:
+     * https://github.com/dpilger26/NumCpp/tree/5e40aab74d14e257d65d3dc385c9ff9e2120c60e */
+
+    /* Blends left and right: perc == 0 yields left, perc == 1 yields right */
+    static constexpr double interp(double left, double right, double perc) noexcept {
+        return (left * (1. - perc)) + (right * perc);
+    }
+
+    /* Piecewise-linear interpolation of the curve (ref_x, ref_y) at every point
+     * of new_x. Returns one value per element of new_x; on invalid reference
+     * data an error is logged and the zero-filled vector is returned.
+     *
+     * ref_x and new_x are both assumed to be sorted in ascending order, which
+     * holds because the calling function generates them that way. Query points
+     * outside [ref_x.front(), ref_x.back()] are clamped to that range so that
+     * float rounding in the generated grids cannot push the segment search past
+     * the end of the reference arrays. */
+    std::vector<double> linear_interp(const std::vector<float>& new_x,
+                                      const std::vector<float>& ref_x,
+                                      const std::vector<float>& ref_y) {
+        const size_t len_x = new_x.size();
+        std::vector<double> new_y(len_x);
+
+        if (ref_x.size() != ref_y.size()) {
+            LOG_ERROR("Linear Interpolation Failed: length mismatch");
+            return new_y;
+        }
+
+        /* A segment needs two reference points; this also guards the indexing below */
+        if (ref_x.size() < 2) {
+            LOG_ERROR("Linear Interpolation Failed: not enough reference points");
+            return new_y;
+        }
+
+        const float x_min       = ref_x.front();
+        const float x_max       = ref_x.back();
+        const size_t last_start = ref_x.size() - 2;
+        size_t j                = 0;
+
+        for (size_t i = 0; i < len_x; i++) {
+            float x = new_x[i];
+            if (x < x_min) {
+                x = x_min;
+            } else if (x > x_max) {
+                x = x_max;
+            }
+
+            /* Advance to the segment [ref_x[j], ref_x[j + 1]] containing x,
+             * never stepping beyond the final segment */
+            while ((j < last_start) && (x > ref_x[j + 1])) {
+                j++;
+            }
+
+            const double width = static_cast<double>(ref_x[j + 1] - ref_x[j]);
+            const double perc  = (width > 0.) ? (static_cast<double>(x - ref_x[j]) / width) : 0.;
+
+            new_y[i] = interp(ref_y[j], ref_y[j + 1], perc);
+        }
+
+        return new_y;
+    }
+
+    /* Returns num_points evenly spaced values covering [start, end]. Each value
+     * is computed directly from its index rather than by repeated addition so
+     * rounding error does not accumulate, and the last value is exactly end. */
+    std::vector<float> linear_space(const float start, const float end, const size_t num_points) {
+        std::vector<float> result(num_points);
+
+        if (num_points == 0) {
+            return result;
+        }
+
+        result[0] = start;
+        if (num_points == 1) {
+            return result;
+        }
+
+        const float inc = (end - start) / (static_cast<float>(num_points - 1));
+        for (size_t i = 1; i < num_points - 1; i++) {
+            result[i] = start + (inc * static_cast<float>(i));
+        }
+        result[num_points - 1] = end;
+
+        return result;
+    }
+
+    /* Resamples sigma_in (ordered from largest to smallest) to new_len values
+     * by interpolating linearly in log-space. The result is in the same
+     * descending order. */
+    std::vector<float> log_linear_interpolation(const std::vector<float>& sigma_in,
+                                                const size_t new_len) {
+        const size_t s_len        = sigma_in.size();
+        std::vector<float> x_vals = linear_space(0.f, 1.f, s_len);
+        std::vector<float> y_vals(s_len);
+
+        /* Reverses the input array to be ascending instead of descending,
+         * also hits it with a log, it is log-linear interpolation after all */
+        for (size_t i = 0; i < s_len; i++) {
+            y_vals[i] = std::log(sigma_in[s_len - i - 1]);
+        }
+
+        std::vector<float> new_x_vals  = linear_space(0.f, 1.f, new_len);
+        std::vector<double> new_y_vals = linear_interp(new_x_vals, x_vals, y_vals);
+        std::vector<float> results(new_len);
+
+        /* Undo the log and flip back to descending order */
+        for (size_t i = 0; i < new_len; i++) {
+            results[i] = static_cast<float>(std::exp(new_y_vals[new_len - i - 1]));
+        }
+
+        return results;
+    }
+
+    /* Returns len + 1 sigmas for a len-step sampling run, ordered from the
+     * highest noise level down to a final 0. The reference levels are picked by
+     * the model version stored in `version` (SD2.x falls back to the SD1.5
+     * levels with a warning); for an unsupported version an error is logged and
+     * a zero-filled vector is returned. */
+    std::vector<float> get_sigmas(uint32_t len) {
+        static const std::vector<float> noise_levels[] = {
+            /* SD1.5 */
+            {14.6146412293f, 6.4745760956f, 3.8636745985f, 2.6946151520f,
+             1.8841921177f, 1.3943805092f, 0.9642583904f, 0.6523686016f,
+             0.3977456272f, 0.1515232662f, 0.0291671582f},
+            /* SDXL */
+            {14.6146412293f, 6.3184485287f, 3.7681790315f, 2.1811480769f,
+             1.3405244945f, 0.8620721141f, 0.5550693289f, 0.3798540708f,
+             0.2332364134f, 0.1114188177f, 0.0291671582f},
+            /* SVD */
+            {700.00f, 54.5f, 15.886f, 7.977f, 4.248f, 1.789f, 0.981f, 0.403f,
+             0.173f, 0.034f, 0.002f},
+        };
+
+        std::vector<float> inputs;
+        std::vector<float> results(len + 1);
+
+        switch (version) {
+            case VERSION_2_x:
+                LOG_WARN("AYS not designed for SD2.X models");
+                /* fallthrough */
+            case VERSION_1_x:
+                LOG_INFO("AYS using SD1.5 noise levels");
+                inputs = noise_levels[0];
+                break;
+            case VERSION_XL:
+                LOG_INFO("AYS using SDXL noise levels");
+                inputs = noise_levels[1];
+                break;
+            case VERSION_SVD:
+                LOG_INFO("AYS using SVD noise levels");
+                inputs = noise_levels[2];
+                break;
+            default:
+                LOG_ERROR("Version not compatible with AYS scheduler");
+                return results;
+        }
+
+        /* Stretches those pre-calculated reference levels out to the desired
+         * size using log-linear interpolation */
+        if ((len + 1) != inputs.size()) {
+            results = log_linear_interpolation(inputs, len + 1);
+        } else {
+            results = inputs;
+        }
+
+        /* The reference tables end on a small non-zero sigma; force the final
+         * entry to exactly zero */
+        results[len] = 0.0f;
+
+        return results;
+    }
+};
+
 struct KarrasSchedule : SigmaSchedule {
     std::vector<float> get_sigmas(uint32_t n) {
         // These *COULD* be function arguments here,
@@ -122,4 +301,4 @@ struct CompVisVDenoiser : public Denoiser {
     }
 };
 
-#endif // __DENOISER_HPP__
\ No newline at end of file
+#endif // __DENOISER_HPP__
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index 0f26644b..565af74a 100644
--- 
a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -43,6 +43,7 @@ const char* schedule_str[] = { "default", "discrete", "karras", + "ays", }; const char* modes_str[] = { @@ -190,12 +191,13 @@ void print_usage(int argc, const char* argv[]) { printf(" --rng {std_default, cuda} RNG (default: cuda)\n"); printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n"); printf(" -b, --batch-count COUNT number of images to generate.\n"); - printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n"); + printf(" --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)\n"); printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n"); printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n"); printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n"); printf(" --canny apply canny preprocessor (edge detection)\n"); + printf(" --color Colors the logging tags according to level\n"); printf(" -v, --verbose print extra info\n"); } diff --git a/model.cpp b/model.cpp index 3db919be..684317d2 100644 --- a/model.cpp +++ b/model.cpp @@ -890,6 +890,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const // ggml/src/ggml.c:2745 if (n_dims < 1 || n_dims > GGML_MAX_DIMS) { + LOG_ERROR("skip tensor '%s' with n_dims %d", name.c_str(), n_dims); continue; } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index abaae693..e4eb56e7 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -450,6 +450,11 @@ class StableDiffusionGGML { LOG_INFO("running with Karras schedule"); denoiser->schedule = std::make_shared(); break; + case AYS: + LOG_INFO("Running with Align-Your-Steps schedule"); + denoiser->schedule = std::make_shared(); + denoiser->schedule->version = version; + break; case DEFAULT: // Don't touch anything. 
break; diff --git a/stable-diffusion.h b/stable-diffusion.h index 0de17ae2..4031a093 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -49,6 +49,7 @@ enum schedule_t { DEFAULT, DISCRETE, KARRAS, + AYS, N_SCHEDULES };