Skip to content

Commit a2d97bb

Browse files
committed
Add the missing "tcd" to help, simplify comments, and use a consistent reference style
1 parent 8086045 commit a2d97bb

File tree

2 files changed

+22
-15
lines changed

2 files changed

+22
-15
lines changed

denoiser.hpp

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,10 @@ static void sample_k_diffusion(sample_method_t method,
10091009
case DDIM_TRAILING: // Denoising Diffusion Implicit Models
10101010
// with the "trailing" timestep spacing
10111011
{
1012-
// DDIM itself needs alphas_cumprod (DDPM, Ho et al.,
1012+
// See J. Song et al., "Denoising Diffusion Implicit
1013+
// Models", arXiv:2010.02502 [cs.LG]
1014+
//
1015+
// DDIM itself needs alphas_cumprod (DDPM, J. Ho et al.,
10131016
// arXiv:2006.11239 [cs.LG] with k-diffusion's start and
10141017
// end beta) (which unfortunately k-diffusion's data
10151018
// structure hides from the denoiser), and the sigmas are
@@ -1045,9 +1048,8 @@ static void sample_k_diffusion(sample_method_t method,
10451048
// are Flawed", arXiv:2305.08891 [cs], p. 4, Table
10461049
// 2. Most variables below follow Diffusers naming
10471050
//
1048-
// Diffuser naming vs. J. Song et al., "Denoising
1049-
// Diffusion Implicit Models", arXiv:2010.02502, p. 5,
1050-
// (12) and p. 16, (16) (<variable name> -> <name in
1051+
// Diffusers naming vs. Song et al. (2020), p. 5, (12)
1052+
// and p. 16, (16) (<variable name> -> <name in
10511053
// paper>):
10521054
//
10531055
// - pred_noise_t -> epsilon_theta^(t)(x_t)
@@ -1100,9 +1102,8 @@ static void sample_k_diffusion(sample_method_t method,
11001102
}
11011103
// Note (also noise_pred in Diffusers' pipeline)
11021104
// model_output = model() is the D(x, sigma) as
1103-
// defined in T. Karras et al., arXiv:2206.00364,
1104-
// p. 3, Table 1 and p. 8 (7), compare also p. 38
1105-
// (226) therein.
1105+
// defined in Karras et al. (2022), p. 3, Table 1 and
1106+
// p. 8 (7), compare also p. 38 (226) therein.
11061107
struct ggml_tensor* model_output =
11071108
model(x, sigma, i + 1);
11081109
// Here model_output is still the k-diffusion denoiser
@@ -1202,6 +1203,10 @@ static void sample_k_diffusion(sample_method_t method,
12021203
case TCD: // Strategic Stochastic Sampling (Algorithm 4) in
12031204
// Trajectory Consistency Distillation
12041205
{
1206+
// See J. Zheng et al., "Trajectory Consistency
1207+
// Distillation: Improved Latent Consistency Distillation
1208+
// by Semi-Linear Consistency Function with Trajectory
1209+
// Mapping", arXiv:2402.19159 [cs.CV]
12051210
float beta_start = 0.00085f;
12061211
float beta_end = 0.0120f;
12071212
std::vector<double> alphas_cumprod;
@@ -1238,7 +1243,9 @@ static void sample_k_diffusion(sample_method_t method,
12381243
(int)floor((i + 1) *
12391244
((float)original_steps / steps));
12401245
// Here timestep_s is tau_n' in Algorithm 4. The _s
1241-
// notation appears to be that from DPM-Solver, C. Lu,
1246+
// notation appears to be that from C. Lu et al.,
1247+
// "DPM-Solver: A Fast ODE Solver for Diffusion
1248+
// Probabilistic Model Sampling in Around 10 Steps",
12421249
// arXiv:2206.00927 [cs.LG], but this notation is not
12431250
// continued in Algorithm 4, where _n' is used.
12441251
int timestep_s =
@@ -1315,12 +1322,12 @@ static void sample_k_diffusion(sample_method_t method,
13151322
}
13161323
}
13171324
// This consistency function step can be difficult to
1318-
// decipher from Algorithm 4, as it involves a
1319-
// difficult notation ("|->"). In Diffusers it is
1320-
// borrowed verbatim (with the same comments below for
1321-
// step (4)) from LCMScheduler's noise injection step,
1322-
// compare in S. Luo et al., arXiv:2310.04378 p. 14,
1323-
// Algorithm 3.
1325+
// decipher from Algorithm 4, as it is simply stated
1326+
// using a consistency function. This step is the
1327+
// modified DDIM, i.e. p. 8 (32) in Zheng et
1328+
// al. (2024), with eta set to 0 (see the paragraph
1329+
// immediately thereafter that states this somewhat
1330+
// obliquely).
13241331
{
13251332
float* vec_pred_original_sample =
13261333
(float*)pred_original_sample->data;

examples/cli/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ void print_usage(int argc, const char* argv[]) {
224224
printf(" 1.0 corresponds to full destruction of information in init image\n");
225225
printf(" -H, --height H image height, in pixel space (default: 512)\n");
226226
printf(" -W, --width W image width, in pixel space (default: 512)\n");
227-
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing}\n");
227+
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
228228
printf(" sampling method (default: \"euler_a\")\n");
229229
printf(" --steps STEPS number of sample steps (default: 20)\n");
230230
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");

0 commit comments

Comments
 (0)