@@ -168,21 +168,24 @@ struct AYSSchedule : SigmaSchedule {
168168 std::vector<float > inputs;
169169 std::vector<float > results (n + 1 );
170170
171- if (sd_version_is_sd2 ((SDVersion)version)) {
172- LOG_WARN (" AYS not designed for SD2.X models" );
173- } /* fallthrough */
174- else if (sd_version_is_sd1 ((SDVersion)version)) {
175- LOG_INFO (" AYS using SD1.5 noise levels" );
176- inputs = noise_levels[0 ];
177- } else if (sd_version_is_sdxl ((SDVersion)version)) {
178- LOG_INFO (" AYS using SDXL noise levels" );
179- inputs = noise_levels[1 ];
180- } else if (version == VERSION_SVD) {
181- LOG_INFO (" AYS using SVD noise levels" );
182- inputs = noise_levels[2 ];
183- } else {
184- LOG_ERROR (" Version not compatible with AYS scheduler" );
185- return results;
171+ switch (version) {
172+ case VERSION_SD2: /* fallthrough */
173+ LOG_WARN (" AYS not designed for SD2.X models" );
174+ case VERSION_SD1:
175+ LOG_INFO (" AYS using SD1.5 noise levels" );
176+ inputs = noise_levels[0 ];
177+ break ;
178+ case VERSION_SDXL:
179+ LOG_INFO (" AYS using SDXL noise levels" );
180+ inputs = noise_levels[1 ];
181+ break ;
182+ case VERSION_SVD:
183+ LOG_INFO (" AYS using SVD noise levels" );
184+ inputs = noise_levels[2 ];
185+ break ;
186+ default :
187+ LOG_ERROR (" Version not compatable with AYS scheduler" );
188+ return results;
186189 }
187190
188191 /* Stretches those pre-calculated reference levels out to the desired
@@ -232,6 +235,24 @@ struct GITSSchedule : SigmaSchedule {
232235 }
233236};
234237
238+ struct SGMUniformSchedule : SigmaSchedule {
239+ std::vector<float > get_sigmas (uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
240+
241+ std::vector<float > result;
242+ if (n == 0 ) {
243+ result.push_back (0 .0f );
244+ return result;
245+ }
246+ result.reserve (n + 1 );
247+ int t_max = TIMESTEPS -1 ;
248+ float step = static_cast <float >(t_max) / static_cast <float >(n > 1 ? (n -1 ) : 1 ) ;
249+ for (uint32_t i=0 ; i<n; ++i) {
250+ result.push_back (t_to_sigma_func (t_max - step * i));
251+ }
252+ result.push_back (0 .0f );
253+ return result;
254+ }
255+ };
235256struct KarrasSchedule : SigmaSchedule {
236257 std::vector<float > get_sigmas (uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
237258 // These *COULD* be function arguments here,
@@ -251,6 +272,36 @@ struct KarrasSchedule : SigmaSchedule {
251272 }
252273};
253274
275+ struct SimpleSchedule : SigmaSchedule {
276+ std::vector<float > get_sigmas (uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
277+ std::vector<float > result_sigmas;
278+
279+ if (n == 0 ) {
280+ return result_sigmas;
281+ }
282+
283+ result_sigmas.reserve (n + 1 );
284+
285+ int model_sigmas_len = TIMESTEPS;
286+
287+ float step_factor = static_cast <float >(model_sigmas_len) / static_cast <float >(n);
288+
289+ for (uint32_t i = 0 ; i < n; ++i) {
290+
291+ int offset_from_start_of_py_array = static_cast <int >(static_cast <float >(i) * step_factor);
292+ int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
293+
294+ if (timestep_index < 0 ) {
295+ timestep_index = 0 ;
296+ }
297+
298+ result_sigmas.push_back (t_to_sigma (static_cast <float >(timestep_index)));
299+ }
300+ result_sigmas.push_back (0 .0f );
301+ return result_sigmas;
302+ }
303+ };
304+
254305struct Denoiser {
255306 std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
256307 virtual float sigma_min () = 0;
@@ -262,8 +313,39 @@ struct Denoiser {
262313 virtual ggml_tensor* inverse_noise_scaling (float sigma, ggml_tensor* latent) = 0;
263314
264315 virtual std::vector<float > get_sigmas (uint32_t n) {
265- auto bound_t_to_sigma = std::bind (&Denoiser::t_to_sigma, this , std::placeholders::_1);
266- return schedule->get_sigmas (n, sigma_min (), sigma_max (), bound_t_to_sigma);
316+ // Check if the current schedule is SGMUniformSchedule
317+ if (std::dynamic_pointer_cast<SGMUniformSchedule>(schedule)) {
318+ std::vector<float > sigs;
319+ sigs.reserve (n + 1 );
320+
321+ if (n == 0 ) {
322+ sigs.push_back (0 .0f );
323+ return sigs;
324+ }
325+
326+ // Use the Denoiser's own sigma_to_t and t_to_sigma methods
327+ float start_t_val = this ->sigma_to_t (this ->sigma_max ());
328+ float end_t_val = this ->sigma_to_t (this ->sigma_min ());
329+
330+ float dt_per_step;
331+ if (n > 0 ) {
332+ dt_per_step = (end_t_val - start_t_val) / static_cast <float >(n);
333+ } else {
334+ dt_per_step = 0 .0f ;
335+ }
336+
337+ for (uint32_t i = 0 ; i < n; ++i) {
338+ float current_t = start_t_val + static_cast <float >(i) * dt_per_step;
339+ sigs.push_back (this ->t_to_sigma (current_t ));
340+ }
341+
342+ sigs.push_back (0 .0f );
343+ return sigs;
344+
345+ } else { // For all other schedules, use the existing virtual dispatch
346+ auto bound_t_to_sigma = std::bind (&Denoiser::t_to_sigma, this , std::placeholders::_1);
347+ return schedule->get_sigmas (n, sigma_min (), sigma_max (), bound_t_to_sigma);
348+ }
267349 }
268350};
269351
@@ -343,32 +425,6 @@ struct CompVisVDenoiser : public CompVisDenoiser {
343425 }
344426};
345427
346- struct EDMVDenoiser : public CompVisVDenoiser {
347- float min_sigma = 0.002 ;
348- float max_sigma = 120.0 ;
349-
350- EDMVDenoiser (float min_sigma = 0.002 , float max_sigma = 120.0 )
351- : min_sigma(min_sigma), max_sigma(max_sigma) {
352- schedule = std::make_shared<ExponentialSchedule>();
353- }
354-
355- float t_to_sigma (float t) {
356- return std::exp (t * 4 / (float )TIMESTEPS);
357- }
358-
359- float sigma_to_t (float s) {
360- return 0.25 * std::log (s);
361- }
362-
363- float sigma_min () {
364- return min_sigma;
365- }
366-
367- float sigma_max () {
368- return max_sigma;
369- }
370- };
371-
372428float time_snr_shift (float alpha, float t) {
373429 if (alpha == 1 .0f ) {
374430 return t;
@@ -1042,7 +1098,7 @@ static void sample_k_diffusion(sample_method_t method,
10421098 // also needed to invert the behavior of CompVisDenoiser
10431099 // (k-diffusion's LMSDiscreteScheduler)
10441100 float beta_start = 0 .00085f ;
1045- float beta_end = 0 .0120f ;
1101+ float beta_end = 0 .0120f ;
10461102 std::vector<double > alphas_cumprod;
10471103 std::vector<double > compvis_sigmas;
10481104
@@ -1053,9 +1109,8 @@ static void sample_k_diffusion(sample_method_t method,
10531109 (i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
10541110 (1 .0f -
10551111 std::pow (sqrtf (beta_start) +
1056- (sqrtf (beta_end) - sqrtf (beta_start)) *
1057- ((float )i / (TIMESTEPS - 1 )),
1058- 2 ));
1112+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1113+ ((float )i / (TIMESTEPS - 1 )), 2 ));
10591114 compvis_sigmas[i] =
10601115 std::sqrt ((1 - alphas_cumprod[i]) /
10611116 alphas_cumprod[i]);
@@ -1085,8 +1140,7 @@ static void sample_k_diffusion(sample_method_t method,
10851140 // - pred_prev_sample -> "x_t-1"
10861141 int timestep =
10871142 roundf (TIMESTEPS -
1088- i * ((float )TIMESTEPS / steps)) -
1089- 1 ;
1143+ i * ((float )TIMESTEPS / steps)) - 1 ;
10901144 // 1. get previous step value (=t-1)
10911145 int prev_timestep = timestep - TIMESTEPS / steps;
10921146 // The sigma here is chosen to cause the
@@ -1111,9 +1165,10 @@ static void sample_k_diffusion(sample_method_t method,
11111165 float * vec_x = (float *)x->data ;
11121166 for (int j = 0 ; j < ggml_nelements (x); j++) {
11131167 vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1114- sigma;
1168+ sigma;
11151169 }
1116- } else {
1170+ }
1171+ else {
11171172 // For the subsequent steps after the first one,
11181173 // at this point x = latents or x = sample, and
11191174 // needs to be prescaled with x <- sample / c_in
@@ -1151,8 +1206,9 @@ static void sample_k_diffusion(sample_method_t method,
11511206 float alpha_prod_t = alphas_cumprod[timestep];
11521207 // Note final_alpha_cumprod = alphas_cumprod[0] due to
11531208 // trailing timestep spacing
1154- float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1155- float beta_prod_t = 1 - alpha_prod_t ;
1209+ float alpha_prod_t_prev = prev_timestep >= 0 ?
1210+ alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1211+ float beta_prod_t = 1 - alpha_prod_t ;
11561212 // 3. compute predicted original sample from predicted
11571213 // noise also called "predicted x_0" of formula (12)
11581214 // from https://arxiv.org/pdf/2010.02502.pdf
@@ -1168,7 +1224,7 @@ static void sample_k_diffusion(sample_method_t method,
11681224 vec_pred_original_sample[j] =
11691225 (vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
11701226 std::sqrt (beta_prod_t ) *
1171- vec_model_output[j]) *
1227+ vec_model_output[j]) *
11721228 (1 / std::sqrt (alpha_prod_t ));
11731229 }
11741230 }
@@ -1182,8 +1238,8 @@ static void sample_k_diffusion(sample_method_t method,
11821238 // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
11831239 // sqrt(1 - alpha_t/alpha_t-1)
11841240 float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1185- float variance = (beta_prod_t_prev / beta_prod_t ) *
1186- (1 - alpha_prod_t / alpha_prod_t_prev);
1241+ float variance = (beta_prod_t_prev / beta_prod_t ) *
1242+ (1 - alpha_prod_t / alpha_prod_t_prev);
11871243 float std_dev_t = eta * std::sqrt (variance);
11881244 // 6. compute "direction pointing to x_t" of formula
11891245 // (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1202,8 +1258,8 @@ static void sample_k_diffusion(sample_method_t method,
12021258 std::pow (std_dev_t , 2 )) *
12031259 vec_model_output[j];
12041260 vec_x[j] = std::sqrt (alpha_prod_t_prev) *
1205- vec_pred_original_sample[j] +
1206- pred_sample_direction;
1261+ vec_pred_original_sample[j] +
1262+ pred_sample_direction;
12071263 }
12081264 }
12091265 if (eta > 0 ) {
@@ -1231,7 +1287,7 @@ static void sample_k_diffusion(sample_method_t method,
12311287 // by Semi-Linear Consistency Function with Trajectory
12321288 // Mapping", arXiv:2402.19159 [cs.CV]
12331289 float beta_start = 0 .00085f ;
1234- float beta_end = 0 .0120f ;
1290+ float beta_end = 0 .0120f ;
12351291 std::vector<double > alphas_cumprod;
12361292 std::vector<double > compvis_sigmas;
12371293
@@ -1242,9 +1298,8 @@ static void sample_k_diffusion(sample_method_t method,
12421298 (i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
12431299 (1 .0f -
12441300 std::pow (sqrtf (beta_start) +
1245- (sqrtf (beta_end) - sqrtf (beta_start)) *
1246- ((float )i / (TIMESTEPS - 1 )),
1247- 2 ));
1301+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1302+ ((float )i / (TIMESTEPS - 1 )), 2 ));
12481303 compvis_sigmas[i] =
12491304 std::sqrt ((1 - alphas_cumprod[i]) /
12501305 alphas_cumprod[i]);
@@ -1259,10 +1314,13 @@ static void sample_k_diffusion(sample_method_t method,
12591314 for (int i = 0 ; i < steps; i++) {
12601315 // Analytic form for TCD timesteps
12611316 int timestep = TIMESTEPS - 1 -
1262- (TIMESTEPS / original_steps) *
1263- (int )floor (i * ((float )original_steps / steps));
1317+ (TIMESTEPS / original_steps) *
1318+ (int )floor (i * ((float )original_steps / steps));
12641319 // 1. get previous step value
1265- int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int )floor ((i + 1 ) * ((float )original_steps / steps));
1320+ int prev_timestep = i >= steps - 1 ? 0 :
1321+ TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1322+ (int )floor ((i + 1 ) *
1323+ ((float )original_steps / steps));
12661324 // Here timestep_s is tau_n' in Algorithm 4. The _s
12671325 // notation appears to be that from C. Lu,
12681326 // "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1279,9 +1337,10 @@ static void sample_k_diffusion(sample_method_t method,
12791337 float * vec_x = (float *)x->data ;
12801338 for (int j = 0 ; j < ggml_nelements (x); j++) {
12811339 vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1282- sigma;
1340+ sigma;
12831341 }
1284- } else {
1342+ }
1343+ else {
12851344 float * vec_x = (float *)x->data ;
12861345 for (int j = 0 ; j < ggml_nelements (x); j++) {
12871346 vec_x[j] *= std::sqrt (sigma * sigma + 1 );
@@ -1314,14 +1373,15 @@ static void sample_k_diffusion(sample_method_t method,
13141373 // DPM-Solver. In fact, we have alpha_{t_n} =
13151374 // \sqrt{\hat{alpha_n}}, [...]"
13161375 float alpha_prod_t = alphas_cumprod[timestep];
1317- float beta_prod_t = 1 - alpha_prod_t ;
1376+ float beta_prod_t = 1 - alpha_prod_t ;
13181377 // Note final_alpha_cumprod = alphas_cumprod[0] since
13191378 // TCD is always "trailing"
1320- float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1379+ float alpha_prod_t_prev = prev_timestep >= 0 ?
1380+ alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
13211381 // The subscript _s are the only portion in this
13221382 // section (2) unique to TCD
13231383 float alpha_prod_s = alphas_cumprod[timestep_s];
1324- float beta_prod_s = 1 - alpha_prod_s;
1384+ float beta_prod_s = 1 - alpha_prod_s;
13251385 // 3. Compute the predicted noised sample x_s based on
13261386 // the model parameterization
13271387 //
@@ -1336,7 +1396,7 @@ static void sample_k_diffusion(sample_method_t method,
13361396 vec_pred_original_sample[j] =
13371397 (vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
13381398 std::sqrt (beta_prod_t ) *
1339- vec_model_output[j]) *
1399+ vec_model_output[j]) *
13401400 (1 / std::sqrt (alpha_prod_t ));
13411401 }
13421402 }
@@ -1358,9 +1418,9 @@ static void sample_k_diffusion(sample_method_t method,
13581418 // pred_epsilon = model_output
13591419 vec_x[j] =
13601420 std::sqrt (alpha_prod_s) *
1361- vec_pred_original_sample[j] +
1421+ vec_pred_original_sample[j] +
13621422 std::sqrt (beta_prod_s) *
1363- vec_model_output[j];
1423+ vec_model_output[j];
13641424 }
13651425 }
13661426 // 4. Sample and inject noise z ~ N(0, I) for
@@ -1376,7 +1436,7 @@ static void sample_k_diffusion(sample_method_t method,
13761436 // In this case, x is still pred_noised_sample,
13771437 // continue in-place
13781438 ggml_tensor_set_f32_randn (noise, rng);
1379- float * vec_x = (float *)x->data ;
1439+ float * vec_x = (float *)x->data ;
13801440 float * vec_noise = (float *)noise->data ;
13811441 for (int j = 0 ; j < ggml_nelements (x); j++) {
13821442 // Corresponding to (35) in Zheng et
@@ -1385,10 +1445,10 @@ static void sample_k_diffusion(sample_method_t method,
13851445 vec_x[j] =
13861446 std::sqrt (alpha_prod_t_prev /
13871447 alpha_prod_s) *
1388- vec_x[j] +
1448+ vec_x[j] +
13891449 std::sqrt (1 - alpha_prod_t_prev /
1390- alpha_prod_s) *
1391- vec_noise[j];
1450+ alpha_prod_s) *
1451+ vec_noise[j];
13921452 }
13931453 }
13941454 }
@@ -1400,4 +1460,4 @@ static void sample_k_diffusion(sample_method_t method,
14001460 }
14011461}
14021462
1403- #endif // __DENOISER_HPP__
1463+ #endif // __DENOISER_HPP__
0 commit comments