fixes for sdcpp

LostRuins · LostRuins · commit 7e1289ade809 · 2025-04-12T10:08:23.000+08:00
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
@@ -363,26 +363,26 @@ struct clip_ctx {
         if(enable_gpu_clip)
         {
         #ifdef GGML_USE_CUDA
-            backend = ggml_backend_cuda_init(0);
+            backend = ggml_backend_ptr(ggml_backend_cuda_init(0));
             LOG_INF("%s: CLIP using CUDA backend\n", __func__);
         #endif
         #ifdef GGML_USE_METAL
-            backend = ggml_backend_metal_init();
+            backend = ggml_backend_ptr(ggml_backend_metal_init());
             LOG_INF("%s: CLIP using Metal backend\n", __func__);
         #endif
         #ifdef GGML_USE_VULKAN
-            backend = ggml_backend_vk_init(0);
+            backend = ggml_backend_ptr(ggml_backend_vk_init(0));
             LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
         #endif
         }
 
         if (!backend) {
-            backend = ggml_backend_cpu_init();
+            backend = ggml_backend_ptr(ggml_backend_cpu_init());
             LOG_INF("%s: CLIP using CPU backend\n", __func__);
         }
 
-        backend_ptrs.push_back(backend);
-        backend_buft.push_back(ggml_backend_get_default_buffer_type(backend));
+        backend_ptrs.push_back(backend.get());
+        backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get()));
 
         sched.reset(
             ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false)
@@ -1228,7 +1228,7 @@ struct clip_model_loader {
 
         // print gguf info
         try {
-        
+
             std::string name;
             get_string(KEY_NAME, name, false);
             std::string description;
@@ -2950,8 +2950,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         if (window_mask) ggml_backend_tensor_set(window_mask, mask.data(), 0, ggml_nbytes(window_mask));
     }
 
-    if (ggml_backend_is_cpu(ctx->backend)) {
-        ggml_backend_cpu_set_n_threads(ctx->backend, n_threads);
+    if (ggml_backend_is_cpu(ctx->backend.get())) {
+        ggml_backend_cpu_set_n_threads(ctx->backend.get(), n_threads);
     }
 
     auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);
diff --git a/otherarch/sdcpp/common.hpp b/otherarch/sdcpp/common.hpp
@@ -56,7 +56,7 @@ class UpSampleBlock : public GGMLBlock {
         // x: [N, channels, h, w]
         auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
 
-        x = ggml_upscale(ctx, x, 2);  // [N, channels, h*2, w*2]
+        x = ggml_upscale(ctx, x, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);  // [N, channels, h*2, w*2]
         x = conv->forward(ctx, x);    // [N, out_channels, h*2, w*2]
         return x;
     }
diff --git a/otherarch/sdcpp/esrgan.hpp b/otherarch/sdcpp/esrgan.hpp
@@ -130,8 +130,8 @@ class RRDBNet : public GGMLBlock {
         body_feat = conv_body->forward(ctx, body_feat);
         feat      = ggml_add(ctx, feat, body_feat);
         // upsample
-        feat     = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2)));
-        feat     = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2)));
+        feat     = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
+        feat     = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
         auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
         return out;
     }
diff --git a/otherarch/sdcpp/ggml_extend.hpp b/otherarch/sdcpp/ggml_extend.hpp
@@ -113,7 +113,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                                      a->ne[0] * b->ne[0],
                                      a->ne[1] * b->ne[1],
                                      a->ne[2] * b->ne[2],
-                                     a->ne[3] * b->ne[3]),
+                                     a->ne[3] * b->ne[3],
+                                     ggml_scale_mode::GGML_SCALE_MODE_NEAREST),
                     b);
 }
 
diff --git a/otherarch/sdcpp/model.cpp b/otherarch/sdcpp/model.cpp
@@ -1749,7 +1749,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
     bool success = true;
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
         std::string file_path = file_paths_[file_index];
-        LOG_DEBUG("loading tensors from %s", file_path.c_str());
+        LOG_DEBUG("loading tensors from %s\n", file_path.c_str());
 
         std::ifstream file(file_path, std::ios::binary);
         if (!file.is_open()) {
@@ -1886,7 +1886,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                 }
             }
             int64_t t2 = ggml_time_ms();
-            pretty_progress(++tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
+            ++tensor_count;
+            if(tensor_count<2 || tensor_count%5==0 || (tensor_count+10) > processed_tensor_storages.size())
+            {
+                //throttle progress printing
+                pretty_progress(tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
+            }
             t1 = t2;
         }
 
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -160,6 +160,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
     {
         printf("With Custom Clip-G Model: %s\n",clipg_filename.c_str());
     }
+    if(inputs.quant)
+    {
+        printf("Note: Loading a pre-quantized model is always faster than using compress weights!\n");
+    }
 
     //duplicated from expose.cpp
     int cl_parseinfo = inputs.clblast_info; //first digit is whether configured, second is platform, third is devices
diff --git a/otherarch/sdcpp/tae.hpp b/otherarch/sdcpp/tae.hpp
@@ -149,7 +149,7 @@ class TinyDecoder : public UnaryBlock {
                 if (i == 1) {
                     h = ggml_relu_inplace(ctx, h);
                 } else {
-                    h = ggml_upscale(ctx, h, 2);
+                    h = ggml_upscale(ctx, h, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);
                 }
                 continue;
             }

Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@ class UpSampleBlock : public GGMLBlock {`
`56`	`56`	`// x: [N, channels, h, w]`
`57`	`57`	`auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);`
`58`	`58`
`59`		`- x = ggml_upscale(ctx, x, 2); // [N, channels, h*2, w*2]`
	`59`	`+ x = ggml_upscale(ctx, x, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]`
`60`	`60`	`x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]`
`61`	`61`	`return x;`
`62`	`62`	`}`
Original file line number	Diff line number	Diff line change
`@@ -113,7 +113,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g`
`113`	`113`	`a->ne[0] * b->ne[0],`
`114`	`114`	`a->ne[1] * b->ne[1],`
`115`	`115`	`a->ne[2] * b->ne[2],`
`116`		`- a->ne[3] * b->ne[3]),`
	`116`	`+ a->ne[3] * b->ne[3],`
	`117`	`+ ggml_scale_mode::GGML_SCALE_MODE_NEAREST),`
`117`	`118`	`b);`
`118`	`119`	`}`
`119`	`120`
Original file line number	Diff line number	Diff line change
`@@ -160,6 +160,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {`
`160`	`160`	`{`
`161`	`161`	`printf("With Custom Clip-G Model: %s\n",clipg_filename.c_str());`
`162`	`162`	`}`
	`163`	`+ if(inputs.quant)`
	`164`	`+ {`
	`165`	`+ printf("Note: Loading a pre-quantized model is always faster than using compress weights!\n");`
	`166`	`+ }`
`163`	`167`
`164`	`168`	`//duplicated from expose.cpp`
`165`	`169`	`int cl_parseinfo = inputs.clblast_info; //first digit is whether configured, second is platform, third is devices`
Original file line number	Diff line number	Diff line change
`@@ -149,7 +149,7 @@ class TinyDecoder : public UnaryBlock {`
`149`	`149`	`if (i == 1) {`
`150`	`150`	`h = ggml_relu_inplace(ctx, h);`
`151`	`151`	`} else {`
`152`		`- h = ggml_upscale(ctx, h, 2);`
	`152`	`+ h = ggml_upscale(ctx, h, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);`
`153`	`153`	`}`
`154`	`154`	`continue;`
`155`	`155`	`}`