Skip to content

Commit 261e6a2

Browse files
authored
Vulkan: Clean up mul_mm shader (#15987)
* vulkan: move mul_mm dequantization steps into a separate file and functions
* improve mul_mm vector load code
* fix debug mode issues and warnings
1 parent a0e13dc commit 261e6a2

File tree

5 files changed

+663
-577
lines changed

5 files changed

+663
-577
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1231,8 +1231,6 @@ static std::string format_size(size_t size) {
12311231
return oss.str();
12321232
}
12331233

1234-
static std::mutex log_mutex;
1235-
12361234
class vk_memory_logger {
12371235
public:
12381236
vk_memory_logger(): total_device(0), total_host(0) {}
@@ -1422,6 +1420,8 @@ struct ggml_backend_vk_buffer_context {
14221420
};
14231421

14241422
#ifdef GGML_VULKAN_MEMORY_DEBUG
1423+
static std::mutex log_mutex;
1424+
14251425
void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
14261426
std::lock_guard<std::mutex> guard(log_mutex);
14271427
vk_buffer buf = buf_ref.lock();
@@ -13138,16 +13138,16 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
1313813138
} else if (tensor->op == GGML_OP_IM2COL_3D) {
1313913139
const int32_t s0 = tensor->op_params[0];
1314013140
const int32_t s1 = tensor->op_params[1];
13141-
const int32_t s1 = tensor->op_params[2];
13141+
const int32_t s2 = tensor->op_params[2];
1314213142
const int32_t p0 = tensor->op_params[3];
1314313143
const int32_t p1 = tensor->op_params[4];
13144-
const int32_t p1 = tensor->op_params[5];
13144+
const int32_t p2 = tensor->op_params[5];
1314513145
const int32_t d0 = tensor->op_params[6];
1314613146
const int32_t d1 = tensor->op_params[7];
13147-
const int32_t d1 = tensor->op_params[8];
13147+
const int32_t d2 = tensor->op_params[8];
1314813148
const int32_t IC = tensor->op_params[9];
1314913149

13150-
tensor_clone = ggml_im2col(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
13150+
tensor_clone = ggml_im2col_3d(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
1315113151
} else if (tensor->op == GGML_OP_TIMESTEP_EMBEDDING) {
1315213152
const int32_t dim = tensor->op_params[0];
1315313153
const int32_t max_period = tensor->op_params[1];

0 commit comments

Comments
 (0)