From d9b6860b02796b1a8f2a17fa35df4087e0a1d947 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 20 Oct 2025 13:42:26 +0200 Subject: [PATCH] vulkan: fix matmul pipeline selection for small n values Change the mul_mat and mul_mat_id pipeline selection heuristic to prevent Intel Arc GPU hangs. The previous logic would select the small pipeline (mul_mat_id_s) whenever either dimension was small (m <= 32 or n <= 32), even if the other was large, causing hangs on Intel Arc when, e.g., m=512 and n=23, as happens with IBM Granite 4. Signed-off-by: Giuseppe Scrivano --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 21bd052255564..133f5ca8e97d1 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5826,10 +5826,10 @@ static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, return aligned ? mmp->a_s : mmp->s; } - if ((ctx->device->mul_mat_s[src0_type] && (m <= 32 || n <= 32)) || (!ctx->device->mul_mat_m[src0_type] && !ctx->device->mul_mat_l[src0_type])) { + if ((ctx->device->mul_mat_s[src0_type] && (m * n <= 32 * 32)) || (!ctx->device->mul_mat_m[src0_type] && !ctx->device->mul_mat_l[src0_type])) { return aligned ? mmp->a_s : mmp->s; } - if ((ctx->device->mul_mat_m[src0_type] && (m <= 64 || n <= 64)) || !ctx->device->mul_mat_l[src0_type]) { + if ((ctx->device->mul_mat_m[src0_type] && (m * n <= 64 * 64)) || !ctx->device->mul_mat_l[src0_type]) { return aligned ? mmp->a_m : mmp->m; } return aligned ? mmp->a_l : mmp->l; @@ -5892,10 +5892,10 @@ static vk_pipeline ggml_vk_guess_matmul_id_pipeline(ggml_backend_vk_context * ct return aligned ? 
mmp->a_s : mmp->s; } - if ((ctx->device->mul_mat_id_s[src0_type] && (m <= 32 || n <= 32)) || (!ctx->device->mul_mat_id_m[src0_type] && !ctx->device->mul_mat_id_l[src0_type])) { + if ((ctx->device->mul_mat_id_s[src0_type] && (m * n <= 32 * 32)) || (!ctx->device->mul_mat_id_m[src0_type] && !ctx->device->mul_mat_id_l[src0_type])) { return aligned ? mmp->a_s : mmp->s; } - if ((ctx->device->mul_mat_id_m[src0_type] && (m <= 64 || n <= 64)) || !ctx->device->mul_mat_id_l[src0_type]) { + if ((ctx->device->mul_mat_id_m[src0_type] && (m * n <= 64 * 64)) || !ctx->device->mul_mat_id_l[src0_type]) { return aligned ? mmp->a_m : mmp->m; } return aligned ? mmp->a_l : mmp->l;