@@ -99,7 +99,6 @@ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);
9999
100100struct vk_matmul_pipeline_struct {
101101 vk_pipeline l, m, s;
102- vk_pipeline a_l, a_m, a_s;
103102};
104103
105104typedef std::shared_ptr<vk_matmul_pipeline_struct> vk_matmul_pipeline;
@@ -1603,7 +1602,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
16031602 l_mmq_wg_denoms_k, m_mmq_wg_denoms_k, s_mmq_wg_denoms_k,
16041603 l_mmqid_wg_denoms, m_mmqid_wg_denoms, s_mmqid_wg_denoms;
16051604
1606- uint32_t l_align, m_align, s_align;
16071605 if (device->coopmat2 ) {
16081606 // spec constants and tile sizes for non-quant matmul/matmul_id
16091607 l_warptile = { 256 , 128 , 256 , 64 , 1 };
@@ -1636,10 +1634,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
16361634 l_mmqid_wg_denoms = { 128 , 64 , 1 };
16371635 m_mmqid_wg_denoms = { 128 , 64 , 1 };
16381636 s_mmqid_wg_denoms = { 128 , 64 , 1 };
1639-
1640- l_align = 128 ;
1641- m_align = 64 ;
1642- s_align = 32 ;
16431637 } else {
16441638 // Matrix cores require different warp group sizes
16451639 const uint32_t tm_l = device->coopmat_support ? device->coopmat_m : 4 ;
@@ -1663,9 +1657,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
16631657 l_mmq_wg_denoms = l_wg_denoms = {128 , 128 , 1 };
16641658 m_mmq_wg_denoms = m_wg_denoms = { 64 , 64 , 1 };
16651659 s_mmq_wg_denoms = s_wg_denoms = { 32 , 32 , 1 };
1666- l_align = 128 ;
1667- m_align = 64 ;
1668- s_align = 32 ;
16691660
16701661 for (uint32_t i = 0 ; i < GGML_TYPE_COUNT; ++i) {
16711662 ggml_type t = (ggml_type)i;
@@ -1802,9 +1793,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
18021793 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->l , #NAMELC #F16ACC " _l" , NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, 1 ); \
18031794 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 ); \
18041795 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _cm2_len, NAMELC ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 ); \
1805- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
1806- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
1807- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _cm2_len, NAMELC ## _aligned ## F16ACC ## _cm2_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
18081796
18091797 // Create 2 variants, {f16,f32} accumulator
18101798#define CREATE_MM2 (PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT ) \
@@ -1866,12 +1854,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
18661854 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _coopmat_len, NAMELC ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 , false , true ); \
18671855 if (device->mul_mat ## ID ## _s[TYPE]) \
18681856 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _coopmat_len, NAMELC ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 , false , true ); \
1869- if (device->mul_mat ## ID ## _l[TYPE]) \
1870- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align, false , true ); \
1871- if (device->mul_mat ## ID ## _m[TYPE]) \
1872- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align, false , true ); \
1873- if (device->mul_mat ## ID ## _s[TYPE]) \
1874- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _coopmat_len, NAMELC ## _aligned ## F16ACC ## _coopmat_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align, false , true ); \
18751857
18761858 // Create 2 variants, {f16,f32} accumulator
18771859#define CREATE_MM2 (TYPE, PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID ) \
@@ -1991,12 +1973,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
19911973 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 ); \
19921974 if (device->mul_mat ## ID ## _s[TYPE]) \
19931975 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _len, NAMELC ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 ); \
1994- if (device->mul_mat ## ID ## _l[TYPE]) \
1995- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
1996- if (device->mul_mat ## ID ## _m[TYPE]) \
1997- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
1998- if (device->mul_mat ## ID ## _s[TYPE]) \
1999- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _len, NAMELC ## _aligned ## F16ACC ## _data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
20001976
20011977 // Create 2 variants, {f16,f32} accumulator
20021978#define CREATE_MM2 (TYPE, PIPELINE_NAME, NAMELC, WG_DENOMS, WARPTILE, PUSHCONST, PARAMCOUNT, ID ) \
@@ -2064,12 +2040,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
20642040 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->m , #NAMELC #F16ACC " _m" , NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, 1 ); \
20652041 if (device->mul_mat ## ID ## _s[TYPE]) \
20662042 ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->s , #NAMELC #F16ACC " _s" , NAMELC ## F16ACC ## _fp32_len, NAMELC ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, 1 ); \
2067- if (device->mul_mat ## ID ## _l[TYPE]) \
2068- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_l , #NAMELC #F16ACC " _aligned_l" , NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), l_ ## WG_DENOMS, l_ ## WARPTILE, l_align); \
2069- if (device->mul_mat ## ID ## _m[TYPE]) \
2070- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_m , #NAMELC #F16ACC " _aligned_m" , NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), m_ ## WG_DENOMS, m_ ## WARPTILE, m_align); \
2071- if (device->mul_mat ## ID ## _s[TYPE]) \
2072- ggml_vk_create_pipeline (device, device-> PIPELINE_NAME ->a_s , #NAMELC #F16ACC " _aligned_s" , NAMELC ## _aligned ## F16ACC ## _fp32_len, NAMELC ## _aligned ## F16ACC ## _fp32_data, " main" , PARAMCOUNT, sizeof (PUSHCONST), s_ ## WG_DENOMS, s_ ## WARPTILE, s_align); \
20732043
20742044 CREATE_MM (GGML_TYPE_F32, pipeline_matmul_f32, matmul_f32_f32, , wg_denoms, warptile, vk_mat_mat_push_constants, 3 , );
20752045 CREATE_MM (GGML_TYPE_F32, pipeline_matmul_f32_f16, matmul_f32_f16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3 , );
@@ -4003,35 +3973,30 @@ static uint32_t ggml_vk_guess_split_k(ggml_backend_vk_context * ctx, int m, int
40033973 return split_k;
40043974}
40053975
4006- static vk_pipeline ggml_vk_guess_matmul_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned, ggml_type src0_type) {
4007- VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline(" << m << " , " << n << " , " << aligned << " , " << ggml_type_name (src0_type) << " )" );
3976+ static vk_pipeline ggml_vk_guess_matmul_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
3977+ VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
40083978
40093979 if (ctx->device ->coopmat2 ) {
40103980 // Use large shader when the N dimension is greater than the medium shader's tile size
40113981 uint32_t crossover_large = mmp->m ->wg_denoms [1 ];
40123982 if ((ctx->device ->mul_mat_l [src0_type] && (n > crossover_large)) || (!ctx->device ->mul_mat_m [src0_type] && !ctx->device ->mul_mat_s [src0_type])) {
4013- return aligned ? mmp-> a_l : mmp->l ;
3983+ return mmp->l ;
40143984 }
40153985 // Use medium shader when the N dimension is greater than the small shader's tile size
40163986 uint32_t crossover_medium = mmp->s ->wg_denoms [1 ];
40173987 if ((ctx->device ->mul_mat_m [src0_type] && (n > crossover_medium)) || !ctx->device ->mul_mat_s [src0_type]) {
4018- return aligned ? mmp-> a_m : mmp->m ;
3988+ return mmp->m ;
40193989 }
4020- return aligned ? mmp-> a_s : mmp->s ;
3990+ return mmp->s ;
40213991 }
40223992
40233993 if ((ctx->device ->mul_mat_s [src0_type] && (m <= 32 || n <= 32 )) || (!ctx->device ->mul_mat_m [src0_type] && !ctx->device ->mul_mat_l [src0_type])) {
4024- return aligned ? mmp-> a_s : mmp->s ;
3994+ return mmp->s ;
40253995 }
40263996 if ((ctx->device ->mul_mat_m [src0_type] && (m <= 64 || n <= 64 )) || !ctx->device ->mul_mat_l [src0_type]) {
4027- return aligned ? mmp-> a_m : mmp->m ;
3997+ return mmp->m ;
40283998 }
4029- return aligned ? mmp->a_l : mmp->l ;
4030- }
4031-
4032- static uint32_t ggml_vk_guess_matmul_pipeline_align (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
4033- VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline_align(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
4034- return ggml_vk_guess_matmul_pipeline (ctx, mmp, m, n, true , src0_type)->align ;
3999+ return mmp->l ;
40354000}
40364001
40374002static void ggml_vk_matmul (
@@ -4059,35 +4024,30 @@ static void ggml_vk_matmul(
40594024 ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_matmul_split_k_reduce , { split_k_buffer, d }, pc2.size () * sizeof (uint32_t ), pc2.data (), { m * n * batch, 1 , 1 });
40604025}
40614026
4062- static vk_pipeline ggml_vk_guess_matmul_id_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned , ggml_type src0_type) {
4063- VK_LOG_DEBUG (" ggml_vk_guess_matmul_id_pipeline(" << m << " , " << n << " , " << aligned << " , " << ggml_type_name (src0_type) << " )" );
4027+ static vk_pipeline ggml_vk_guess_matmul_id_pipeline (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, uint32_t m, uint32_t n , ggml_type src0_type) {
4028+ VK_LOG_DEBUG (" ggml_vk_guess_matmul_id_pipeline(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
40644029
40654030 if (ctx->device ->coopmat2 ) {
40664031 // Use large shader when the N dimension is greater than the medium shader's tile size
40674032 uint32_t crossover_large = mmp->m ->wg_denoms [1 ];
40684033 if ((ctx->device ->mul_mat_id_l [src0_type] && (n > crossover_large)) || (!ctx->device ->mul_mat_id_m [src0_type] && !ctx->device ->mul_mat_id_s [src0_type])) {
4069- return aligned ? mmp-> a_l : mmp->l ;
4034+ return mmp->l ;
40704035 }
40714036 // Use medium shader when the N dimension is greater than the small shader's tile size
40724037 uint32_t crossover_medium = mmp->s ->wg_denoms [1 ];
40734038 if ((ctx->device ->mul_mat_id_m [src0_type] && (n > crossover_medium)) || !ctx->device ->mul_mat_id_s [src0_type]) {
4074- return aligned ? mmp-> a_m : mmp->m ;
4039+ return mmp->m ;
40754040 }
4076- return aligned ? mmp-> a_s : mmp->s ;
4041+ return mmp->s ;
40774042 }
40784043
40794044 if ((ctx->device ->mul_mat_id_s [src0_type] && (m <= 32 || n <= 32 )) || (!ctx->device ->mul_mat_id_m [src0_type] && !ctx->device ->mul_mat_id_l [src0_type])) {
4080- return aligned ? mmp-> a_s : mmp->s ;
4045+ return mmp->s ;
40814046 }
40824047 if ((ctx->device ->mul_mat_id_m [src0_type] && (m <= 64 || n <= 64 )) || !ctx->device ->mul_mat_id_l [src0_type]) {
4083- return aligned ? mmp-> a_m : mmp->m ;
4048+ return mmp->m ;
40844049 }
4085- return aligned ? mmp->a_l : mmp->l ;
4086- }
4087-
4088- static uint32_t ggml_vk_guess_matmul_id_pipeline_align (ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type src0_type) {
4089- VK_LOG_DEBUG (" ggml_vk_guess_matmul_pipeline_align(" << m << " , " << n << " , " << ggml_type_name (src0_type) << " )" );
4090- return ggml_vk_guess_matmul_id_pipeline (ctx, mmp, m, n, true , src0_type)->align ;
4050+ return mmp->l ;
40914051}
40924052
40934053static void ggml_vk_matmul_id (
@@ -4265,10 +4225,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
42654225 // Not implemented
42664226 GGML_ASSERT (y_non_contig || !qy_needs_dequant); // NOLINT
42674227
4268- const uint32_t kpad = ggml_vk_align_size (ne10, ggml_vk_guess_matmul_pipeline_align (ctx, mmp, ne01, ne11, qx_needs_dequant ? GGML_TYPE_F16 : src0->type ));
4269- const bool aligned = ne10 == kpad && ne01 > 8 && ne11 > 8 ;
4270-
4271- vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline (ctx, mmp, ne01, ne11, aligned, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
4228+ vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline (ctx, mmp, ne01, ne11, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
42724229
42734230 // Reserve extra storage in the N dimension for the Y matrix, so we can avoid bounds-checking
42744231 uint32_t padded_n = qy_needs_dequant ? ROUNDUP_POW2 (ne11, pipeline->wg_denoms [1 ]) :ne11;
@@ -4849,10 +4806,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
48494806 // Not implemented
48504807 GGML_ASSERT (y_non_contig || !qy_needs_dequant); // NOLINT
48514808
4852- const uint32_t kpad = ggml_vk_align_size (ne10, ggml_vk_guess_matmul_id_pipeline_align (ctx, mmp, ne01, nei1, qx_needs_dequant ? GGML_TYPE_F16 : src0->type ));
4853- const bool aligned = ne10 == kpad && ne01 > 8 && nei1 > 8 ;
4854-
4855- vk_pipeline pipeline = ggml_vk_guess_matmul_id_pipeline (ctx, mmp, ne01, nei1, aligned, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
4809+ vk_pipeline pipeline = ggml_vk_guess_matmul_id_pipeline (ctx, mmp, ne01, nei1, qx_needs_dequant ? GGML_TYPE_F16 : src0->type );
48564810
48574811 // Reserve extra storage in the N dimension for the Y matrix, so we can avoid bounds-checking
48584812 uint32_t padded_n = qy_needs_dequant ? ROUNDUP_POW2 (ne11, pipeline->wg_denoms [1 ]) :ne11;
@@ -7218,34 +7172,24 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
72187172
72197173 vk_pipeline p;
72207174 std::string shname;
7175+ const size_t kpad = ggml_vk_align_size (k, p->align );
7176+
72217177 if (shader_size == 0 ) {
7222- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->a_s : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->a_s ;
7223- shname = std::string (ggml_type_name (quant)) + " _ALIGNED_S " ;
7178+ p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->s : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->s ;
7179+ shname = std::string (ggml_type_name (quant)) + " _S " ;
72247180 } else if (shader_size == 1 ) {
7225- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->a_m : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->a_m ;
7226- shname = std::string (ggml_type_name (quant)) + " _ALIGNED_M " ;
7181+ p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->m : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->m ;
7182+ shname = std::string (ggml_type_name (quant)) + " _M " ;
72277183 } else if (shader_size == 2 ) {
7228- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->a_l : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->a_l ;
7229- shname = std::string (ggml_type_name (quant)) + " _ALIGNED_L " ;
7184+ p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->l : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->l ;
7185+ shname = std::string (ggml_type_name (quant)) + " _L " ;
72307186 } else {
72317187 GGML_ASSERT (0 );
72327188 }
72337189
7234- const size_t kpad = ggml_vk_align_size (k, p->align );
7235-
7236- if (k != kpad) {
7237- if (shader_size == 0 ) {
7238- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->s : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->s ;
7239- shname = std::string (ggml_type_name (quant)) + " _S" ;
7240- } else if (shader_size == 1 ) {
7241- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->m : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->m ;
7242- shname = std::string (ggml_type_name (quant)) + " _M" ;
7243- } else if (shader_size == 2 ) {
7244- p = ctx->device ->fp16 ? ctx->device ->pipeline_dequant_mul_mat_mat [quant].f16acc ->l : ctx->device ->pipeline_dequant_mul_mat_mat [quant].f32acc ->l ;
7245- shname = std::string (ggml_type_name (quant)) + " _L" ;
7246- } else {
7247- GGML_ASSERT (0 );
7248- }
7190+ if (p == nullptr ) {
7191+ std::cerr << " error: no pipeline for ggml_vk_test_dequant_matmul " << ggml_type_name (quant) << std::endl;
7192+ return ;
72497193 }
72507194
72517195 const size_t x_sz = sizeof (float ) * x_ne;
0 commit comments