4747 GGML_METAL_DECL_KERNEL (relu);
4848 GGML_METAL_DECL_KERNEL (soft_max);
4949 GGML_METAL_DECL_KERNEL (diag_mask_inf);
50+ GGML_METAL_DECL_KERNEL (get_rows_f16);
5051 GGML_METAL_DECL_KERNEL (get_rows_q4_0);
5152 GGML_METAL_DECL_KERNEL (rms_norm);
52- GGML_METAL_DECL_KERNEL (mul_mat_q4_0_f32);
5353 GGML_METAL_DECL_KERNEL (mul_mat_f16_f32);
54+ GGML_METAL_DECL_KERNEL (mul_mat_q4_0_f32);
5455 GGML_METAL_DECL_KERNEL (rope);
5556 GGML_METAL_DECL_KERNEL (cpy_f32_f16);
5657 GGML_METAL_DECL_KERNEL (cpy_f32_f32);
130131 GGML_METAL_ADD_KERNEL (relu);
131132 GGML_METAL_ADD_KERNEL (soft_max);
132133 GGML_METAL_ADD_KERNEL (diag_mask_inf);
134+ GGML_METAL_ADD_KERNEL (get_rows_f16);
133135 GGML_METAL_ADD_KERNEL (get_rows_q4_0);
134136 GGML_METAL_ADD_KERNEL (rms_norm);
135- GGML_METAL_ADD_KERNEL (mul_mat_q4_0_f32);
136137 GGML_METAL_ADD_KERNEL (mul_mat_f16_f32);
138+ GGML_METAL_ADD_KERNEL (mul_mat_q4_0_f32);
137139 GGML_METAL_ADD_KERNEL (rope);
138140 GGML_METAL_ADD_KERNEL (cpy_f32_f16);
139141 GGML_METAL_ADD_KERNEL (cpy_f32_f32);
@@ -498,6 +500,14 @@ void ggml_metal_graph_compute(
498500
499501 // use custom matrix x vector kernel
500502 switch (src0t) {
503+ case GGML_TYPE_F16:
504+ {
505+ GGML_ASSERT (ne02 == ne12);
506+
507+ nth0 = 64 ;
508+ nth1 = 1 ;
509+ [encoder setComputePipelineState: ctx->pipeline_mul_mat_f16_f32];
510+ } break ;
501511 case GGML_TYPE_Q4_0:
502512 {
503513 GGML_ASSERT (ne02 == 1 );
@@ -507,14 +517,6 @@ void ggml_metal_graph_compute(
507517 nth1 = 4 ;
508518 [encoder setComputePipelineState: ctx->pipeline_mul_mat_q4_0_f32];
509519 } break ;
510- case GGML_TYPE_F16:
511- {
512- GGML_ASSERT (ne02 == ne12);
513-
514- nth0 = 32 ;
515- nth1 = 1 ;
516- [encoder setComputePipelineState: ctx->pipeline_mul_mat_f16_f32];
517- } break ;
518520 default : GGML_ASSERT (false && " not implemented" );
519521 };
520522
@@ -551,6 +553,7 @@ void ggml_metal_graph_compute(
551553 }
552554
553555 switch (src0->type ) {
556+ case GGML_TYPE_F16: [encoder setComputePipelineState: ctx->pipeline_get_rows_f16]; break ;
554557 case GGML_TYPE_Q4_0: [encoder setComputePipelineState: ctx->pipeline_get_rows_q4_0]; break ;
555558 default : GGML_ASSERT (false && " not implemented" );
556559 }
0 commit comments