@@ -252,45 +252,6 @@ static void whisper_set_i32_nd(struct ggml_tensor * t, int64_t i0, int64_t i1, i
252252 *(int32_t *) data = v;
253253}
254254
255- // faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
256- // the idea is to represent the original matrix multiplication:
257- //
258- // Z = X @ Y
259- //
260- // with the sum of two matrix multiplications:
261- //
262- // Z = (X_0 @ Y_0) + (X_1 @ Y_1)
263- //
264- // here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad"
265- // and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
266- // general-purpose kernels
267- //
268- static struct ggml_tensor * ggml_mul_mat_pad (struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32 ) {
269- // use padding only if dimension 0 is at least 8 times larger than the padding
270- // else we won't get much benefit from the optimization
271- const int n_pad_req = 8 ;
272-
273- if (x->ne [0 ] % pad == 0 || x->ne [0 ] / pad < n_pad_req) {
274- return ggml_mul_mat (ctx, x, y);
275- }
276-
277- struct ggml_tensor * x_0 = ggml_view_3d (ctx, x, (x->ne [0 ]/pad)*pad, x->ne [1 ], x->ne [2 ], x->nb [1 ], x->nb [2 ], 0 );
278- struct ggml_tensor * x_1 = ggml_view_3d (ctx, x, x->ne [0 ]%pad, x->ne [1 ], x->ne [2 ], x->nb [1 ], x->nb [2 ], x_0->ne [0 ]*x_0->nb [0 ]);
279-
280- struct ggml_tensor * y_0 = ggml_view_3d (ctx, y, (y->ne [0 ]/pad)*pad, y->ne [1 ], y->ne [2 ], y->nb [1 ], y->nb [2 ], 0 );
281- struct ggml_tensor * y_1 = ggml_view_3d (ctx, y, y->ne [0 ]%pad, y->ne [1 ], y->ne [2 ], y->nb [1 ], y->nb [2 ], y_0->ne [0 ]*y_0->nb [0 ]);
282-
283- return ggml_add (ctx,
284- ggml_mul_mat (ctx, x_0, y_0),
285- ggml_mul_mat (ctx, x_1, y_1));
286- }
287-
// On Metal builds, route every later ggml_mul_mat call in this file through ggml_mul_mat_pad.
// This has to stay below the definition of ggml_mul_mat_pad, which itself calls the real ggml_mul_mat.
// TODO: check if other platforms can benefit from this optimization
// TODO: CUDA is currently broken - seems ggml_mul_mat does not handle views correctly
#ifdef GGML_USE_METAL
#define ggml_mul_mat ggml_mul_mat_pad
#endif
293-
294255// available whisper models
295256enum e_model {
296257 MODEL_UNKNOWN,
0 commit comments