
Commit b4909a6

whisper : remove ggml_mul_mat padding (#3436)
1 parent fcf0181 · commit b4909a6

File tree

1 file changed: +0 −39 lines changed


src/whisper.cpp

Lines changed: 0 additions & 39 deletions
@@ -252,45 +252,6 @@ static void whisper_set_i32_nd(struct ggml_tensor * t, int64_t i0, int64_t i1, i
     *(int32_t *) data = v;
 }
 
-// faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
-// the idea is to represent the original matrix multiplication:
-//
-//   Z = X @ Y
-//
-// with the sum of two matrix multiplications:
-//
-//   Z = (X_0 @ Y_0) + (X_1 @ Y_1)
-//
-// here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad"
-// and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
-// general-purpose kernels
-//
-static struct ggml_tensor * ggml_mul_mat_pad(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
-    // use padding only if dimension 0 is at least 8 times larger than the padding
-    // else we won't get much benefit from the optimization
-    const int n_pad_req = 8;
-
-    if (x->ne[0] % pad == 0 || x->ne[0] / pad < n_pad_req) {
-        return ggml_mul_mat(ctx, x, y);
-    }
-
-    struct ggml_tensor * x_0 = ggml_view_3d(ctx, x, (x->ne[0]/pad)*pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], 0);
-    struct ggml_tensor * x_1 = ggml_view_3d(ctx, x,  x->ne[0]%pad,      x->ne[1], x->ne[2], x->nb[1], x->nb[2], x_0->ne[0]*x_0->nb[0]);
-
-    struct ggml_tensor * y_0 = ggml_view_3d(ctx, y, (y->ne[0]/pad)*pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], 0);
-    struct ggml_tensor * y_1 = ggml_view_3d(ctx, y,  y->ne[0]%pad,      y->ne[1], y->ne[2], y->nb[1], y->nb[2], y_0->ne[0]*y_0->nb[0]);
-
-    return ggml_add(ctx,
-            ggml_mul_mat(ctx, x_0, y_0),
-            ggml_mul_mat(ctx, x_1, y_1));
-}
-
-// TODO: check if other platforms can benefit from this optimization
-// TODO: CUDA is currently broken - seems ggml_mul_mat does not handle views correctly
-#if defined(GGML_USE_METAL)
-#define ggml_mul_mat ggml_mul_mat_pad
-#endif
-
 // available whisper models
 enum e_model {
     MODEL_UNKNOWN,
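
For context, the removed helper only split the multiplication when dimension 0 of x was not already a multiple of pad and was at least 8 * pad elements long; otherwise it fell back to a plain ggml_mul_mat. The sketch below is not part of the commit or of ggml: it is a minimal standalone program that only reproduces the split arithmetic for an assumed dimension-0 size of 1000, to show what sizes the x_0/y_0 and x_1/y_1 views would get.

// Standalone illustration of the split sizes used by the removed
// ggml_mul_mat_pad helper. ne0 = 1000 is an assumed example value;
// pad and n_pad_req mirror the defaults in the removed code.
#include <cstdio>

int main() {
    const long pad       = 32;   // target alignment of dimension 0
    const long n_pad_req = 8;    // split only if ne0 is at least 8 * pad
    const long ne0       = 1000; // assumed example size of dimension 0

    if (ne0 % pad == 0 || ne0 / pad < n_pad_req) {
        std::printf("no split: plain ggml_mul_mat would be used\n");
        return 0;
    }

    const long ne0_main = (ne0 / pad) * pad; // dim 0 of the x_0 / y_0 views (992)
    const long ne0_rem  =  ne0 % pad;        // dim 0 of the x_1 / y_1 views (8)

    // Z = (X_0 @ Y_0) + (X_1 @ Y_1): the large, aligned part can use the
    // faster kernels, the small remainder goes through the general path.
    std::printf("x_0/y_0 dim 0 = %ld, x_1/y_1 dim 0 = %ld\n", ne0_main, ne0_rem);
    return 0;
}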
