@@ -3345,7 +3345,6 @@ static struct ggml_tensor * llm_build_ffn(
33453345// if max_alibi_bias > 0 then apply ALiBi
33463346static struct ggml_tensor * llm_build_kqv (
33473347 struct ggml_context * ctx,
3348- struct ggml_tensor * cur,
33493348 const llama_hparams & hparams,
33503349 const llama_kv_cache & kv,
33513350 struct ggml_tensor * wo,
@@ -3411,7 +3410,7 @@ static struct ggml_tensor * llm_build_kqv(
34113410 struct ggml_tensor * kqv_merged = ggml_permute (ctx, kqv, 0 , 2 , 1 , 3 );
34123411 cb (kqv_merged, " kqv_merged" , il);
34133412
3414- cur = ggml_cont_2d (ctx, kqv_merged, n_embd, n_tokens);
3413+ struct ggml_tensor * cur = ggml_cont_2d (ctx, kqv_merged, n_embd, n_tokens);
34153414 cb (cur, " kqv_merged_cont" , il);
34163415
34173416 cur = ggml_mul_mat (ctx, wo, cur);
@@ -3565,7 +3564,7 @@ struct llm_build_context {
35653564
35663565 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
35673566
3568- cur = llm_build_kqv (ctx0, cur, hparams, kv_self,
3567+ cur = llm_build_kqv (ctx0, hparams, kv_self,
35693568 model.layers [il].wo , NULL ,
35703569 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1 .0f , cb, il);
35713570 cb (cur, " kqv_out" , il);
@@ -3677,7 +3676,7 @@ struct llm_build_context {
36773676 // apply ALiBi for 13B model
36783677 const float max_alibi_bias = model.type == MODEL_13B ? 8 .0f : -1 .0f ;
36793678
3680- cur = llm_build_kqv (ctx0, cur, hparams, kv_self,
3679+ cur = llm_build_kqv (ctx0, hparams, kv_self,
36813680 model.layers [il].wo , NULL ,
36823681 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, max_alibi_bias, cb, il);
36833682 cb (cur, " kqv_out" , il);
@@ -3795,7 +3794,7 @@ struct llm_build_context {
37953794
37963795 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
37973796
3798- cur = llm_build_kqv (ctx0, attn_norm, hparams, kv_self,
3797+ cur = llm_build_kqv (ctx0, hparams, kv_self,
37993798 model.layers [il].wo , NULL ,
38003799 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1 .0f , cb, il);
38013800 cb (cur, " kqv_out" , il);
@@ -3895,7 +3894,7 @@ struct llm_build_context {
38953894
38963895 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
38973896
3898- cur = llm_build_kqv (ctx0, cur, hparams, kv_self,
3897+ cur = llm_build_kqv (ctx0, hparams, kv_self,
38993898 model.layers [il].wo , model.layers [il].bo ,
39003899 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1 .0f , cb, il);
39013900 cb (cur, " kqv_out" , il);
@@ -4100,7 +4099,7 @@ struct llm_build_context {
41004099 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
41014100
41024101 // TODO: not tested, could be broken
4103- cur = llm_build_kqv (ctx0, Q, hparams, kv_self,
4102+ cur = llm_build_kqv (ctx0, hparams, kv_self,
41044103 model.layers [il].wo , model.layers [il].bo ,
41054104 Q, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1 .0f , cb, il);
41064105 cb (cur, " kqv_out" , il);
@@ -4191,7 +4190,7 @@ struct llm_build_context {
41914190
41924191 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
41934192
4194- cur = llm_build_kqv (ctx0, Qcur, hparams, kv_self,
4193+ cur = llm_build_kqv (ctx0, hparams, kv_self,
41954194 model.layers [il].wo , NULL ,
41964195 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, 8 .0f , cb, il);
41974196 cb (cur, " kqv_out" , il);
@@ -4288,7 +4287,7 @@ struct llm_build_context {
42884287
42894288 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
42904289
4291- cur = llm_build_kqv (ctx0, Qcur, hparams, kv_self,
4290+ cur = llm_build_kqv (ctx0, hparams, kv_self,
42924291 model.layers [il].wo , model.layers [il].bo ,
42934292 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, 8 .0f , cb, il);
42944293 cb (cur, " kqv_out" , il);
@@ -4382,7 +4381,7 @@ struct llm_build_context {
43824381
43834382 llm_build_kv_store (ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
43844383
4385- cur = llm_build_kqv (ctx0, Qcur, hparams, kv_self,
4384+ cur = llm_build_kqv (ctx0, hparams, kv_self,
43864385 model.layers [il].wo , NULL ,
43874386 Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, hparams.f_max_alibi_bias , cb, il);
43884387 cb (cur, " kqv_out" , il);
0 commit comments