@@ -11084,7 +11084,8 @@ struct llm_build_gemma_embedding_iswa : public llm_graph_context {
1108411084        // inp_pos - contains the positions
1108511085        ggml_tensor * inp_pos = build_inp_pos();
1108611086
11087-         auto * inp_attn = build_attn_inp_no_cache();
11087+         // TODO: support cacheless iSWA embeddings [TAG_NO_CACHE_ISWA]; until then, use build_attn_inp_kv_iswa() in place of build_attn_inp_no_cache()
11088+         auto * inp_attn = build_attn_inp_kv_iswa();
1108811089
1108911090        ggml_tensor * inp_out_ids = build_inp_out_ids();
1109011091
@@ -18632,7 +18633,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
1863218633        case LLM_ARCH_NOMIC_BERT_MOE:
1863318634        case LLM_ARCH_NEO_BERT:
1863418635        case LLM_ARCH_WAVTOKENIZER_DEC:
18635-         case LLM_ARCH_GEMMA_EMBEDDING:
18636+         // case LLM_ARCH_GEMMA_EMBEDDING: // TODO: disabled here until the cacheless SWA logic is fixed [TAG_NO_CACHE_ISWA]
1863618637        case LLM_ARCH_DREAM:
1863718638        case LLM_ARCH_LLADA:
1863818639            {
@@ -18681,6 +18682,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
1868118682                        /* attn_kv_size      */ cparams.n_ctx,
1868218683                        /* attn_n_pad        */ padding,
1868318684                        /* attn_n_swa        */ hparams.n_swa,
18685+                         /* attn_swa_type     */ hparams.swa_type,
1868418686                        /* recurrent_type_k  */ GGML_TYPE_F32,
1868518687                        /* recurrent_type_v  */ GGML_TYPE_F32,
1868618688                        /* recurrent_kv_size */ std::max((uint32_t) 1, cparams.n_seq_max),
@@ -18750,6 +18752,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
1875018752                                cparams.n_seq_max,
1875118753                                padding,
1875218754                                hparams.n_swa,
18755+                                 hparams.swa_type,
1875318756                                nullptr,
1875418757                                nullptr);
1875518758                    }
0 commit comments