@@ -19641,7 +19641,7 @@ struct llm_build_apertus : public llm_graph_context {
     }
 };
 
-llama_memory_i * llama_model::create_memory(const llama_memory_params & params, llama_cparams & cparams) const {
+llama_memory_i * llama_model::create_memory(const llama_memory_params & params, const llama_cparams & cparams) const {
     llama_memory_i * res;
 
     switch (arch) {
@@ -19692,17 +19692,13 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                         };
                     }
 
-                    const auto padding = llama_kv_cache::get_padding(cparams);
-
-                    cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding);
-
                     res = new llama_memory_hybrid(
                         /* model */ *this,
                         /* attn_type_k */ params.type_k,
                         /* attn_type_v */ params.type_v,
                         /* attn_v_trans */ !cparams.flash_attn,
                         /* attn_kv_size */ cparams.n_ctx,
-                        /* attn_n_pad */ padding,
+                        /* attn_n_pad */ 1,
                         /* attn_n_swa */ hparams.n_swa,
                         /* attn_swa_type */ hparams.swa_type,
                         /* recurrent_type_k */ GGML_TYPE_F32,
@@ -19714,23 +19710,12 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                         /* filter_attn */ std::move(filter_attn),
                         /* filter_recr */ std::move(filter_recr));
                 } else {
-                    const auto padding = llama_kv_cache::get_padding(cparams);
-
                     uint32_t n_ctx_per_stream = cparams.n_ctx;
 
                     if (!cparams.kv_unified) {
                         n_ctx_per_stream = (cparams.n_ctx + cparams.n_seq_max - 1)/cparams.n_seq_max;
-                        n_ctx_per_stream = GGML_PAD(n_ctx_per_stream, padding);
-
-                        cparams.n_ctx = n_ctx_per_stream*cparams.n_seq_max;
-                    } else {
-                        n_ctx_per_stream = GGML_PAD(n_ctx_per_stream, padding);
-
-                        cparams.n_ctx = n_ctx_per_stream;
                     }
 
-                    LLAMA_LOG_DEBUG("%s: n_ctx = %u (padded)\n", __func__, cparams.n_ctx);
-
                     llama_memory_i::layer_reuse_cb reuse = nullptr;
 
                     if (arch == LLM_ARCH_GEMMA3N) {
@@ -19757,7 +19742,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                                 n_ctx_per_stream,
                                 cparams.n_seq_max,
                                 cparams.n_ubatch,
-                                padding,
+                                1,
                                 nullptr,
                                 reuse);
                     } else {
@@ -19772,7 +19757,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                                 cparams.kv_unified,
                                 n_ctx_per_stream,
                                 cparams.n_seq_max,
-                                padding,
+                                1,
                                 hparams.n_swa,
                                 hparams.swa_type,
                                 nullptr,
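
Note: with this change `create_memory` no longer rounds `cparams.n_ctx` up to the KV-cache padding (the caches are handed the raw context size and a pad of 1, which also allows `cparams` to become const). For reference, here is a minimal standalone sketch of the rounding math the removed non-unified branch used to perform. `pad_to` is an illustrative stand-in for `GGML_PAD`, and the concrete values (4000 tokens, 3 sequences, padding 256) are hypothetical:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for GGML_PAD: round x up to the next multiple of n.
static uint32_t pad_to(uint32_t x, uint32_t n) {
    return ((x + n - 1) / n) * n;
}

int main() {
    // Hypothetical inputs: total context, number of sequences, cache padding.
    const uint32_t n_ctx     = 4000;
    const uint32_t n_seq_max = 3;
    const uint32_t padding   = 256;

    // Removed non-unified branch: split the context per stream (ceil-divide),
    // pad each stream, then recompute the total context from the padded size.
    uint32_t n_ctx_per_stream = (n_ctx + n_seq_max - 1) / n_seq_max;
    n_ctx_per_stream = pad_to(n_ctx_per_stream, padding);
    const uint32_t n_ctx_padded = n_ctx_per_stream * n_seq_max;

    printf("per-stream: %u, padded total: %u\n", n_ctx_per_stream, n_ctx_padded);
    // prints: per-stream: 1536, padded total: 4608
    return 0;
}
```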