@@ -2229,6 +2229,38 @@ static void aclnn_index_fill_tensor(ggml_backend_cann_context& ctx,
22292229 ggml_cann_release_resources (ctx, acl_index, acl_value);
22302230}
22312231
2232+ /**
2233+ * @brief Initializes and caches sine/cosine positional encoding values
2234+ * (used in RoPE, Rotary Position Embedding) for attention layers.
2235+ *
2236+ * This function computes and caches the sin/cos values of
2237+ * θ = position * theta_scale for RoPE encoding. The cache is shared
2238+ * across attention layers, and only the first attention layer will
2239+ * trigger initialization. The cache includes repeated sin/cos values
2240+ * with different repeat methods depending on the @p is_neox flag.
2241+ *
2242+ * Steps performed by this function:
2243+ * 1. Identify whether the target tensor belongs to Q/K in attention
2244+ * and restrict computation to the first layer only.
2245+ * 2. Initialize the theta scale array (arange → power → freq scaling).
2246+ * 3. Allocate sin/cos caches if the max prompt length increases.
2247+ * 4. Compute θ = position * theta_scale.
2248+ * 5. Compute sin(θ), cos(θ) and optionally scale by attn_factor.
2249+ * 6. Expand sin/cos values by repeat or repeat_interleave depending
2250+ * on whether @p is_neox is enabled.
2251+ * 7. Store the computed values into persistent buffers
2252+ * (ctx.rope_sin_ptr / ctx.rope_cos_ptr).
2253+ *
2254+ * @param ctx The CANN backend context, holding memory pool,
2255+ * stream, and persistent buffers for rope init/cache.
2256+ * @param dst The destination ggml_tensor whose computation
2257+ * depends on the cached RoPE values (usually Qcur/Kcur).
2258+ * @param theta_scale Scalar exponent base for computing theta scale values.
2259+ * @param freq_scale Frequency scaling factor, applied to theta scale.
2260+ * @param attn_factor Attention scaling factor, applied to sin/cos.
2261+ * @param is_neox Whether to use Neox-style repeat strategy
2262+ * (dim expansion vs repeat_interleave).
2263+ */
22322264static void aclnn_cache_init (ggml_backend_cann_context& ctx, ggml_tensor* dst,
22332265 float theta_scale, float freq_scale,
22342266 float attn_factor, bool is_neox) {
0 commit comments