From 8d51e183b077f84b4c58e66c7d1884946932e7b0 Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Thu, 23 Oct 2025 12:38:20 +0300
Subject: [PATCH 1/8] =?UTF-8?q?SYCL=20repeat=5Fback=20v1=20=E2=80=94=20add?=
 =?UTF-8?q?=20core=20op=20+=20switch=20case?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ggml/src/ggml-sycl/ggml-sycl.cpp   | 14 +++++++
 ggml/src/ggml-sycl/repeat_back.cpp | 62 ++++++++++++++++++++++++++++++
 ggml/src/ggml-sycl/repeat_back.hpp |  9 +++++
 3 files changed, 85 insertions(+)
 create mode 100644 ggml/src/ggml-sycl/repeat_back.cpp
 create mode 100644 ggml/src/ggml-sycl/repeat_back.hpp

diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index f3407a813d731..b43f37478dc36 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -44,6 +44,7 @@
 #include "ggml-sycl/set_rows.hpp"
 #include "ggml-sycl/sycl_hw.hpp"
 #include "ggml-sycl/getrows.hpp"
+#include "ggml-sycl/repeat_back.hpp"
 #include "ggml-sycl/quantize.hpp"
 #include "ggml.h"
 
@@ -2597,6 +2598,10 @@ catch (sycl::exception const &exc) {
   std::exit(1);
 }
 
+static void ggml_sycl_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_repeat_back(ctx, dst);
+}
 
 static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
@@ -3616,6 +3621,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
         case GGML_OP_REPEAT:
             ggml_sycl_repeat(ctx, dst);
             break;
+        case GGML_OP_REPEAT_BACK:
+            ggml_sycl_repeat_back(ctx, dst);
+            break;
         case GGML_OP_GET_ROWS:
             ggml_sycl_get_rows(ctx, dst);
             break;
@@ -4405,11 +4413,17 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
                 }
                 return false;
             }
+
         case GGML_OP_CONCAT:
             {
                 ggml_type src0_type = op->src[0]->type;
                 return src0_type != GGML_TYPE_I32 && src0_type != GGML_TYPE_I16;
             }
+        case GGML_OP_REPEAT_BACK:
+            {
+                ggml_type src0_type = op->src[0]->type;
+                return src0_type == GGML_TYPE_F32;
+            }
         case GGML_OP_DUP:
         case GGML_OP_ARGMAX:
         case GGML_OP_NONE:
diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
new file mode 100644
index 0000000000000..10fef098cdd93
--- /dev/null
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -0,0 +1,62 @@
+//בס"ד
+#include "repeat_back.hpp"
+
+#include "common.hpp"
+
+void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+
+    const float * src0_dd = (const float *) dst->src[0]->data;
+    float *       dst_dd  = (float *) dst->data;
+
+    const int64_t ne0  = dst->ne[0];
+    const int64_t ne1  = dst->ne[1];
+    const int64_t ne2  = dst->ne[2];
+    const int64_t ne3  = dst->ne[3];
+    const int64_t ne00 = dst->src[0]->ne[0];
+    const int64_t ne01 = dst->src[0]->ne[1];
+    const int64_t ne02 = dst->src[0]->ne[2];
+    const int64_t ne03 = dst->src[0]->ne[3];
+
+    const int nr0 = (int) (ne00 / ne0);
+    const int nr1 = (int) (ne01 / ne1);
+    const int nr2 = (int) (ne02 / ne2);
+    const int nr3 = (int) (ne03 / ne3);
+
+    const size_t total      = ne0 * ne1 * ne2 * ne3;
+    const int    BLOCK_SIZE = 256;
+    const int    num_blocks = (total + BLOCK_SIZE - 1) / BLOCK_SIZE;
+
+    queue_ptr stream = ctx.stream();
+    stream->memset(dst_dd, 0, total * sizeof(float));
+
+    stream->parallel_for(
+        sycl::nd_range<1>(sycl::range<1>(num_blocks * BLOCK_SIZE), sycl::range<1>(BLOCK_SIZE)),
+        [=](sycl::nd_item<1> item_ct1) {
+            const size_t i = item_ct1.get_global_linear_id();
+            if (i >= total) {
+                return;
+            }
+
+            const int i0 = i % ne0;
+            const int i1 = (i / ne0) % ne1;
+            const int i2 = (i / (ne0 * ne1)) % ne2;
+            const int i3 = i / (ne0 * ne1 * ne2);
+
+            float acc = 0.0f;
+
+            for (int j3 = 0; j3 < nr3; ++j3) {
+                for (int j2 = 0; j2 < nr2; ++j2) {
+                    for (int j1 = 0; j1 < nr1; ++j1) {
+                        for (int j0 = 0; j0 < nr0; ++j0) {
+                            acc += src0_dd[(i0 + j0 * ne0) + (i1 + j1 * ne1) * ne00 + (i2 + j2 * ne2) * ne00 * ne01 +
+                                           (i3 + j3 * ne3) * ne00 * ne01 * ne02];
+                        }
+                    }
+                }
+            }
+
+            dst_dd[i] = acc;
+        });
+}
diff --git a/ggml/src/ggml-sycl/repeat_back.hpp b/ggml/src/ggml-sycl/repeat_back.hpp
new file mode 100644
index 0000000000000..6783dead4fe1c
--- /dev/null
+++ b/ggml/src/ggml-sycl/repeat_back.hpp
@@ -0,0 +1,9 @@
+//בס"ד
+#ifndef GGML_SYCL_REPEAT_BACK_HPP
+#define GGML_SYCL_REPEAT_BACK_HPP
+
+#include "common.hpp"
+
+void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+
+#endif  // GGML_SYCL_REPEAT_BACK_HPP

From d3e88bc5753e1caa4fc0c26697676e9a6a66d60a Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Thu, 23 Oct 2025 12:40:43 +0300
Subject: [PATCH 2/8] Implement repeat_back SYCL operation and minor fixes

---
 ggml/src/ggml-sycl/repeat_back.cpp | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
index 10fef098cdd93..48ae998a27543 100644
--- a/ggml/src/ggml-sycl/repeat_back.cpp
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -4,20 +4,16 @@
 #include "common.hpp"
 
 void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     const float * src0_dd = (const float *) dst->src[0]->data;
     float *       dst_dd  = (float *) dst->data;
 
-    const int64_t ne0  = dst->ne[0];
-    const int64_t ne1  = dst->ne[1];
-    const int64_t ne2  = dst->ne[2];
-    const int64_t ne3  = dst->ne[3];
-    const int64_t ne00 = dst->src[0]->ne[0];
-    const int64_t ne01 = dst->src[0]->ne[1];
-    const int64_t ne02 = dst->src[0]->ne[2];
-    const int64_t ne03 = dst->src[0]->ne[3];
+    const int64_t ne0 = dst->ne[0], ne1 = dst->ne[1], ne2 = dst->ne[2], ne3 = dst->ne[3];
+    const int64_t ne00 = dst->src[0]->ne[0], ne01 = dst->src[0]->ne[1], ne02 = dst->src[0]->ne[2],
+                  ne03 = dst->src[0]->ne[3];
 
     const int nr0 = (int) (ne00 / ne0);
     const int nr1 = (int) (ne01 / ne1);
@@ -29,7 +25,6 @@ void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst
     const int    num_blocks = (total + BLOCK_SIZE - 1) / BLOCK_SIZE;
 
     queue_ptr stream = ctx.stream();
-    stream->memset(dst_dd, 0, total * sizeof(float));
 
     stream->parallel_for(
         sycl::nd_range<1>(sycl::range<1>(num_blocks * BLOCK_SIZE), sycl::range<1>(BLOCK_SIZE)),

From f7d05e7a945ce6bfd0f214db9dbc628c68a351b3 Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Thu, 30 Oct 2025 20:00:26 +0200
Subject: [PATCH 3/8] SYCL: optimize repeat_back kernel

---
 ggml/src/ggml-sycl/repeat_back.cpp | 102 ++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 39 deletions(-)

diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
index 48ae998a27543..1b99719cc0935 100644
--- a/ggml/src/ggml-sycl/repeat_back.cpp
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -3,55 +3,79 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+#define GGML_ASSERT_TENSOR_FITS_INT(t) \
+    GGML_ASSERT((t)->ne[0] < INT_MAX && (t)->ne[1] < INT_MAX && (t)->ne[2] < INT_MAX && (t)->ne[3] < INT_MAX)
 
+void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     const float * src0_dd = (const float *) dst->src[0]->data;
     float *       dst_dd  = (float *) dst->data;
 
-    const int64_t ne0 = dst->ne[0], ne1 = dst->ne[1], ne2 = dst->ne[2], ne3 = dst->ne[3];
-    const int64_t ne00 = dst->src[0]->ne[0], ne01 = dst->src[0]->ne[1], ne02 = dst->src[0]->ne[2],
-                  ne03 = dst->src[0]->ne[3];
+    GGML_ASSERT_TENSOR_FITS_INT(dst);
+    GGML_ASSERT_TENSOR_FITS_INT(dst->src[0]);
+
+    const int ne0 = dst->ne[0], ne1 = dst->ne[1], ne2 = dst->ne[2], ne3 = dst->ne[3];
+    const int ne00 = dst->src[0]->ne[0], ne01 = dst->src[0]->ne[1], ne02 = dst->src[0]->ne[2],
+              ne03 = dst->src[0]->ne[3];
+
+    const int nr0 = ne00 / ne0;
+    const int nr1 = ne01 / ne1;
+    const int nr2 = ne02 / ne2;
+    const int nr3 = ne03 / ne3;
+
+    const int nb0 = dst->src[0]->nb[0];
+    const int nb1 = dst->src[0]->nb[1];
+    const int nb2 = dst->src[0]->nb[2];
+    const int nb3 = dst->src[0]->nb[3];
+
+    const char * base = (const char *) src0_dd;
 
-    const int nr0 = (int) (ne00 / ne0);
-    const int nr1 = (int) (ne01 / ne1);
-    const int nr2 = (int) (ne02 / ne2);
-    const int nr3 = (int) (ne03 / ne3);
+    const size_t  total      = (size_t) ne0 * ne1 * ne2 * ne3;
+    constexpr int BLOCK_SIZE = 256;
+    const int     num_blocks = (total + BLOCK_SIZE - 1) / BLOCK_SIZE;
 
-    const size_t total      = ne0 * ne1 * ne2 * ne3;
-    const int    BLOCK_SIZE = 256;
-    const int    num_blocks = (total + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    // Precompute inverse sizes to replace integer divisions with multiplications
+    const float inv_ne0      = 1.0f / ne0;
+    const float inv_ne_01    = 1.0f / (ne0 * ne1);
+    const float inv_ne_012   = 1.0f / (ne0 * ne1 * ne2);
+    const int   repeat_count = nr0 * nr1 * nr2 * nr3;
 
     queue_ptr stream = ctx.stream();
 
-    stream->parallel_for(
-        sycl::nd_range<1>(sycl::range<1>(num_blocks * BLOCK_SIZE), sycl::range<1>(BLOCK_SIZE)),
-        [=](sycl::nd_item<1> item_ct1) {
-            const size_t i = item_ct1.get_global_linear_id();
-            if (i >= total) {
-                return;
-            }
-
-            const int i0 = i % ne0;
-            const int i1 = (i / ne0) % ne1;
-            const int i2 = (i / (ne0 * ne1)) % ne2;
-            const int i3 = i / (ne0 * ne1 * ne2);
-
-            float acc = 0.0f;
-
-            for (int j3 = 0; j3 < nr3; ++j3) {
-                for (int j2 = 0; j2 < nr2; ++j2) {
-                    for (int j1 = 0; j1 < nr1; ++j1) {
-                        for (int j0 = 0; j0 < nr0; ++j0) {
-                            acc += src0_dd[(i0 + j0 * ne0) + (i1 + j1 * ne1) * ne00 + (i2 + j2 * ne2) * ne00 * ne01 +
-                                           (i3 + j3 * ne3) * ne00 * ne01 * ne02];
-                        }
-                    }
-                }
-            }
-
-            dst_dd[i] = acc;
-        });
+    stream->parallel_for(sycl::nd_range<1>(sycl::range<1>(num_blocks * BLOCK_SIZE), sycl::range<1>(BLOCK_SIZE)),
+                         [=](sycl::nd_item<1> item_ct1) {
+                             const size_t i = item_ct1.get_global_linear_id();
+
+                             if (i >= total) {
+                                 return;
+                             }
+
+                             // Compute multidimensional indices (i0,i1,i2,i3) from the flattened linear index i
+                             const int i3 = (int) (i * inv_ne_012);
+                             const int i2 = (int) (i * inv_ne_01) - i3 * ne2;
+                             const int i1 = (int) (i * inv_ne0) - (int) (i * inv_ne_01) * ne1;
+                             const int i0 = i - (int) (i * inv_ne0) * ne0;
+
+                             int   j0 = 0, j1 = 0, j2 = 0, j3 = 0;
+                             float acc = 0.0f;
+
+                             for (int j = 0; j < repeat_count; ++j) {
+                                 const float * ptr =  (const float *) (base + (i0 + j0 * ne0) * nb0 + (i1 + j1 * ne1) * nb1 +
+                                                        (i2 + j2 * ne2) * nb2 + (i3 + j3 * ne3) * nb3);
+                                 acc += *ptr;
+
+                                 // Manual carry propagation simulates nested loops efficiently
+                                 int carry = (++j0 >= nr0);
+                                 j0 -= carry * nr0;
+                                 carry = (carry && (++j1 >= nr1));
+                                 j1 -= carry * nr1;
+                                 carry = (carry && (++j2 >= nr2));
+                                 j2 -= carry * nr2;
+                                 j3 += carry;
+                             }
+                             dst_dd[i] = acc;
+                         });
+
 }

From 20717ac63dff4f3f927c54ee9263a0126b973c80 Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Thu, 30 Oct 2025 21:06:13 +0200
Subject: [PATCH 4/8] Remove Hebrew comment from repeat_back.cpp

---
 ggml/src/ggml-sycl/repeat_back.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
index 0a7e12563fe1d..9807e85cb7fb2 100644
--- a/ggml/src/ggml-sycl/repeat_back.cpp
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -1,4 +1,3 @@
-//בס"ד
 #include "repeat_back.hpp"
 
 #include "common.hpp"

From f2853da020b96daaf5a6a5116e1865add03627e0 Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Thu, 30 Oct 2025 21:09:35 +0200
Subject: [PATCH 5/8] Remove comments for code clarity

Removed comments to clean up the code.
---
 ggml/src/ggml-sycl/repeat_back.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
index 9807e85cb7fb2..1fc1e8c8c2cef 100644
--- a/ggml/src/ggml-sycl/repeat_back.cpp
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -35,7 +35,6 @@ void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst
     constexpr int BLOCK_SIZE = 256;
     const int     num_blocks = (total + BLOCK_SIZE - 1) / BLOCK_SIZE;
 
-    // Precompute inverse sizes to replace integer divisions with multiplications
     const float inv_ne0      = 1.0f / ne0;
     const float inv_ne_01    = 1.0f / (ne0 * ne1);
     const float inv_ne_012   = 1.0f / (ne0 * ne1 * ne2);
@@ -51,7 +50,6 @@ void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst
                                  return;
                              }
 
-                             // Compute multidimensional indices (i0,i1,i2,i3) from the flattened linear index i
                              const int i3 = (int) (i * inv_ne_012);
                              const int i2 = (int) (i * inv_ne_01) - i3 * ne2;
                              const int i1 = (int) (i * inv_ne0) - (int) (i * inv_ne_01) * ne1;
@@ -66,7 +64,6 @@ void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst
                                                       (i2 + j2 * ne2) * nb2 + (i3 + j3 * ne3) * nb3);
                                  acc += *ptr;
 
-                                 // Manual carry propagation simulates nested loops efficiently
                                  int carry = (++j0 >= nr0);
                                  j0 -= carry * nr0;
                                  carry = (carry && (++j1 >= nr1));

From 0efd7eb84701d433dc4ab511b617bf1bc90dbccc Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Fri, 31 Oct 2025 16:09:08 +0200
Subject: [PATCH 6/8] Fix formatting in ggml-sycl.cpp

---
 ggml/src/ggml-sycl/ggml-sycl.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index d9b2878e9a922..c97c5899435b1 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -4534,7 +4534,6 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
                 }
                 return false;
             }
-
         case GGML_OP_CONCAT:
             {
                 ggml_type src0_type = op->src[0]->type;

From 2c8f97726d31da2bd377989302fd774167ca68de Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Sat, 1 Nov 2025 18:12:40 +0200
Subject: [PATCH 7/8] Formatted lambda according to legacy style. No logic
 changes

---
 ggml/src/ggml-sycl/repeat_back.cpp | 64 +++++++++++++++---------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
index 1fc1e8c8c2cef..5f651d208bd46 100644
--- a/ggml/src/ggml-sycl/repeat_back.cpp
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -42,36 +42,36 @@ void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst
 
     queue_ptr stream = ctx.stream();
 
-    stream->parallel_for(sycl::nd_range<1>(sycl::range<1>(num_blocks * BLOCK_SIZE), sycl::range<1>(BLOCK_SIZE)),
-                         [=](sycl::nd_item<1> item_ct1) {
-                             const size_t i = item_ct1.get_global_linear_id();
-
-                             if (i >= total) {
-                                 return;
-                             }
-
-                             const int i3 = (int) (i * inv_ne_012);
-                             const int i2 = (int) (i * inv_ne_01) - i3 * ne2;
-                             const int i1 = (int) (i * inv_ne0) - (int) (i * inv_ne_01) * ne1;
-                             const int i0 = i - (int) (i * inv_ne0) * ne0;
-
-                             int   j0 = 0, j1 = 0, j2 = 0, j3 = 0;
-                             float acc = 0.0f;
-
-                             for (int j = 0; j < repeat_count; ++j) {
-                                 const float * ptr =
-                                     (const float *) (base + (i0 + j0 * ne0) * nb0 + (i1 + j1 * ne1) * nb1 +
-                                                      (i2 + j2 * ne2) * nb2 + (i3 + j3 * ne3) * nb3);
-                                 acc += *ptr;
-
-                                 int carry = (++j0 >= nr0);
-                                 j0 -= carry * nr0;
-                                 carry = (carry && (++j1 >= nr1));
-                                 j1 -= carry * nr1;
-                                 carry = (carry && (++j2 >= nr2));
-                                 j2 -= carry * nr2;
-                                 j3 += carry;
-                             }
-                             dst_dd[i] = acc;
-                         });
+    stream->parallel_for(
+        sycl::nd_range<1>(sycl::range<1>(num_blocks * BLOCK_SIZE), sycl::range<1>(BLOCK_SIZE)),
+        [=](sycl::nd_item<1> item_ct1) {
+            const size_t i = item_ct1.get_global_linear_id();
+            if (i >= total) {
+                return;
+            }
+
+            const int i3 = (int) (i * inv_ne_012);
+            const int i2 = (int) (i * inv_ne_01) - i3 * ne2;
+            const int i1 = (int) (i * inv_ne0) - (int) (i * inv_ne_01) * ne1;
+            const int i0 = i - (int) (i * inv_ne0) * ne0;
+
+            int   j0 = 0, j1 = 0, j2 = 0, j3 = 0;
+            float acc = 0.0f;
+
+            for (int j = 0; j < repeat_count; ++j) {
+                const float * ptr = (const float *) (base + (i0 + j0 * ne0) * nb0 + (i1 + j1 * ne1) * nb1 +
+                    (i2 + j2 * ne2) * nb2 + (i3 + j3 * ne3) * nb3);
+                acc += *ptr;
+
+                int carry = (++j0 >= nr0);
+                j0 -= carry * nr0;
+                carry = (carry && (++j1 >= nr1));
+                j1 -= carry * nr1;
+                carry = (carry && (++j2 >= nr2));
+                j2 -= carry * nr2;
+                j3 += carry;
+            }
+            
+            dst_dd[i] = acc;
+        });
 }

From aa20ee1318b3f004d3363d0896ef9b2130611b9b Mon Sep 17 00:00:00 2001
From: shani-f <s0556787439@gmail.com>
Date: Sat, 1 Nov 2025 20:10:53 +0200
Subject: [PATCH 8/8] Remove blank line in repeat_back.cpp

Remove unnecessary blank line before assigning acc to dst_dd.
---
 ggml/src/ggml-sycl/repeat_back.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ggml/src/ggml-sycl/repeat_back.cpp b/ggml/src/ggml-sycl/repeat_back.cpp
index 5f651d208bd46..845b48468c1d6 100644
--- a/ggml/src/ggml-sycl/repeat_back.cpp
+++ b/ggml/src/ggml-sycl/repeat_back.cpp
@@ -71,7 +71,6 @@ void ggml_sycl_op_repeat_back(ggml_backend_sycl_context & ctx, ggml_tensor * dst
                 j2 -= carry * nr2;
                 j3 += carry;
             }
-            
             dst_dd[i] = acc;
         });
 }