From cbefca1818e75a2e49dd896eaf0c60debde13cc8 Mon Sep 17 00:00:00 2001 From: safranowith Date: Sun, 21 Sep 2025 19:49:41 +0300 Subject: [PATCH 01/15] SYCL: Add support for FLOOR,CEIL,ROUND and TRUNC unary operators Clean up unrelated changes from previous commit --- docs/ops.md | 8 +- docs/ops/SYCL.csv | 16 ++++ ggml/include/ggml.h | 37 +++++++- ggml/src/ggml-sycl/element_wise.cpp | 120 +++++++++++++++++++++++++ ggml/src/ggml-sycl/element_wise.hpp | 4 + ggml/src/ggml-sycl/ggml-sycl.cpp | 16 ++++ tests/test-backend-ops.cpp | 132 ++++++++++++++++++++++++++++ 7 files changed, 327 insertions(+), 6 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index 938efac815fc0..226cd935d698a 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -22,7 +22,7 @@ Legend: | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ | @@ -42,7 +42,7 @@ Legend: | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | -| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | @@ -84,7 +84,7 @@ Legend: | ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | @@ -111,6 +111,6 @@ Legend: | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | | XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index d7efa43cdf3da..bc6319f51fa8c 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -31,6 +31,14 @@ "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","XIELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" "SYCL0","XIELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" +"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" @@ -95,6 +103,14 @@ 
"SYCL0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","XIELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" "SYCL0","XIELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" +"SYCL0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index d948b00cc7f30..b06fa0c5c82ab 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -581,7 +581,7 @@ extern "C" { GGML_UNARY_OP_CEIL, GGML_UNARY_OP_ROUND, GGML_UNARY_OP_TRUNC, - + GGML_UNARY_OP_COUNT, }; @@ -1103,7 +1103,40 @@ extern "C" { GGML_API struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_floor( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_floor_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ceil( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ceil_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_round( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_round_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_trunc( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_trunc_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // GELU using erf (error function) when possible // some backends may fallback to approximation based on Abramowitz and Stegun formula GGML_API struct ggml_tensor * ggml_gelu_erf( @@ -2154,7 +2187,7 @@ extern "C" { int p1, int p2, int p3); - + GGML_API struct ggml_tensor * ggml_pad_ext( struct ggml_context * ctx, struct ggml_tensor * a, diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 58f5125c9cf6e..810995d0cbf74 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -150,6 +150,26 @@ static __dpct_inline__ T op_clamp(T x, float min_val, float max_val) { return x < static_cast(min_val) ? static_cast(min_val) : (x > static_cast(max_val) ? 
static_cast<T>(max_val) : x);
 }
 
+template <typename T>
+static __dpct_inline__ T op_floor(T x) {
+    return sycl::floor(x);
+}
+
+template <typename T>
+static __dpct_inline__ T op_ceil(T x) {
+    return sycl::ceil(x);
+}
+
+template <typename T>
+static __dpct_inline__ T op_round(T x) {
+    return sycl::round(x);
+}
+
+template <typename T>
+static __dpct_inline__ T op_trunc(T x) {
+    return sycl::trunc(x);
+}
+
 template <typename T>
 static void unary_op_sgn_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
     SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
@@ -304,6 +324,34 @@ static void unary_op_clamp_kernel(const T * x, T * dst, const int k, const sycl:
     }
 }
 
+template <typename T>
+static void unary_op_floor_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_floor(x[i]);
+    }
+}
+
+template <typename T>
+static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_ceil(x[i]);
+    }
+}
+
+template <typename T>
+static void unary_op_round_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_round(x[i]);
+    }
+}
+
+template <typename T>
+static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_trunc(x[i]);
+    }
+}
+
 template <typename T>
 static void upscale(const T *x, T *dst, const int nb00, const int nb01,
                     const int nb02, const int nb03, const int ne10, const int ne11,
@@ -897,6 +945,58 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
         }, min_val, max_val);
 }
 
+static inline void ggml_sycl_op_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_floor_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
+static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_ceil_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
+static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_round_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
+static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
 static inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
@@ -1122,3 +1222,23 @@ void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/0);
     ggml_sycl_detail::ggml_sycl_op_arange(ctx, dst);
 }
+
+void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_floor(ctx, dst);
+}
+
+void ggml_sycl_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_ceil(ctx, dst);
+}
+
+void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_round(ctx, dst);
+}
+
+void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_trunc(ctx, dst);
+}
diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp
index ed96c55f75a7a..fcf93295cb215 100644
--- a/ggml/src/ggml-sycl/element_wise.hpp
+++ b/ggml/src/ggml-sycl/element_wise.hpp
@@ -80,6 +80,10 @@ void ggml_sycl_reglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index a7e077ec8ebe0..1a007ffe2bca6 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -3698,6 +3698,18 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
         case GGML_UNARY_OP_ELU:
             ggml_sycl_elu(ctx, dst);
             break;
+        case GGML_UNARY_OP_FLOOR:
+            ggml_sycl_floor(ctx, dst);
+            break;
+        case GGML_UNARY_OP_CEIL:
+            ggml_sycl_ceil(ctx, dst);
+            break;
+        case GGML_UNARY_OP_ROUND:
+            ggml_sycl_round(ctx, dst);
+            break;
+        case GGML_UNARY_OP_TRUNC:
+            ggml_sycl_trunc(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -4262,6 +4274,10 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_UNARY_OP_SGN:
         case GGML_UNARY_OP_ABS:
         case GGML_UNARY_OP_ELU:
+        case GGML_UNARY_OP_FLOOR:
+        case GGML_UNARY_OP_CEIL:
+        case GGML_UNARY_OP_ROUND:
+        case GGML_UNARY_OP_TRUNC:
 #if defined (GGML_SYCL_F16)
             return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
 #else
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 82bb55ea0e184..fa98db2982ce7 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3759,6 +3759,130 @@ struct test_clamp : public test_case {
     }
 };
 
+// GGML_OP_FLOOR
+struct test_floor : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_floor(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_floor(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
+// GGML_OP_CEIL
+struct test_ceil : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_ceil(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_ceil(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
+// GGML_OP_ROUND
+struct test_round : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_round(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_round(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
+// GGML_OP_TRUNC
+struct test_trunc : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_trunc(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_trunc(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
 // GGML_OP_DIAG_MASK_INF
 struct test_diag_mask_inf : public test_case {
     const ggml_type type;
@@ -6585,6 +6709,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_cos (type));
         test_cases.emplace_back(new test_clamp (type));
         test_cases.emplace_back(new test_leaky_relu(type));
+        test_cases.emplace_back(new test_floor (type));
+        test_cases.emplace_back(new test_ceil (type));
+        test_cases.emplace_back(new test_round (type));
+        test_cases.emplace_back(new test_trunc (type));
         test_cases.emplace_back(new test_sqr (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_sqrt (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_log (type, {7, 1, 5, 3}));
@@ -6592,6 +6720,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_cos (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_clamp (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_leaky_relu(type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_floor (type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_ceil (type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_round (type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_trunc (type, {7, 1, 5, 3}));
     }
 
     test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));

From c4ce488e498ecf65e9f82235217e24d4e106070f Mon Sep 17 00:00:00 2001
From: safranowith
Date: Thu, 16 Oct 2025 15:35:49 +0300
Subject: [PATCH 02/15] Chore: remove empty lines and fix indentation

---
 ggml/include/ggml.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index b06fa0c5c82ab..f34d20f0cd9e2 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -581,7 +581,7 @@ extern "C" {
         GGML_UNARY_OP_CEIL,
         GGML_UNARY_OP_ROUND,
         GGML_UNARY_OP_TRUNC,
-
+
         GGML_UNARY_OP_COUNT,
     };
@@ -2187,10 +2187,11 @@ extern "C" {
             int p1,
             int p2,
             int p3);
-
+
     GGML_API struct ggml_tensor * ggml_pad_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
+            int lp0,
             int rp0,
             int lp1,

From a97a0e0df59057e9ad726d6a6253de907aa6c9be Mon Sep 17 00:00:00 2001
From: safranowith
Date: Thu, 16 Oct 2025 15:45:29 +0300
Subject: [PATCH 03/15] Clean up: remove leftover blank lines and fix spacing

---
 ggml/include/ggml.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index f34d20f0cd9e2..9fdc3d5a997af 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -2187,11 +2187,10 @@ extern "C" {
             int p1,
             int p2,
             int p3);
-
+
     GGML_API struct ggml_tensor * ggml_pad_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            int lp0,
             int rp0,
             int lp1,

From 7d2a08d429f31c3b3f83a612fea9bc2eaa31d672 Mon Sep 17 00:00:00 2001
From: safranowith
Date: Thu, 16 Oct 2025 16:33:34 +0300
Subject: [PATCH 04/15] chore: fix trailing whitespace and ensure final newline

---
 ggml/include/ggml.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 9fdc3d5a997af..33e3ff385d0ef 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1103,7 +1103,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_gelu_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
-
+
     GGML_API struct ggml_tensor * ggml_floor(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
@@ -1130,13 +1130,13 @@
 
     GGML_API struct ggml_tensor * ggml_trunc(
             struct ggml_context * ctx,
-            struct ggml_tensor * a);
+            struct ggml_tensor * a);
 
     GGML_API struct ggml_tensor * ggml_trunc_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
-
+
     // GELU using erf (error function) when possible
     // some backends may fallback to approximation based on Abramowitz and Stegun formula
     GGML_API struct ggml_tensor * ggml_gelu_erf(

From 3489bbe8b71c5fcaf725c9819c2a4c703be0dcd6 Mon Sep 17 00:00:00 2001
From: safranowith
Date: Sun, 19 Oct 2025 12:18:23 +0300
Subject: [PATCH 05/15] Cleanup: remove redundant declarations already defined
 in header

---
 ggml/include/ggml.h | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 33e3ff385d0ef..b4015a6dd1ce1 100644
---
a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1104,38 +1104,6 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); - GGML_API struct ggml_tensor * ggml_floor( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_floor_inplace( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_ceil( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_ceil_inplace( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_round( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_round_inplace( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_trunc( - struct ggml_context * ctx, - struct ggml_tensor * a); - - GGML_API struct ggml_tensor * ggml_trunc_inplace( - struct ggml_context * ctx, - struct ggml_tensor * a); - // GELU using erf (error function) when possible // some backends may fallback to approximation based on Abramowitz and Stegun formula From d85313eac99047e326d908f8415c9cb27294e93d Mon Sep 17 00:00:00 2001 From: safranowith Date: Sun, 19 Oct 2025 12:36:23 +0300 Subject: [PATCH 06/15] Sync docs/ops.md with updated backend operation support --- ggml/include/ggml.h | 1 - 1 file changed, 1 deletion(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index b4015a6dd1ce1..d948b00cc7f30 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1104,7 +1104,6 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); - // GELU using erf (error function) when possible // some backends may fallback to approximation based on Abramowitz and Stegun formula GGML_API struct ggml_tensor * ggml_gelu_erf( From afc71a4cccd830c4be4e7a191ccf9a6195609fb6 Mon Sep 17 00:00:00 2001 From: safranowith Date: Sun, 19 Oct 2025 12:52:55 +0300 Subject: [PATCH 07/15] docs: update ops.md after rebase --- docs/ops.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index 226cd935d698a..89861b8c2f28b 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -100,8 +100,8 @@ Legend: | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | | SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | | SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ❌ | ❌ | -| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | -| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | +| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | From 47aa3a47980c2e0d5b2fee6e93eedaeeffaed9e5 Mon Sep 17 00:00:00 2001 From: safranowith Date: Sun, 19 Oct 2025 13:39:46 +0300 Subject: [PATCH 08/15] docs: update ops.md - Vulkan supports SSM_CONV and SSM_SCAN --- docs/ops.md | 4 ++-- docs/ops/Vulkan.csv | 42 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index 89861b8c2f28b..226cd935d698a 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -100,8 +100,8 @@ Legend: | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | | SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | | SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ❌ | ❌ | -| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | -| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | +| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 
| ❌ | ❌ | | SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | diff --git a/docs/ops/Vulkan.csv b/docs/ops/Vulkan.csv index ea252577280d5..298c2a6ccd5fc 100644 --- a/docs/ops/Vulkan.csv +++ b/docs/ops/Vulkan.csv @@ -3263,27 +3263,27 @@ "Vulkan0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","Vulkan" "Vulkan0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","Vulkan" "Vulkan0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","0","no","Vulkan" -"Vulkan0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","no","Vulkan" -"Vulkan0","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","no","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Vulkan" 
+"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" +"Vulkan0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" +"Vulkan0","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" "Vulkan0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","Vulkan" "Vulkan0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","Vulkan" "Vulkan0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" From c6291c0e7dfec6b324789bfd70623c9121031b83 Mon Sep 17 00:00:00 2001 From: safranowith Date: Thu, 16 Oct 2025 17:59:36 +0300 Subject: [PATCH 09/15] SYCL: add support for ROUND, FLOOR, CEIL, TRUNC ops --- ggml/include/ggml.h | 1 + ggml/src/ggml-cuda/ggml-cuda.cu | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index d948b00cc7f30..8ce0d7a6fbc29 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1943,6 +1943,7 @@ extern "C" { int d1); // dilation dimension 1 GGML_API struct ggml_tensor * ggml_im2col_3d( +<<<<<<< HEAD struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 75fd6db14c514..723bb4eab4862 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3357,6 +3357,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_TANH: case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_ELU: + case GGML_UNARY_OP_FLOOR: + case GGML_UNARY_OP_CEIL: + case GGML_UNARY_OP_ROUND: + case GGML_UNARY_OP_TRUNC: return ggml_is_contiguous(op->src[0]); default: return false; From 022c43261a9afc142abc10bcd2df2222df87e688 Mon Sep 17 00:00:00 2001 From: safranowith Date: Thu, 25 Sep 2025 22:49:06 +0300 Subject: [PATCH 10/15] change llama.cpp/ggml/src/ggml-cuda$ code ggml-cuda.cu --- ggml/src/ggml-cuda/ggml-cuda.cu | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 723bb4eab4862..0d22935b462b0 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2345,6 +2345,17 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg break; case GGML_UNARY_OP_XIELU: 
            ggml_cuda_op_xielu(ctx, dst);
+            break;
+        case GGML_UNARY_OP_FLOOR:
+            ggml_cuda_op_floor(ctx, dst);
+            break;
+        case GGML_UNARY_OP_CEIL:
+            ggml_cuda_op_ceil(ctx, dst);
+            break;
+        case GGML_UNARY_OP_ROUND:
+            ggml_cuda_op_round(ctx, dst);
+            break;
+        case GGML_UNARY_OP_TRUNC:
+            ggml_cuda_op_trunc(ctx, dst);
             break;
         default:
             return false;

From 840cebc3efc8a239a9863bd183fe2bb172ee800f Mon Sep 17 00:00:00 2001
From: safranowith
Date: Sun, 28 Sep 2025 18:09:48 +0300
Subject: [PATCH 11/15] Add CUDA backend support for floor, ceil, round, trunc
 unary ops

---
 docs/ops/CUDA.csv            | 32 ++++++++++++++++++++++++++++++++
 ggml/src/ggml-cuda/unary.cu  | 33 +++++++++++++++++++++++++++++++++
 ggml/src/ggml-cuda/unary.cuh |  8 ++++++++
 ggml/src/ggml.c              |  9 +++++++++
 4 files changed, 82 insertions(+)

diff --git a/docs/ops/CUDA.csv b/docs/ops/CUDA.csv
index 71e47977e31d1..48d0e4ac664df 100644
--- a/docs/ops/CUDA.csv
+++ b/docs/ops/CUDA.csv
@@ -29,6 +29,14 @@
"CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
"CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
"CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
"CUDA0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
"CUDA0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
"CUDA0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
@@ -59,6 +67,14 @@
"CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
"CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
"CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
"CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
"CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
"CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
@@ -89,6 +105,14 @@
"CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
"CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
"CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA" "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" @@ -118,6 +142,14 @@ "CUDA0","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA" +"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA" "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CUDA" "CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CUDA" diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml/src/ggml-cuda/unary.cu index 3c564566a51ff..6b5fe1ff6cea2 100644 --- a/ggml/src/ggml-cuda/unary.cu +++ b/ggml/src/ggml-cuda/unary.cu @@ -88,6 +88,22 @@ static __device__ __forceinline__ float op_elu(float x) { return (x > 0.f) ? 
x : expm1f(x);
 }
 
+static __device__ __forceinline__ float op_floor(float x) {
+    return floorf(x);
+}
+
+static __device__ __forceinline__ float op_ceil(float x) {
+    return ceilf(x);
+}
+
+static __device__ __forceinline__ float op_round(float x) {
+    return roundf(x);
+}
+
+static __device__ __forceinline__ float op_trunc(float x) {
+    return truncf(x);
+}
+
 template <float (*op)(float), typename T>
 static __global__ void unary_op_kernel(const T * x, T * dst, const int k) {
     const int i = blockDim.x*blockIdx.x + threadIdx.x;
@@ -204,6 +220,23 @@ void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
 void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_unary<op_elu>(ctx, dst);
 }
+
+void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_floor>(ctx, dst);
+}
+
+void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_ceil>(ctx, dst);
+}
+
+void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_round>(ctx, dst);
+}
+
+void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_trunc>(ctx, dst);
+}
+
 /* gated ops */
 
 template <float (*op)(float), typename T>
diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml/src/ggml-cuda/unary.cuh
index 8e7644fcd9a48..8f28f89ab7916 100644
--- a/ggml/src/ggml-cuda/unary.cuh
+++ b/ggml/src/ggml-cuda/unary.cuh
@@ -75,3 +75,11 @@ void ggml_cuda_op_geglu_erf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 void ggml_cuda_op_geglu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
 void ggml_cuda_op_xielu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
\ No newline at end of file
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 86f1c31afd7a6..fd778ff82887c 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -6454,6 +6454,15 @@ static void ggml_compute_backward(
                     ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
                 }
             } break;
+            case GGML_UNARY_OP_FLOOR:
+            case GGML_UNARY_OP_CEIL:
+            case GGML_UNARY_OP_ROUND:
+            case GGML_UNARY_OP_TRUNC: {
+                if (src0_needs_grads) {
+                    ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, ggml_new_f32(ctx, 0.0f), src0));
+                }
+            } break;
+
             default: {
                 fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
                         __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));

From 60bdbfb8deb48347539c09350249495ac64328c1 Mon Sep 17 00:00:00 2001
From: safranowith
Date: Sun, 28 Sep 2025 18:12:31 +0300
Subject: [PATCH 12/15] docs: mark CUDA support for FLOOR, CEIL, ROUND and
 TRUNC in ops.md

---
 docs/ops.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/ops.md b/docs/ops.md
index 226cd935d698a..6fc976ac7b452 100644
--- a/docs/ops.md
+++ b/docs/ops.md
@@ -22,7 +22,7 @@ Legend:
 | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
 | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
-| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
+| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ |
 | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
 | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ |
 | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
@@ -42,7 +42,7 @@ Legend:
 | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ |
-|
FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | @@ -84,7 +84,7 @@ Legend: | ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | @@ -111,6 +111,6 @@ Legend: | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | | XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | From 0767a17ab0278bb77b90c2bcf8de405cdd751926 Mon Sep 17 00:00:00 2001 From: safranowith Date: Thu, 16 Oct 2025 17:55:40 +0300 Subject: [PATCH 13/15] fix a bag --- ggml/include/ggml.h | 75 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 8ce0d7a6fbc29..caa1647a78253 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1104,6 +1104,38 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_floor( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_floor_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ceil( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ceil_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_round( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_round_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_trunc( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_trunc_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + // GELU using erf (error function) when possible // some backends may fallback to approximation based on Abramowitz and Stegun formula GGML_API struct ggml_tensor * ggml_gelu_erf( @@ -1943,6 +1975,7 @@ extern "C" { int d1); // dilation dimension 1 GGML_API struct ggml_tensor * ggml_im2col_3d( +<<<<<<< HEAD <<<<<<< HEAD struct ggml_context * ctx, struct ggml_tensor * a, @@ -1960,6 +1993,24 @@ extern "C" { enum ggml_type dst_type); // a: [OC*IC, KD, KH, KW] +======= + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + int64_t IC, + int s0, // stride width + int s1, // stride height + int s2, // stride depth + int p0, // padding width + int p1, // padding height + int p2, // padding depth + int d0, // dilation width + int d1, // dilation height + int d2, // dilation depth + enum ggml_type dst_type); + + // a: [OC*IC, KD, KH, KW] +>>>>>>> 1b66dc4a6 (fix a bag) // b: [N*IC, ID, IH, IW] // result: [N*OC, OD, OH, OW] GGML_API struct ggml_tensor * ggml_conv_3d( @@ -1967,6 +2018,7 @@ extern "C" { struct ggml_tensor * a, struct ggml_tensor * b, int64_t IC, +<<<<<<< HEAD int s0, // stride width int s1, // stride height int s2, // stride depth @@ 
-1976,6 +2028,17 @@ extern "C" { int d0, // dilation width int d1, // dilation height int d2 // dilation depth +======= + int s0, // stride width + int s1, // stride height + int s2, // stride depth + int p0, // padding width + int p1, // padding height + int p2, // padding depth + int d0, // dilation width + int d1, // dilation height + int d2 // dilation depth +>>>>>>> 1b66dc4a6 (fix a bag) ); // kernel size is a->ne[0] x a->ne[1] @@ -2155,6 +2218,18 @@ extern "C" { int p1, int p2, int p3); + GGML_API struct ggml_tensor * ggml_pad_ext( + struct ggml_context * ctx, + struct ggml_tensor * a, + int lp0, + int rp0, + int lp1, + int rp1, + int lp2, + int rp2, + int lp3, + int rp3 + ); GGML_API struct ggml_tensor * ggml_pad_ext( struct ggml_context * ctx, From 0dc6d82c1f93ce5e9c0c2e59516cb691539ed43a Mon Sep 17 00:00:00 2001 From: safranowith Date: Thu, 16 Oct 2025 17:59:36 +0300 Subject: [PATCH 14/15] CUDA: Add support for FLOOR,CEIL,ROUND and TRUNC unary operators --- ggml/include/ggml.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index caa1647a78253..3944ff84744e7 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1975,8 +1975,6 @@ extern "C" { int d1); // dilation dimension 1 GGML_API struct ggml_tensor * ggml_im2col_3d( -<<<<<<< HEAD -<<<<<<< HEAD struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -1993,24 +1991,6 @@ extern "C" { enum ggml_type dst_type); // a: [OC*IC, KD, KH, KW] -======= - struct ggml_context * ctx, - struct ggml_tensor * a, - struct ggml_tensor * b, - int64_t IC, - int s0, // stride width - int s1, // stride height - int s2, // stride depth - int p0, // padding width - int p1, // padding height - int p2, // padding depth - int d0, // dilation width - int d1, // dilation height - int d2, // dilation depth - enum ggml_type dst_type); - - // a: [OC*IC, KD, KH, KW] ->>>>>>> 1b66dc4a6 (fix a bag) // b: [N*IC, ID, IH, IW] // result: [N*OC, OD, OH, OW] GGML_API struct ggml_tensor * ggml_conv_3d( @@ -2018,7 +1998,6 @@ extern "C" { struct ggml_tensor * a, struct ggml_tensor * b, int64_t IC, -<<<<<<< HEAD int s0, // stride width int s1, // stride height int s2, // stride depth @@ -2028,17 +2007,6 @@ extern "C" { int d0, // dilation width int d1, // dilation height int d2 // dilation depth -======= - int s0, // stride width - int s1, // stride height - int s2, // stride depth - int p0, // padding width - int p1, // padding height - int p2, // padding depth - int d0, // dilation width - int d1, // dilation height - int d2 // dilation depth ->>>>>>> 1b66dc4a6 (fix a bag) ); // kernel size is a->ne[0] x a->ne[1] From fe5ae2cede18d7d53f6363e5306cd362c557b01d Mon Sep 17 00:00:00 2001 From: safranowith Date: Mon, 20 Oct 2025 11:08:32 +0300 Subject: [PATCH 15/15] SYCL: Add support for FLOOR,CEIL,ROUND and TRUNC unary operators (#16613) * SYCL: Add support for FLOOR,CEIL,ROUND and TRUNC unary operators Clean up unrelated changes from previous commit * Chore: remove empty lines and fix indentation * Clean up: remove leftover blank lines and fix spacing * chore: fix trailing whitespace and ensure final newline * Cleanup: remove redundant declarations already defined in header * Sync docs/ops.md with updated backend operation support * docs: update ops.md after rebase * docs: update ops.md - Vulkan supports SSM_CONV and SSM_SCAN --- ggml/src/ggml-sycl/element_wise.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/element_wise.cpp 
b/ggml/src/ggml-sycl/element_wise.cpp index 810995d0cbf74..36d681195017d 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -1241,4 +1241,4 @@ void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_trunc(ctx, dst); -} +} \ No newline at end of file
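
For a quick end-to-end check of what the series exposes, here is a minimal usage sketch of the new public API declared in PATCH 01/15. It is illustrative only, not part of the patches or of the PR's test suite: the context size, tensor shape, and input values are arbitrary choices, the graph is computed on the CPU path here (the SYCL and CUDA backends pick these ops up through the dispatch added above), and depending on the tree ggml_graph_compute_with_ctx is declared in ggml.h or in ggml-cpu.h.

#include "ggml.h"
#include "ggml-cpu.h"   // ggml_graph_compute_with_ctx lives here in recent trees
#include <stdio.h>

int main(void) {
    // small scratch context; 16 MiB is an arbitrary size for this sketch
    struct ggml_init_params params = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // contiguous f32 input -- the SYCL/CUDA kernels above require contiguous tensors
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);

    // ggml_ceil, ggml_round and ggml_trunc have the same signature
    struct ggml_tensor * out = ggml_floor(ctx, a);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);

    float * in = (float *) a->data;
    in[0] = -1.7f; in[1] = -0.5f; in[2] = 0.5f; in[3] = 1.7f;

    ggml_graph_compute_with_ctx(ctx, gf, 1);

    float * res = (float *) out->data;
    for (int i = 0; i < 4; ++i) {
        printf("floor(%.1f) = %.1f\n", in[i], res[i]);   // expect -2.0, -1.0, 0.0, 1.0
    }

    ggml_free(ctx);
    return 0;
}

The same graph can be scheduled onto the SYCL or CUDA backend through the ggml-backend API; ggml_backend_sycl_device_supports_op and ggml_backend_cuda_device_supports_op in the patches above gate this on contiguous inputs (and, for SYCL with GGML_SYCL_F16, matching input/output types).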