From 9db3e6f94a8a1ae43be9989482aee68014cd881e Mon Sep 17 00:00:00 2001 From: safranowith Date: Sun, 14 Sep 2025 11:30:46 +0300 Subject: [PATCH 1/2] Add FLOOR unary op with SYCL support Implemented CPU + SYCL backends --- ggml/include/ggml.h | 10 ++++++++++ ggml/src/ggml-cpu/ops.cpp | 4 ++++ ggml/src/ggml-cpu/unary-ops.cpp | 8 ++++++++ ggml/src/ggml-cpu/unary-ops.h | 1 + ggml/src/ggml-sycl/element_wise.cpp | 30 +++++++++++++++++++++++++++++ ggml/src/ggml-sycl/element_wise.hpp | 2 ++ ggml/src/ggml-sycl/ggml-sycl.cpp | 4 ++++ ggml/src/ggml.c | 17 +++++++++++++++- 8 files changed, 75 insertions(+), 1 deletion(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 7e9c3c8c7a096..5321109c02be4 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -559,6 +559,7 @@ extern "C" { enum ggml_unary_op { GGML_UNARY_OP_ABS, + GGML_UNARY_OP_FLOOR, GGML_UNARY_OP_SGN, GGML_UNARY_OP_NEG, GGML_UNARY_OP_STEP, @@ -1028,6 +1029,15 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_floor( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_floor_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a); diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 8c1f7948855ac..78a942ca91d0d 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -9336,6 +9336,10 @@ void ggml_compute_forward_unary( { ggml_compute_forward_abs(params, dst); } break; + case GGML_UNARY_OP_FLOOR: + { + ggml_compute_forward_floor(params, dst); + } break; case GGML_UNARY_OP_SGN: { ggml_compute_forward_sgn(params, dst); diff --git a/ggml/src/ggml-cpu/unary-ops.cpp b/ggml/src/ggml-cpu/unary-ops.cpp index 4fce569b3bfc8..ea8e51227e8ed 100644 --- a/ggml/src/ggml-cpu/unary-ops.cpp +++ b/ggml/src/ggml-cpu/unary-ops.cpp @@ -4,6 +4,10 @@ static inline float op_abs(float x) { return fabsf(x); } +static inline float op_floor(float x) { + return floorf(x); +} + static inline float op_sgn(float x) { return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f); } @@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor * unary_op(params, dst); } +void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) { + unary_op(params, dst); +} + void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) { unary_op(params, dst); } diff --git a/ggml/src/ggml-cpu/unary-ops.h b/ggml/src/ggml-cpu/unary-ops.h index b1ade2c8e341f..784b2d08d0b00 100644 --- a/ggml/src/ggml-cpu/unary-ops.h +++ b/ggml/src/ggml-cpu/unary-ops.h @@ -7,6 +7,7 @@ extern "C" { #endif void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_floor(const struct ggml_compute_params * params,struct ggml_tensor * dst); void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 0363b06a3ec9b..b16fef3fd8b28 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -39,6 +39,11 @@ static __dpct_inline__ T op_abs(T x) { return sycl::fabs(x); } +template +static __dpct_inline__ T op_floor(T x) { + return sycl::floor(x); +} + template static __dpct_inline__ T op_elu(T x) { return (x > static_cast(0.f)) ? x : sycl::expm1(x); @@ -164,6 +169,13 @@ static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::n } } +template +static void unary_op_floor_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_floor(x[i]); + } +} + template static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { SYCL_GLOBAL_ID_LOOP(k, item_ct1) { @@ -661,6 +673,19 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor }); } +static inline void ggml_sycl_op_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, 256); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), + sycl::range<1>(256)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_floor_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} + static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { @@ -1129,6 +1154,11 @@ void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_op_clamp(ctx, dst); } +void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_floor(ctx, dst); +} + void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_sgn(ctx, dst); diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index 50749e87d783e..5521e78a54bfa 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 18ff4e0b0c7cf..ea22664c73be7 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3626,6 +3626,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_UNARY_OP_ABS: ggml_sycl_abs(ctx, dst); break; + case GGML_UNARY_OP_FLOOR: + ggml_sycl_floor(ctx, dst); + break; case GGML_UNARY_OP_ELU: ggml_sycl_elu(ctx, dst); break; @@ -4182,6 +4185,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_SGN: case GGML_UNARY_OP_ABS: case GGML_UNARY_OP_ELU: + case GGML_UNARY_OP_FLOOR: #if defined (GGML_SYCL_F16) return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type); #else diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index d76ea58f789e2..315582e7a9fd0 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1127,6 +1127,7 @@ static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { "ABS", + "FLOOR", "SGN", "NEG", "STEP", @@ -1143,7 +1144,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { "GELU_ERF", }; -static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15"); +static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16"); static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { @@ -2479,6 +2480,20 @@ struct ggml_tensor * ggml_abs_inplace( return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); } +// ggml_floor + +struct ggml_tensor * ggml_floor( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR); +} + +struct ggml_tensor * ggml_floor_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR); +} + // ggml_sgn struct ggml_tensor * ggml_sgn( From 528e03ed21649046d0a2f37e3e89e2c60a0e33b4 Mon Sep 17 00:00:00 2001 From: safranowith Date: Mon, 15 Sep 2025 11:34:08 +0300 Subject: [PATCH 2/2] add test and fix editorconfig mistakes --- ggml/include/ggml.h | 2 +- ggml/src/ggml-sycl/ggml-sycl.cpp | 4 ++-- ggml/src/ggml.c | 2 +- tests/test-backend-ops.cpp | 40 ++++++++++++++++++++++++++++++++ vendor/miniaudio/miniaudio.h | 6 ++--- 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 5321109c02be4..818c4dd3a76c0 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1037,7 +1037,7 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); - + GGML_API struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index ea22664c73be7..95e261e8c5d42 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3626,9 +3626,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_UNARY_OP_ABS: ggml_sycl_abs(ctx, dst); break; - case GGML_UNARY_OP_FLOOR: + case GGML_UNARY_OP_FLOOR: ggml_sycl_floor(ctx, dst); - break; + break; case GGML_UNARY_OP_ELU: ggml_sycl_elu(ctx, dst); break; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 315582e7a9fd0..933442f768389 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -2489,7 +2489,7 @@ struct ggml_tensor * ggml_floor( } struct ggml_tensor * ggml_floor_inplace( - struct ggml_context * ctx, + struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR); } diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 3a58621094d17..227facd02bae8 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3512,6 +3512,45 @@ struct test_log : public test_case { } }; +// GGML_OP_FLOOR +struct test_floor : public test_case { + const ggml_type type; + const std::array ne; + + std::string vars() override { + return VARS_TO_STR2(type, ne); + } + + test_floor(ggml_type type = GGML_TYPE_F32, + std::array ne = {10, 5, 4, 3}) + : type(type), ne(ne) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_param(a); + ggml_set_name(a, "a"); + + ggml_tensor * out = ggml_floor(ctx, a); + ggml_set_name(out, "out"); + + return out; + } + + void initialize_tensors(ggml_context * ctx) override { + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + init_tensor_uniform(t, -10.0f, 10.0f); + } + } + + float grad_eps() override { + return 1.0f; + } + + bool grad_precise() override { + return true; + } +}; + // GGML_OP_SIN struct test_sin : public test_case { const ggml_type type; @@ -6176,6 +6215,7 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_sqr(type)); test_cases.emplace_back(new test_sqrt(type)); test_cases.emplace_back(new test_log(type)); + test_cases.emplace_back(new test_floor(type)); test_cases.emplace_back(new test_sin(type)); test_cases.emplace_back(new test_cos(type)); test_cases.emplace_back(new test_clamp(type)); diff --git a/vendor/miniaudio/miniaudio.h b/vendor/miniaudio/miniaudio.h index c74bebeb3c725..53499f9a2cd5c 100644 --- a/vendor/miniaudio/miniaudio.h +++ b/vendor/miniaudio/miniaudio.h @@ -28227,7 +28227,7 @@ static ma_result ma_device_start__alsa(ma_device* pDevice) } if (pDevice->type == ma_device_type_playback || pDevice->type == ma_device_type_duplex) { - /* + /* When data is written to the device we wait for the device to get ready to receive data with poll(). In my testing I have observed that poll() can sometimes block forever unless the device is started explicitly with snd_pcm_start() or some data is written with snd_pcm_writei(). @@ -34520,7 +34520,7 @@ static ma_result ma_device_init_internal__coreaudio(ma_context* pContext, ma_dev #endif } - + status = ((ma_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)(pData->audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, sizeof(bestFormat)); if (status != noErr) { ((ma_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)(pData->audioUnit); @@ -38526,7 +38526,7 @@ static ma_result ma_device_reinit__aaudio(ma_device* pDevice, ma_device_type dev ma_device_stop(pDevice); /* Do a full device stop so we set internal state correctly. */ } } - + result = MA_SUCCESS; } done: