
Commit f8b7635

Author: Aaron

Adjusted ggml_vec_elu_f16 in vec.h so it mirrors the f32 implementation: promote the input to float, branch on the sign, and only call expm1f on the negative path before converting back to FP16. This restores correct ELU behaviour for FP16 tensors.
1 parent 5993a6f commit f8b7635
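For reference (this note is not part of the original commit message): with alpha = 1, ELU(x) = x for x > 0 and ELU(x) = expm1(x) = e^x - 1 for x <= 0. The old FP16 loop applied expm1f unconditionally, so positive inputs were mapped to e^x - 1 instead of x.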

File tree

1 file changed: +2 -1 lines changed


ggml/src/ggml-cpu/vec.h

Lines changed: 2 additions & 1 deletion
@@ -820,7 +820,8 @@ inline static void ggml_vec_tanh_f16 (const int n, ggml_fp16_t * y, const ggml_f
 inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
 inline static void ggml_vec_elu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = GGML_CPU_FP32_TO_FP16(expm1f(GGML_CPU_FP16_TO_FP32(x[i])));
+        const float v = GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = GGML_CPU_FP32_TO_FP16((v > 0.f) ? v : expm1f(v));
     }
 }
 inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
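Below is a minimal sketch (not part of the commit) of how one might check that the FP16 path now agrees with the FP32 reference. It assumes a translation unit where vec.h and the GGML_CPU_FP16_TO_FP32 / GGML_CPU_FP32_TO_FP16 conversion macros shown in the diff are already available; the test values are illustrative only.

#include <stdio.h>

int main(void) {
    // Mixed-sign inputs: positives exercise the identity branch,
    // negatives (and zero) exercise the expm1f branch.
    const int n = 4;
    float       xf[4] = { -2.0f, -0.5f, 0.0f, 3.0f };
    float       yf[4];
    ggml_fp16_t xh[4], yh[4];

    for (int i = 0; i < n; ++i) {
        xh[i] = GGML_CPU_FP32_TO_FP16(xf[i]);
    }

    ggml_vec_elu_f32(n, yf, xf);   // FP32 reference
    ggml_vec_elu_f16(n, yh, xh);   // patched FP16 path

    for (int i = 0; i < n; ++i) {
        // The two results should agree up to FP16 rounding; before the fix,
        // positive inputs came back as expm1f(x) instead of x.
        printf("x = %+5.2f  f32 = %+8.5f  f16 = %+8.5f\n",
               xf[i], yf[i], GGML_CPU_FP16_TO_FP32(yh[i]));
    }
    return 0;
}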
