From e2dddaf111131665629bf63a92150d27a71feade Mon Sep 17 00:00:00 2001 From: Amir Valizadeh Date: Tue, 28 Oct 2025 14:15:11 -0400 Subject: [PATCH] Add kernel weighting functions (tricube, epanechnikov, gaussian, triangular, quartic) --- src/kernel_weights/mod.rs | 200 ++++++++++++++++++++++++++++++++++++++ src/lib.rs | 22 +++++ tests/kernel_weights.rs | 172 ++++++++++++++++++++++++++++++++ 3 files changed, 394 insertions(+) create mode 100644 src/kernel_weights/mod.rs create mode 100644 tests/kernel_weights.rs diff --git a/src/kernel_weights/mod.rs b/src/kernel_weights/mod.rs new file mode 100644 index 00000000..49fbee71 --- /dev/null +++ b/src/kernel_weights/mod.rs @@ -0,0 +1,200 @@ +//! Kernel weighting functions for statistical smoothing and local regression. +//! +//! This module provides common kernel functions that map a normalized +//! distance `u` (usually `|x_i - x_0| / h`) to a nonnegative weight in `[0, 1]`. +//! +//! These kernels are often used in local regression (LOESS/LOWESS), +//! kernel density estimation, and nonparametric smoothing. +//! +//! Quick reference table: +//! +//! | Kernel | Formula | +//! |---|---| +//! | Tricube | `(1 - |u|^3)^3` for `|u| < 1`, else `0` | +//! | Epanechnikov | `0.75 * (1 - u^2)` for `|u| < 1`, else `0` | +//! | Gaussian | `exp(-0.5 * u^2)` (supports all `u`) | +//! | Triangular | `1 - |u|` for `|u| < 1`, else `0` | +//! | Quartic (biweight) | `(15/16) * (1 - u^2)^2` for `|u| < 1`, else `0` | +//! +//! # Example +//! ``` +//! use ndarray_stats::kernel_weights::{tricube, gaussian}; +//! +//! let w1 = tricube(0.3); +//! let w2 = gaussian(0.3); +//! assert!(w1 > 0.0 && w1 <= 1.0); +//! assert!(w2 > 0.0 && w2 <= 1.0); +//! ``` + +/// Generic trait for kernel functions. +pub trait KernelFn { + fn weight(&self, u: f64) -> f64; +} + +// allow plain function pointers to be used as KernelFn +impl KernelFn for fn(f64) -> f64 { + #[inline] + fn weight(&self, u: f64) -> f64 { + (self)(u) + } +} + +/// Tricube kernel type implementing [`KernelFn`]. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub struct Tricube; +impl KernelFn for Tricube { + #[inline] + fn weight(&self, u: f64) -> f64 { + tricube(u) + } +} +pub const TRICUBE: Tricube = Tricube; + +/// Gaussian kernel type implementing [`KernelFn`]. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub struct Gaussian; +impl KernelFn for Gaussian { + #[inline] + fn weight(&self, u: f64) -> f64 { + gaussian(u) + } +} +pub const GAUSSIAN: Gaussian = Gaussian; + +/// Epanechnikov kernel type implementing [`KernelFn`]. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub struct Epanechnikov; +impl KernelFn for Epanechnikov { + #[inline] + fn weight(&self, u: f64) -> f64 { + epanechnikov(u) + } +} +pub const EPANECHNIKOV: Epanechnikov = Epanechnikov; + +/// Triangular kernel type implementing [`KernelFn`]. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub struct Triangular; +impl KernelFn for Triangular { + #[inline] + fn weight(&self, u: f64) -> f64 { + triangular(u) + } +} +pub const TRIANGULAR: Triangular = Triangular; + +/// Quartic (biweight) kernel type implementing [`KernelFn`]. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub struct Quartic; +impl KernelFn for Quartic { + #[inline] + fn weight(&self, u: f64) -> f64 { + quartic(u) + } +} +pub const QUARTIC: Quartic = Quartic; + +/// Tricube kernel. +/// +/// Defined as `(1 - |u|^3)^3` for `|u| < 1`, and `0` otherwise. +/// +/// # Examples +/// ``` +/// use ndarray_stats::kernel_weights::tricube; +/// assert_eq!(tricube(0.0), 1.0); +/// assert_eq!(tricube(1.0), 0.0); +/// ``` +#[inline] +#[must_use] +pub fn tricube(u: f64) -> f64 { + let u = u.abs(); + if u >= 1.0 { + 0.0 + } else { + let t = 1.0 - u.powi(3); + t.powi(3) + } +} + +/// Epanechnikov kernel. +/// +/// Defined as `0.75 * (1 - u^2)` for `|u| < 1`, and `0` otherwise. +/// Optimal in a mean-square error sense for certain problems. +/// +/// # Example +/// ``` +/// use ndarray_stats::kernel_weights::epanechnikov; +/// assert_eq!(epanechnikov(0.0), 0.75); +/// ``` +#[inline] +#[must_use] +pub fn epanechnikov(u: f64) -> f64 { + let u = u.abs(); + if u >= 1.0 { + 0.0 + } else { + 0.75 * (1.0 - u * u) + } +} + +/// Gaussian kernel. +/// +/// Defined as `exp(-0.5 * u^2)` for all real `u`. +/// +/// # Example +/// ``` +/// use ndarray_stats::kernel_weights::gaussian; +/// assert!((gaussian(0.0) - 1.0).abs() < 1e-12); +/// ``` +#[inline] +#[must_use] +pub fn gaussian(u: f64) -> f64 { + (-0.5 * u * u).exp() +} + +/// Triangular kernel. +/// +/// Defined as `1 - |u|` for `|u| < 1`, and `0` otherwise. +/// Provides linearly decaying weights, often used in moving averages. +/// +/// # Example +/// ``` +/// use ndarray_stats::kernel_weights::triangular; +/// assert_eq!(triangular(0.0), 1.0); +/// assert_eq!(triangular(1.0), 0.0); +/// assert!(triangular(0.5) > 0.0); +/// ``` +#[inline] +#[must_use] +pub fn triangular(u: f64) -> f64 { + let u = u.abs(); + if u >= 1.0 { + 0.0 + } else { + 1.0 - u + } +} + +/// Quartic (biweight) kernel. +/// +/// Defined as `(15/16) * (1 - u^2)^2` for `|u| < 1`, and `0` otherwise. +/// Produces a smooth, compactly supported weighting function often used +/// in kernel density estimation. +/// +/// # Example +/// ``` +/// use ndarray_stats::kernel_weights::quartic; +/// assert_eq!(quartic(0.0), 15.0/16.0); +/// assert_eq!(quartic(1.0), 0.0); +/// ``` +#[inline] +#[must_use] +pub fn quartic(u: f64) -> f64 { + let u = u.abs(); + if u >= 1.0 { + 0.0 + } else { + let t = 1.0 - u * u; + (15.0 / 16.0) * t * t + } +} diff --git a/src/lib.rs b/src/lib.rs index 4ae11004..5fe067bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,6 +33,27 @@ pub use crate::correlation::CorrelationExt; pub use crate::deviation::DeviationExt; pub use crate::entropy::EntropyExt; pub use crate::histogram::HistogramExt; +pub use crate::kernel_weights::{ + epanechnikov, + gaussian, + quartic, + triangular, + // functions + tricube, + Epanechnikov, + Gaussian, + // trait and types (zero-sized kernels) + KernelFn, + Quartic, + Triangular, + Tricube, + EPANECHNIKOV, + GAUSSIAN, + QUARTIC, + TRIANGULAR, + // convenience const instances + TRICUBE, +}; pub use crate::maybe_nan::{MaybeNan, MaybeNanExt}; pub use crate::quantile::{interpolate, Quantile1dExt, QuantileExt}; pub use crate::sort::Sort1dExt; @@ -103,6 +124,7 @@ mod deviation; mod entropy; pub mod errors; pub mod histogram; +pub mod kernel_weights; mod maybe_nan; mod quantile; mod sort; diff --git a/tests/kernel_weights.rs b/tests/kernel_weights.rs new file mode 100644 index 00000000..323065d2 --- /dev/null +++ b/tests/kernel_weights.rs @@ -0,0 +1,172 @@ +use ndarray_stats::kernel_weights::*; + +fn integrate f64>(f: F, a: f64, b: f64, n: usize) -> f64 { + let h = (b - a) / n as f64; + let mut sum = 0.0; + for i in 0..n { + let x0 = a + i as f64 * h; + let x1 = a + (i + 1) as f64 * h; + sum += 0.5 * (f(x0) + f(x1)) * h; + } + sum +} + +fn approx_eq(a: f64, b: f64, tol: f64) -> bool { + (a - b).abs() < tol +} + +#[test] +fn tricube_basic_properties() { + assert!(approx_eq(tricube(0.5), tricube(-0.5), 1e-15)); + assert_eq!(tricube(1.0), 0.0); + assert_eq!(tricube(-1.0), 0.0); + assert_eq!(tricube(0.0), 1.0); + assert!(tricube(0.25) > tricube(0.5)); + assert!(tricube(0.5) > tricube(0.75)); +} + +#[test] +fn epanechnikov_behavior() { + assert!(approx_eq(epanechnikov(0.3), epanechnikov(-0.3), 1e-15)); + assert_eq!(epanechnikov(1.0), 0.0); + assert_eq!(epanechnikov(-1.0), 0.0); + assert!(epanechnikov(0.0) > epanechnikov(0.8)); + assert!(epanechnikov(0.5) > 0.0); + assert!(epanechnikov(0.5) < epanechnikov(0.0)); +} + +#[test] +fn quartic_behavior() { + assert!(approx_eq(quartic(0.0), 15.0 / 16.0, 1e-12)); + assert_eq!(quartic(1.0), 0.0); + assert_eq!(quartic(-1.0), 0.0); + assert!(approx_eq(quartic(0.3), quartic(-0.3), 1e-15)); + assert!(quartic(0.25) > quartic(0.75)); + assert_eq!(quartic(1.1), 0.0); +} + +#[test] +fn triangular_behavior() { + assert_eq!(triangular(0.0), 1.0); + assert_eq!(triangular(1.0), 0.0); + assert_eq!(triangular(-1.0), 0.0); + assert!(approx_eq(triangular(0.3), triangular(-0.3), 1e-15)); + assert!(triangular(0.25) > triangular(0.75)); + assert_eq!(triangular(1.2), 0.0); +} + +#[test] +fn gaussian_behavior() { + assert!(approx_eq(gaussian(0.5), gaussian(-0.5), 1e-15)); + assert!(gaussian(0.0) > gaussian(1.0)); + assert!(gaussian(2.0) < 0.2); + for u in [-3.0, -1.0, 0.0, 1.0, 3.0] { + assert!(gaussian(u) >= 0.0); + } +} + +#[test] +fn kernel_trait_usage() { + let t = Tricube; + let g = Gaussian; + assert_eq!(t.weight(0.0), 1.0); + assert!(g.weight(1.0) < 1.0); + assert!(approx_eq(t.weight(0.5), tricube(0.5), 1e-15)); + struct Linear; + impl KernelFn for Linear { + fn weight(&self, u: f64) -> f64 { + (1.0 - u.abs()).max(0.0) + } + } + let lin = Linear; + assert_eq!(lin.weight(0.0), 1.0); + assert_eq!(lin.weight(1.5), 0.0); +} + +#[test] +fn kernel_types_and_consts() { + assert_eq!(TRICUBE, Tricube::default()); + assert_eq!(GAUSSIAN, Gaussian::default()); + assert_eq!(EPANECHNIKOV, Epanechnikov::default()); + assert_eq!(TRIANGULAR, Triangular::default()); + assert_eq!(QUARTIC, Quartic::default()); + let t1 = Tricube; + let t2 = t1; + let t3 = t1.clone(); + assert_eq!(t1, t2); + assert_eq!(t1, t3); + let _ = format!("{:?}", TRICUBE); +} + +#[test] +fn fn_pointer_implements_kernelfn() { + let f: fn(f64) -> f64 = gaussian; + assert!(approx_eq(f.weight(0.0), gaussian(0.0), 1e-15)); + assert!(approx_eq( + (tricube as fn(f64) -> f64).weight(0.5), + tricube(0.5), + 1e-15 + )); +} + +#[test] +fn monotonicity_samples() { + let kernels: [fn(f64) -> f64; 4] = [tricube, epanechnikov, quartic, triangular]; + let samples = [0.0_f64, 0.25, 0.5, 0.75, 0.99]; + for &k in &kernels { + let mut prev = k(0.0); + for &u in &samples[1..] { + let cur = k(u); + assert!( + cur <= prev + 1e-12, + "kernel not nonincreasing at u={}, prev={}, cur={}", + u, + prev, + cur + ); + prev = cur; + } + } +} + +#[test] +fn integrate_tricube_to_one() { + let integral = integrate(tricube, -1.0, 1.0, 10_000); + let expected = 81.0 / 70.0; + assert!( + (integral - expected).abs() < 1e-3, + "Tricube integral ≈ {}, expected ≈ {}", + integral, + expected + ); +} + +#[test] +fn integrate_epanechnikov_to_one() { + let integral = integrate(epanechnikov, -1.0, 1.0, 10_000); + assert!( + (integral - 1.0).abs() < 1e-3, + "Epanechnikov integral ≈ {}", + integral + ); +} + +#[test] +fn integrate_quartic_to_one() { + let integral = integrate(quartic, -1.0, 1.0, 10_000); + assert!( + (integral - 1.0).abs() < 1e-3, + "Quartic integral ≈ {}", + integral + ); +} + +#[test] +fn integrate_triangular_to_one() { + let integral = integrate(triangular, -1.0, 1.0, 10_000); + assert!( + (integral - 1.0).abs() < 1e-3, + "Triangular integral ≈ {}", + integral + ); +}