From 56d04f049c4323aa09a7b87a421943c9dc299554 Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 9 Oct 2025 17:02:26 +0530 Subject: [PATCH 1/7] Implement `simd_fma` and `simd_relaxed_fma` in const-eval --- src/intrinsics/simd.rs | 58 ------------------------------------------ src/machine.rs | 4 +-- 2 files changed, 2 insertions(+), 60 deletions(-) diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index 1e7366b5a8..2b176093cb 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -1,5 +1,3 @@ -use rand::Rng; -use rustc_apfloat::Float; use rustc_middle::ty; use rustc_middle::ty::FloatTy; @@ -83,62 +81,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this.write_scalar(val, &dest)?; } } - "fma" | "relaxed_fma" => { - let [a, b, c] = check_intrinsic_arg_count(args)?; - let (a, a_len) = this.project_to_simd(a)?; - let (b, b_len) = this.project_to_simd(b)?; - let (c, c_len) = this.project_to_simd(c)?; - let (dest, dest_len) = this.project_to_simd(dest)?; - - assert_eq!(dest_len, a_len); - assert_eq!(dest_len, b_len); - assert_eq!(dest_len, c_len); - - for i in 0..dest_len { - let a = this.read_scalar(&this.project_index(&a, i)?)?; - let b = this.read_scalar(&this.project_index(&b, i)?)?; - let c = this.read_scalar(&this.project_index(&c, i)?)?; - let dest = this.project_index(&dest, i)?; - - let fuse: bool = intrinsic_name == "fma" - || (this.machine.float_nondet && this.machine.rng.get_mut().random()); - - // Works for f32 and f64. - // FIXME: using host floats to work around https://github.com/rust-lang/miri/issues/2468. - let ty::Float(float_ty) = dest.layout.ty.kind() else { - span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name) - }; - let val = match float_ty { - FloatTy::F16 => unimplemented!("f16_f128"), - FloatTy::F32 => { - let a = a.to_f32()?; - let b = b.to_f32()?; - let c = c.to_f32()?; - let res = if fuse { - a.mul_add(b, c).value - } else { - ((a * b).value + c).value - }; - let res = this.adjust_nan(res, &[a, b, c]); - Scalar::from(res) - } - FloatTy::F64 => { - let a = a.to_f64()?; - let b = b.to_f64()?; - let c = c.to_f64()?; - let res = if fuse { - a.mul_add(b, c).value - } else { - ((a * b).value + c).value - }; - let res = this.adjust_nan(res, &[a, b, c]); - Scalar::from(res) - } - FloatTy::F128 => unimplemented!("f16_f128"), - }; - this.write_scalar(val, &dest)?; - } - } "expose_provenance" => { let [op] = check_intrinsic_arg_count(args)?; let (op, op_len) = this.project_to_simd(op)?; diff --git a/src/machine.rs b/src/machine.rs index fadbdf5cea..da90f6b846 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1324,8 +1324,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { } #[inline(always)] - fn float_fuse_mul_add(ecx: &mut InterpCx<'tcx, Self>) -> bool { - ecx.machine.float_nondet && ecx.machine.rng.get_mut().random() + fn float_fuse_mul_add(ecx: &InterpCx<'tcx, Self>) -> bool { + ecx.machine.float_nondet && ecx.machine.rng.borrow_mut().random() } #[inline(always)] From 1cfc1c7704e52edd56629e3325c133000dcc09d1 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 2 Nov 2025 15:40:56 +0100 Subject: [PATCH 2/7] remove `unsafe` from `_mm_pause` uses --- tests/pass/shims/x86/intrinsics-x86-sse2.rs | 2 +- tests/pass/shims/x86/intrinsics-x86.rs | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/tests/pass/shims/x86/intrinsics-x86-sse2.rs b/tests/pass/shims/x86/intrinsics-x86-sse2.rs index 731d8b5776..242aa0e89f 100644 --- a/tests/pass/shims/x86/intrinsics-x86-sse2.rs +++ b/tests/pass/shims/x86/intrinsics-x86-sse2.rs @@ -54,7 +54,7 @@ unsafe fn test_sse2() { } fn test_mm_pause() { - unsafe { _mm_pause() } + _mm_pause() } test_mm_pause(); diff --git a/tests/pass/shims/x86/intrinsics-x86.rs b/tests/pass/shims/x86/intrinsics-x86.rs index 90bcdba435..a18b6d0152 100644 --- a/tests/pass/shims/x86/intrinsics-x86.rs +++ b/tests/pass/shims/x86/intrinsics-x86.rs @@ -7,17 +7,13 @@ mod x86 { fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) { let mut sum = 0; - // SAFETY: There are no safety requirements for calling `_addcarry_u32`. - // It's just unsafe for API consistency with other intrinsics. - let c_out = unsafe { arch::_addcarry_u32(c_in, a, b, &mut sum) }; + let c_out = arch::_addcarry_u32(c_in, a, b, &mut sum); (c_out, sum) } fn sbb(b_in: u8, a: u32, b: u32) -> (u8, u32) { let mut sum = 0; - // SAFETY: There are no safety requirements for calling `_subborrow_u32`. - // It's just unsafe for API consistency with other intrinsics. - let b_out = unsafe { arch::_subborrow_u32(b_in, a, b, &mut sum) }; + let b_out = arch::_subborrow_u32(b_in, a, b, &mut sum); (b_out, sum) } @@ -52,17 +48,13 @@ mod x86_64 { fn adc(c_in: u8, a: u64, b: u64) -> (u8, u64) { let mut sum = 0; - // SAFETY: There are no safety requirements for calling `_addcarry_u64`. - // It's just unsafe for API consistency with other intrinsics. - let c_out = unsafe { arch::_addcarry_u64(c_in, a, b, &mut sum) }; + let c_out = arch::_addcarry_u64(c_in, a, b, &mut sum); (c_out, sum) } fn sbb(b_in: u8, a: u64, b: u64) -> (u8, u64) { let mut sum = 0; - // SAFETY: There are no safety requirements for calling `_subborrow_u64`. - // It's just unsafe for API consistency with other intrinsics. - let b_out = unsafe { arch::_subborrow_u64(b_in, a, b, &mut sum) }; + let b_out = arch::_subborrow_u64(b_in, a, b, &mut sum); (b_out, sum) } From abd2b52543d4227a4eddd92aafde483d7e18d4c8 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 2 Nov 2025 17:29:38 +0100 Subject: [PATCH 3/7] fix `_mm256_permute2f128` miri tests --- tests/pass/shims/x86/intrinsics-x86-avx.rs | 31 ++++++++++--------- .../x86/intrinsics-x86-pause-without-sse2.rs | 7 +---- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/tests/pass/shims/x86/intrinsics-x86-avx.rs b/tests/pass/shims/x86/intrinsics-x86-avx.rs index b3c2434c0d..9f7c12c439 100644 --- a/tests/pass/shims/x86/intrinsics-x86-avx.rs +++ b/tests/pass/shims/x86/intrinsics-x86-avx.rs @@ -829,15 +829,16 @@ unsafe fn test_avx() { #[target_feature(enable = "avx")] unsafe fn test_mm256_permute2f128_ps() { - let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); - let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); - let r = _mm256_permute2f128_ps::<0x13>(a, b); - let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.); + let a = _mm256_setr_ps(11., 12., 13., 14., 15., 16., 17., 18.); + let b = _mm256_setr_ps(21., 22., 23., 24., 25., 26., 27., 28.); + let r = _mm256_permute2f128_ps::<0b0001_0011>(a, b); + let e = _mm256_setr_ps(25., 26., 27., 28., 15., 16., 17., 18.); assert_eq_m256(r, e); - let r = _mm256_permute2f128_ps::<0x44>(a, b); - let e = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); - assert_eq_m256(r, e); + // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field. + let r = _mm256_permute2f128_ps::<0b1001_1011>(a, b); + let z = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + assert_eq_m256(r, z); } test_mm256_permute2f128_ps(); @@ -845,11 +846,12 @@ unsafe fn test_avx() { unsafe fn test_mm256_permute2f128_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); - let r = _mm256_permute2f128_pd::<0x31>(a, b); + let r = _mm256_permute2f128_pd::<0b0011_0001>(a, b); let e = _mm256_setr_pd(3., 4., 7., 8.); assert_eq_m256d(r, e); - let r = _mm256_permute2f128_pd::<0x44>(a, b); + // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field. + let r = _mm256_permute2f128_pd::<0b1011_1001>(a, b); let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0); assert_eq_m256d(r, e); } @@ -857,13 +859,14 @@ unsafe fn test_avx() { #[target_feature(enable = "avx")] unsafe fn test_mm256_permute2f128_si256() { - let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4); - let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8); - let r = _mm256_permute2f128_si256::<0x20>(a, b); - let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let a = _mm256_setr_epi32(11, 12, 13, 14, 15, 16, 17, 18); + let b = _mm256_setr_epi32(21, 22, 23, 24, 25, 26, 27, 28); + let r = _mm256_permute2f128_si256::<0b0010_0000>(a, b); + let e = _mm256_setr_epi32(11, 12, 13, 14, 21, 22, 23, 24); assert_eq_m256i(r, e); - let r = _mm256_permute2f128_si256::<0x44>(a, b); + // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field. + let r = _mm256_permute2f128_si256::<0b1010_1000>(a, b); let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } diff --git a/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs b/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs index 6ca53c0eb6..a4160977ce 100644 --- a/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs +++ b/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs @@ -9,10 +9,5 @@ use std::arch::x86_64::*; fn main() { assert!(!is_x86_feature_detected!("sse2")); - - unsafe { - // This is a SSE2 intrinsic, but it behaves as a no-op when SSE2 - // is not available, so it is always safe to call. - _mm_pause(); - } + _mm_pause(); } From 38ff9f3b3f2b9c3cb45d25891292dd5614378f9e Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 2 Nov 2025 18:03:09 +0100 Subject: [PATCH 4/7] remove miri `_mm256_permute2f128` fallback implementation it is no longer used (and was also incorrect) --- src/shims/x86/avx.rs | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/src/shims/x86/avx.rs b/src/shims/x86/avx.rs index 269ce3b51b..ec365aa1b4 100644 --- a/src/shims/x86/avx.rs +++ b/src/shims/x86/avx.rs @@ -217,50 +217,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { )?; } } - // Used to implement the _mm256_permute2f128_ps, _mm256_permute2f128_pd and - // _mm256_permute2f128_si256 functions. Regardless of the suffix in the name - // thay all can be considered to operate on vectors of 128-bit elements. - // For each 128-bit element of `dest`, copies one from `left`, `right` or - // zero, according to `imm`. - "vperm2f128.ps.256" | "vperm2f128.pd.256" | "vperm2f128.si.256" => { - let [left, right, imm] = - this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - - assert_eq!(dest.layout, left.layout); - assert_eq!(dest.layout, right.layout); - assert_eq!(dest.layout.size.bits(), 256); - - // Transmute to `[u128; 2]` to process each 128-bit chunk independently. - let u128x2_layout = - this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, 2))?; - let left = left.transmute(u128x2_layout, this)?; - let right = right.transmute(u128x2_layout, this)?; - let dest = dest.transmute(u128x2_layout, this)?; - - let imm = this.read_scalar(imm)?.to_u8()?; - - for i in 0..2 { - let dest = this.project_index(&dest, i)?; - - let imm = match i { - 0 => imm & 0xF, - 1 => imm >> 4, - _ => unreachable!(), - }; - if imm & 0b100 != 0 { - this.write_scalar(Scalar::from_u128(0), &dest)?; - } else { - let src = match imm { - 0b00 => this.project_index(&left, 0)?, - 0b01 => this.project_index(&left, 1)?, - 0b10 => this.project_index(&right, 0)?, - 0b11 => this.project_index(&right, 1)?, - _ => unreachable!(), - }; - this.copy_op(&src, &dest)?; - } - } - } // Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps // and _mm256_maskload_pd functions. // For the element `i`, if the high bit of the `i`-th element of `mask` From 2c449a40683890f979d7cee89f85e367c249b776 Mon Sep 17 00:00:00 2001 From: sayantn Date: Sat, 11 Oct 2025 03:24:23 +0530 Subject: [PATCH 5/7] Add Miri tests for f16/f128 SIMD operations --- tests/pass/intrinsics/portable-simd.rs | 262 ++++++++++++++++++++++++- 1 file changed, 260 insertions(+), 2 deletions(-) diff --git a/tests/pass/intrinsics/portable-simd.rs b/tests/pass/intrinsics/portable-simd.rs index e2cd08733a..b7d2584c58 100644 --- a/tests/pass/intrinsics/portable-simd.rs +++ b/tests/pass/intrinsics/portable-simd.rs @@ -6,18 +6,143 @@ rustc_attrs, intrinsics, core_intrinsics, - repr_simd + repr_simd, + f16, + f128 )] -#![allow(incomplete_features, internal_features)] +#![allow(incomplete_features, internal_features, non_camel_case_types)] +use std::fmt::{self, Debug, Formatter}; use std::intrinsics::simd as intrinsics; use std::ptr; use std::simd::StdFloat; use std::simd::prelude::*; +#[repr(simd, packed)] +#[derive(Copy)] +struct PackedSimd([T; N]); + +impl Clone for PackedSimd { + fn clone(&self) -> Self { + *self + } +} + +impl PartialEq for PackedSimd { + fn eq(&self, other: &Self) -> bool { + self.into_array() == other.into_array() + } +} + +impl Debug for PackedSimd { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Debug::fmt(&self.into_array(), f) + } +} + +type f16x2 = PackedSimd; +type f16x4 = PackedSimd; + +type f128x2 = PackedSimd; +type f128x4 = PackedSimd; + +impl PackedSimd { + fn splat(x: T) -> Self { + Self([x; N]) + } + fn from_array(a: [T; N]) -> Self { + Self(a) + } + fn into_array(self) -> [T; N] { + // as we have `repr(packed)`, there shouldn't be any padding bytes + unsafe { std::mem::transmute_copy(&self) } + } +} + #[rustc_intrinsic] #[rustc_nounwind] pub unsafe fn simd_shuffle_const_generic(x: T, y: T) -> U; +pub fn simd_ops_f16() { + use intrinsics::*; + + // small hack to make type inference better + macro_rules! assert_eq { + ($a:expr, $b:expr $(,$t:tt)*) => {{ + let a = $a; + let b = $b; + if false { let _inference = b == a; } + ::std::assert_eq!(a, b, $(,$t)*) + }} + } + + let a = f16x4::splat(10.0); + let b = f16x4::from_array([1.0, 2.0, 3.0, -4.0]); + + unsafe { + assert_eq!(simd_neg(b), f16x4::from_array([-1.0, -2.0, -3.0, 4.0])); + assert_eq!(simd_add(a, b), f16x4::from_array([11.0, 12.0, 13.0, 6.0])); + assert_eq!(simd_sub(a, b), f16x4::from_array([9.0, 8.0, 7.0, 14.0])); + assert_eq!(simd_mul(a, b), f16x4::from_array([10.0, 20.0, 30.0, -40.0])); + assert_eq!(simd_div(b, a), f16x4::from_array([0.1, 0.2, 0.3, -0.4])); + assert_eq!(simd_div(a, f16x4::splat(2.0)), f16x4::splat(5.0)); + assert_eq!(simd_rem(a, b), f16x4::from_array([0.0, 0.0, 1.0, 2.0])); + assert_eq!(simd_fabs(b), f16x4::from_array([1.0, 2.0, 3.0, 4.0])); + assert_eq!( + simd_fmax(a, simd_mul(b, f16x4::splat(4.0))), + f16x4::from_array([10.0, 10.0, 12.0, 10.0]) + ); + assert_eq!( + simd_fmin(a, simd_mul(b, f16x4::splat(4.0))), + f16x4::from_array([4.0, 8.0, 10.0, -16.0]) + ); + + assert_eq!(simd_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_fma(f16x4::splat(-3.2), b, f16x4::splat(f16::NEG_INFINITY)), + f16x4::splat(f16::NEG_INFINITY) + ); + + assert_eq!(simd_relaxed_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_relaxed_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_relaxed_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_relaxed_fma(f16x4::splat(-3.2), b, f16x4::splat(f16::NEG_INFINITY)), + f16x4::splat(f16::NEG_INFINITY) + ); + + assert_eq!(simd_eq(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, !0, 0, 0])); + assert_eq!(simd_ne(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, 0, !0, !0])); + assert_eq!(simd_le(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, !0, !0, 0])); + assert_eq!(simd_lt(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, 0, !0, 0])); + assert_eq!(simd_ge(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, !0, 0, !0])); + assert_eq!(simd_gt(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, 0, 0, !0])); + + assert_eq!(simd_reduce_add_ordered(a, 0.0), 40.0f16); + assert_eq!(simd_reduce_add_ordered(b, 0.0), 2.0f16); + assert_eq!(simd_reduce_mul_ordered(a, 1.0), 10000.0f16); + assert_eq!(simd_reduce_mul_ordered(b, 1.0), -24.0f16); + assert_eq!(simd_reduce_max(a), 10.0f16); + assert_eq!(simd_reduce_max(b), 3.0f16); + assert_eq!(simd_reduce_min(a), 10.0f16); + assert_eq!(simd_reduce_min(b), -4.0f16); + + assert_eq!( + simd_fmax(f16x2::from_array([0.0, f16::NAN]), f16x2::from_array([f16::NAN, 0.0])), + f16x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_max(f16x2::from_array([0.0, f16::NAN])), 0.0f16); + assert_eq!(simd_reduce_max(f16x2::from_array([f16::NAN, 0.0])), 0.0f16); + assert_eq!( + simd_fmin(f16x2::from_array([0.0, f16::NAN]), f16x2::from_array([f16::NAN, 0.0])), + f16x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_min(f16x2::from_array([0.0, f16::NAN])), 0.0f16); + assert_eq!(simd_reduce_min(f16x2::from_array([f16::NAN, 0.0])), 0.0f16); + } +} + fn simd_ops_f32() { let a = f32x4::splat(10.0); let b = f32x4::from_array([1.0, 2.0, 3.0, -4.0]); @@ -148,6 +273,87 @@ fn simd_ops_f64() { assert_eq!(f64x2::from_array([f64::NAN, 0.0]).reduce_min(), 0.0); } +pub fn simd_ops_f128() { + use intrinsics::*; + + // small hack to make type inference better + macro_rules! assert_eq { + ($a:expr, $b:expr $(,$t:tt)*) => {{ + let a = $a; + let b = $b; + if false { let _inference = b == a; } + ::std::assert_eq!(a, b, $(,$t)*) + }} + } + + let a = f128x4::splat(10.0); + let b = f128x4::from_array([1.0, 2.0, 3.0, -4.0]); + + unsafe { + assert_eq!(simd_neg(b), f128x4::from_array([-1.0, -2.0, -3.0, 4.0])); + assert_eq!(simd_add(a, b), f128x4::from_array([11.0, 12.0, 13.0, 6.0])); + assert_eq!(simd_sub(a, b), f128x4::from_array([9.0, 8.0, 7.0, 14.0])); + assert_eq!(simd_mul(a, b), f128x4::from_array([10.0, 20.0, 30.0, -40.0])); + assert_eq!(simd_div(b, a), f128x4::from_array([0.1, 0.2, 0.3, -0.4])); + assert_eq!(simd_div(a, f128x4::splat(2.0)), f128x4::splat(5.0)); + assert_eq!(simd_rem(a, b), f128x4::from_array([0.0, 0.0, 1.0, 2.0])); + assert_eq!(simd_fabs(b), f128x4::from_array([1.0, 2.0, 3.0, 4.0])); + assert_eq!( + simd_fmax(a, simd_mul(b, f128x4::splat(4.0))), + f128x4::from_array([10.0, 10.0, 12.0, 10.0]) + ); + assert_eq!( + simd_fmin(a, simd_mul(b, f128x4::splat(4.0))), + f128x4::from_array([4.0, 8.0, 10.0, -16.0]) + ); + + assert_eq!(simd_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_fma(f128x4::splat(-3.2), b, f128x4::splat(f128::NEG_INFINITY)), + f128x4::splat(f128::NEG_INFINITY) + ); + + assert_eq!(simd_relaxed_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_relaxed_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_relaxed_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_relaxed_fma(f128x4::splat(-3.2), b, f128x4::splat(f128::NEG_INFINITY)), + f128x4::splat(f128::NEG_INFINITY) + ); + + assert_eq!(simd_eq(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, !0, 0, 0])); + assert_eq!(simd_ne(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, 0, !0, !0])); + assert_eq!(simd_le(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, !0, !0, 0])); + assert_eq!(simd_lt(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, 0, !0, 0])); + assert_eq!(simd_ge(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, !0, 0, !0])); + assert_eq!(simd_gt(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, 0, 0, !0])); + + assert_eq!(simd_reduce_add_ordered(a, 0.0), 40.0f128); + assert_eq!(simd_reduce_add_ordered(b, 0.0), 2.0f128); + assert_eq!(simd_reduce_mul_ordered(a, 1.0), 10000.0f128); + assert_eq!(simd_reduce_mul_ordered(b, 1.0), -24.0f128); + assert_eq!(simd_reduce_max(a), 10.0f128); + assert_eq!(simd_reduce_max(b), 3.0f128); + assert_eq!(simd_reduce_min(a), 10.0f128); + assert_eq!(simd_reduce_min(b), -4.0f128); + + assert_eq!( + simd_fmax(f128x2::from_array([0.0, f128::NAN]), f128x2::from_array([f128::NAN, 0.0])), + f128x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_max(f128x2::from_array([0.0, f128::NAN])), 0.0f128); + assert_eq!(simd_reduce_max(f128x2::from_array([f128::NAN, 0.0])), 0.0f128); + assert_eq!( + simd_fmin(f128x2::from_array([0.0, f128::NAN]), f128x2::from_array([f128::NAN, 0.0])), + f128x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_min(f128x2::from_array([0.0, f128::NAN])), 0.0f128); + assert_eq!(simd_reduce_min(f128x2::from_array([f128::NAN, 0.0])), 0.0f128); + } +} + fn simd_ops_i32() { let a = i32x4::splat(10); let b = i32x4::from_array([1, 2, 3, -4]); @@ -563,6 +769,31 @@ fn simd_gather_scatter() { } fn simd_round() { + unsafe { + use intrinsics::*; + + assert_eq!( + simd_ceil(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([1.0, 2.0, 2.0, -4.0]) + ); + assert_eq!( + simd_floor(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([0.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([1.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round_ties_even(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([1.0, 1.0, 2.0, -4.0]) + ); + assert_eq!( + simd_trunc(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([0.0, 1.0, 2.0, -4.0]) + ); + } + assert_eq!( f32x4::from_array([0.9, 1.001, 2.0, -4.5]).ceil(), f32x4::from_array([1.0, 2.0, 2.0, -4.0]) @@ -604,6 +835,31 @@ fn simd_round() { f64x4::from_array([0.9, 1.001, 2.0, -4.5]).trunc(), f64x4::from_array([0.0, 1.0, 2.0, -4.0]) ); + + unsafe { + use intrinsics::*; + + assert_eq!( + simd_ceil(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([1.0, 2.0, 2.0, -4.0]) + ); + assert_eq!( + simd_floor(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([0.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([1.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round_ties_even(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([1.0, 1.0, 2.0, -4.0]) + ); + assert_eq!( + simd_trunc(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([0.0, 1.0, 2.0, -4.0]) + ); + } } fn simd_intrinsics() { @@ -724,8 +980,10 @@ fn simd_ops_non_pow2() { fn main() { simd_mask(); + simd_ops_f16(); simd_ops_f32(); simd_ops_f64(); + simd_ops_f128(); simd_ops_i32(); simd_ops_non_pow2(); simd_cast(); From f20407d0b22f00ce3e17d09713fdb4d04951c1a6 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Tue, 4 Nov 2025 04:53:50 +0000 Subject: [PATCH 6/7] Prepare for merging from rust-lang/rust This updates the rust-version file to 5f9dd05862d2e4bceb3be1031b6c936e35671501. --- rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-version b/rust-version index 0e89b4ab6a..036282b12f 100644 --- a/rust-version +++ b/rust-version @@ -1 +1 @@ -c5dabe8cf798123087d094f06417f5a767ca73e8 +5f9dd05862d2e4bceb3be1031b6c936e35671501 From c38128b0d43ae39f93bc8039b7950ab2000ec925 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 4 Nov 2025 08:34:42 +0100 Subject: [PATCH 7/7] wasi is too strange of a target, remove it for now --- README.md | 1 - ci/ci.sh | 1 - src/shims/env.rs | 2 +- src/shims/foreign_items.rs | 5 -- src/shims/mod.rs | 1 - src/shims/tls.rs | 1 - src/shims/wasi/foreign_items.rs | 110 -------------------------------- src/shims/wasi/mod.rs | 1 - 8 files changed, 1 insertion(+), 121 deletions(-) delete mode 100644 src/shims/wasi/foreign_items.rs delete mode 100644 src/shims/wasi/mod.rs diff --git a/README.md b/README.md index 0cbfe0e96a..f6c675839e 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,6 @@ degree documented below): - `solaris` / `illumos`: maintained by @devnexen. Supports the entire test suite. - `freebsd`: maintained by @YohDeadfall and @LorrensP-2158466. Supports the entire test suite. - `android`: **maintainer wanted**. Support very incomplete, but a basic "hello world" works. - - `wasi`: **maintainer wanted**. Support very incomplete, but a basic "hello world" works. - For targets on other operating systems, Miri might fail before even reaching the `main` function. However, even for targets that we do support, the degree of support for accessing platform APIs diff --git a/ci/ci.sh b/ci/ci.sh index bcc110f648..2d27f02749 100755 --- a/ci/ci.sh +++ b/ci/ci.sh @@ -153,7 +153,6 @@ case $HOST_TARGET in BASIC="empty_main integer heap_alloc libc-mem vec string btreemap" # ensures we have the basics: pre-main code, system allocator UNIX="hello panic/panic panic/unwind concurrency/simple atomic libc-mem libc-misc libc-random env num_cpus" # the things that are very similar across all Unixes, and hence easily supported there TEST_TARGET=aarch64-linux-android run_tests_minimal $BASIC $UNIX time hashmap random thread sync concurrency epoll eventfd - TEST_TARGET=wasm32-wasip2 run_tests_minimal $BASIC hello wasm TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std empty_main wasm # this target doesn't really have std TEST_TARGET=thumbv7em-none-eabihf run_tests_minimal no_std ;; diff --git a/src/shims/env.rs b/src/shims/env.rs index 689cd3a726..b9fb9192df 100644 --- a/src/shims/env.rs +++ b/src/shims/env.rs @@ -51,7 +51,7 @@ impl<'tcx> EnvVars<'tcx> { } else if ecx.tcx.sess.target.os == "windows" { EnvVars::Windows(WindowsEnvVars::new(ecx, env_vars)?) } else { - // Used e.g. for wasi + // For "none" targets (i.e., without an OS). EnvVars::Uninit }; ecx.machine.env_vars = env_vars; diff --git a/src/shims/foreign_items.rs b/src/shims/foreign_items.rs index 74a1ac729e..54fe27382e 100644 --- a/src/shims/foreign_items.rs +++ b/src/shims/foreign_items.rs @@ -102,7 +102,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let this = self.eval_context_ref(); match this.tcx.sess.target.os.as_ref() { os if this.target_os_is_unix() => shims::unix::foreign_items::is_dyn_sym(name, os), - "wasi" => shims::wasi::foreign_items::is_dyn_sym(name), "windows" => shims::windows::foreign_items::is_dyn_sym(name), _ => false, } @@ -846,10 +845,6 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { shims::unix::foreign_items::EvalContextExt::emulate_foreign_item_inner( this, link_name, abi, args, dest, ), - "wasi" => - shims::wasi::foreign_items::EvalContextExt::emulate_foreign_item_inner( - this, link_name, abi, args, dest, - ), "windows" => shims::windows::foreign_items::EvalContextExt::emulate_foreign_item_inner( this, link_name, abi, args, dest, diff --git a/src/shims/mod.rs b/src/shims/mod.rs index 7f7bc3b1cf..e51ace2fd9 100644 --- a/src/shims/mod.rs +++ b/src/shims/mod.rs @@ -8,7 +8,6 @@ mod math; #[cfg(all(unix, feature = "native-lib"))] mod native_lib; mod unix; -mod wasi; mod windows; mod x86; diff --git a/src/shims/tls.rs b/src/shims/tls.rs index 1200029692..9dc829d7a1 100644 --- a/src/shims/tls.rs +++ b/src/shims/tls.rs @@ -253,7 +253,6 @@ impl<'tcx> TlsDtorsState<'tcx> { } _ => { // No TLS dtor support. - // FIXME: should we do something on wasi? break 'new_state Done; } } diff --git a/src/shims/wasi/foreign_items.rs b/src/shims/wasi/foreign_items.rs deleted file mode 100644 index ffc02dc986..0000000000 --- a/src/shims/wasi/foreign_items.rs +++ /dev/null @@ -1,110 +0,0 @@ -use rustc_abi::CanonAbi; -use rustc_middle::ty::Ty; -use rustc_span::Symbol; -use rustc_target::callconv::FnAbi; - -use crate::shims::alloc::EvalContextExt as _; -use crate::*; - -pub fn is_dyn_sym(_name: &str) -> bool { - false -} - -impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} -pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { - fn emulate_foreign_item_inner( - &mut self, - link_name: Symbol, - abi: &FnAbi<'tcx, Ty<'tcx>>, - args: &[OpTy<'tcx>], - dest: &MPlaceTy<'tcx>, - ) -> InterpResult<'tcx, EmulateItemResult> { - let this = self.eval_context_mut(); - match link_name.as_str() { - // Allocation - "posix_memalign" => { - let [memptr, align, size] = - this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - let result = this.posix_memalign(memptr, align, size)?; - this.write_scalar(result, dest)?; - } - "aligned_alloc" => { - let [align, size] = - this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - let res = this.aligned_alloc(align, size)?; - this.write_pointer(res, dest)?; - } - - // Standard input/output - // FIXME: These shims are hacks that just get basic stdout/stderr working. We can't - // constrain them to "std" since std itself uses the wasi crate for this. - "get-stdout" => { - let [] = - this.check_shim_sig(shim_sig!(extern "C" fn() -> i32), link_name, abi, args)?; - this.write_scalar(Scalar::from_i32(1), dest)?; // POSIX FD number for stdout - } - "get-stderr" => { - let [] = - this.check_shim_sig(shim_sig!(extern "C" fn() -> i32), link_name, abi, args)?; - this.write_scalar(Scalar::from_i32(2), dest)?; // POSIX FD number for stderr - } - "[resource-drop]output-stream" => { - let [handle] = - this.check_shim_sig(shim_sig!(extern "C" fn(i32) -> ()), link_name, abi, args)?; - let handle = this.read_scalar(handle)?.to_i32()?; - - if !(handle == 1 || handle == 2) { - throw_unsup_format!("wasm output-stream: unsupported handle"); - } - // We don't actually close these FDs, so this is a NOP. - } - "[method]output-stream.blocking-write-and-flush" => { - let [handle, buf, len, ret_area] = this.check_shim_sig( - shim_sig!(extern "C" fn(i32, *mut _, usize, *mut _) -> ()), - link_name, - abi, - args, - )?; - let handle = this.read_scalar(handle)?.to_i32()?; - let buf = this.read_pointer(buf)?; - let len = this.read_target_usize(len)?; - let ret_area = this.read_pointer(ret_area)?; - - if len > 4096 { - throw_unsup_format!( - "wasm output-stream.blocking-write-and-flush: buffer too big" - ); - } - let len = usize::try_from(len).unwrap(); - let Some(fd) = this.machine.fds.get(handle) else { - throw_unsup_format!( - "wasm output-stream.blocking-write-and-flush: unsupported handle" - ); - }; - fd.write( - this.machine.communicate(), - buf, - len, - this, - callback!( - @capture<'tcx> { - len: usize, - ret_area: Pointer, - } - |this, result: Result| { - if !matches!(result, Ok(l) if l == len) { - throw_unsup_format!("wasm output-stream.blocking-write-and-flush: returning errors is not supported"); - } - // 0 in the first byte of the ret_area indicates success. - let ret = this.ptr_to_mplace(ret_area, this.machine.layouts.u8); - this.write_null(&ret)?; - interp_ok(()) - }), - )?; - } - - _ => return interp_ok(EmulateItemResult::NotSupported), - } - interp_ok(EmulateItemResult::NeedsReturn) - } -} diff --git a/src/shims/wasi/mod.rs b/src/shims/wasi/mod.rs deleted file mode 100644 index 09c6507b24..0000000000 --- a/src/shims/wasi/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod foreign_items;