From 56d04f049c4323aa09a7b87a421943c9dc299554 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Thu, 9 Oct 2025 17:02:26 +0530
Subject: [PATCH 1/7] Implement `simd_fma` and `simd_relaxed_fma` in const-eval

---
 src/intrinsics/simd.rs | 58 ------------------------------------------
 src/machine.rs         |  4 +--
 2 files changed, 2 insertions(+), 60 deletions(-)

diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 1e7366b5a8..2b176093cb 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -1,5 +1,3 @@
-use rand::Rng;
-use rustc_apfloat::Float;
 use rustc_middle::ty;
 use rustc_middle::ty::FloatTy;
 
@@ -83,62 +81,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     this.write_scalar(val, &dest)?;
                 }
             }
-            "fma" | "relaxed_fma" => {
-                let [a, b, c] = check_intrinsic_arg_count(args)?;
-                let (a, a_len) = this.project_to_simd(a)?;
-                let (b, b_len) = this.project_to_simd(b)?;
-                let (c, c_len) = this.project_to_simd(c)?;
-                let (dest, dest_len) = this.project_to_simd(dest)?;
-
-                assert_eq!(dest_len, a_len);
-                assert_eq!(dest_len, b_len);
-                assert_eq!(dest_len, c_len);
-
-                for i in 0..dest_len {
-                    let a = this.read_scalar(&this.project_index(&a, i)?)?;
-                    let b = this.read_scalar(&this.project_index(&b, i)?)?;
-                    let c = this.read_scalar(&this.project_index(&c, i)?)?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    let fuse: bool = intrinsic_name == "fma"
-                        || (this.machine.float_nondet && this.machine.rng.get_mut().random());
-
-                    // Works for f32 and f64.
-                    // FIXME: using host floats to work around https://github.com/rust-lang/miri/issues/2468.
-                    let ty::Float(float_ty) = dest.layout.ty.kind() else {
-                        span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name)
-                    };
-                    let val = match float_ty {
-                        FloatTy::F16 => unimplemented!("f16_f128"),
-                        FloatTy::F32 => {
-                            let a = a.to_f32()?;
-                            let b = b.to_f32()?;
-                            let c = c.to_f32()?;
-                            let res = if fuse {
-                                a.mul_add(b, c).value
-                            } else {
-                                ((a * b).value + c).value
-                            };
-                            let res = this.adjust_nan(res, &[a, b, c]);
-                            Scalar::from(res)
-                        }
-                        FloatTy::F64 => {
-                            let a = a.to_f64()?;
-                            let b = b.to_f64()?;
-                            let c = c.to_f64()?;
-                            let res = if fuse {
-                                a.mul_add(b, c).value
-                            } else {
-                                ((a * b).value + c).value
-                            };
-                            let res = this.adjust_nan(res, &[a, b, c]);
-                            Scalar::from(res)
-                        }
-                        FloatTy::F128 => unimplemented!("f16_f128"),
-                    };
-                    this.write_scalar(val, &dest)?;
-                }
-            }
             "expose_provenance" => {
                 let [op] = check_intrinsic_arg_count(args)?;
                 let (op, op_len) = this.project_to_simd(op)?;
diff --git a/src/machine.rs b/src/machine.rs
index fadbdf5cea..da90f6b846 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1324,8 +1324,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
     }
 
     #[inline(always)]
-    fn float_fuse_mul_add(ecx: &mut InterpCx<'tcx, Self>) -> bool {
-        ecx.machine.float_nondet && ecx.machine.rng.get_mut().random()
+    fn float_fuse_mul_add(ecx: &InterpCx<'tcx, Self>) -> bool {
+        ecx.machine.float_nondet && ecx.machine.rng.borrow_mut().random()
     }
 
     #[inline(always)]

From 1cfc1c7704e52edd56629e3325c133000dcc09d1 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 2 Nov 2025 15:40:56 +0100
Subject: [PATCH 2/7] remove `unsafe` from `_mm_pause` uses

---
 tests/pass/shims/x86/intrinsics-x86-sse2.rs |  2 +-
 tests/pass/shims/x86/intrinsics-x86.rs      | 16 ++++------------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/tests/pass/shims/x86/intrinsics-x86-sse2.rs b/tests/pass/shims/x86/intrinsics-x86-sse2.rs
index 731d8b5776..242aa0e89f 100644
--- a/tests/pass/shims/x86/intrinsics-x86-sse2.rs
+++ b/tests/pass/shims/x86/intrinsics-x86-sse2.rs
@@ -54,7 +54,7 @@ unsafe fn test_sse2() {
     }
 
     fn test_mm_pause() {
-        unsafe { _mm_pause() }
+        _mm_pause()
     }
     test_mm_pause();
 
diff --git a/tests/pass/shims/x86/intrinsics-x86.rs b/tests/pass/shims/x86/intrinsics-x86.rs
index 90bcdba435..a18b6d0152 100644
--- a/tests/pass/shims/x86/intrinsics-x86.rs
+++ b/tests/pass/shims/x86/intrinsics-x86.rs
@@ -7,17 +7,13 @@ mod x86 {
 
     fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
         let mut sum = 0;
-        // SAFETY: There are no safety requirements for calling `_addcarry_u32`.
-        // It's just unsafe for API consistency with other intrinsics.
-        let c_out = unsafe { arch::_addcarry_u32(c_in, a, b, &mut sum) };
+        let c_out = arch::_addcarry_u32(c_in, a, b, &mut sum);
         (c_out, sum)
     }
 
     fn sbb(b_in: u8, a: u32, b: u32) -> (u8, u32) {
         let mut sum = 0;
-        // SAFETY: There are no safety requirements for calling `_subborrow_u32`.
-        // It's just unsafe for API consistency with other intrinsics.
-        let b_out = unsafe { arch::_subborrow_u32(b_in, a, b, &mut sum) };
+        let b_out = arch::_subborrow_u32(b_in, a, b, &mut sum);
         (b_out, sum)
     }
 
@@ -52,17 +48,13 @@ mod x86_64 {
 
     fn adc(c_in: u8, a: u64, b: u64) -> (u8, u64) {
         let mut sum = 0;
-        // SAFETY: There are no safety requirements for calling `_addcarry_u64`.
-        // It's just unsafe for API consistency with other intrinsics.
-        let c_out = unsafe { arch::_addcarry_u64(c_in, a, b, &mut sum) };
+        let c_out = arch::_addcarry_u64(c_in, a, b, &mut sum);
         (c_out, sum)
     }
 
     fn sbb(b_in: u8, a: u64, b: u64) -> (u8, u64) {
         let mut sum = 0;
-        // SAFETY: There are no safety requirements for calling `_subborrow_u64`.
-        // It's just unsafe for API consistency with other intrinsics.
-        let b_out = unsafe { arch::_subborrow_u64(b_in, a, b, &mut sum) };
+        let b_out = arch::_subborrow_u64(b_in, a, b, &mut sum);
         (b_out, sum)
     }
 

From abd2b52543d4227a4eddd92aafde483d7e18d4c8 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 2 Nov 2025 17:29:38 +0100
Subject: [PATCH 3/7] fix `_mm256_permute2f128` miri tests

---
 tests/pass/shims/x86/intrinsics-x86-avx.rs    | 31 ++++++++++---------
 .../x86/intrinsics-x86-pause-without-sse2.rs  |  7 +----
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/tests/pass/shims/x86/intrinsics-x86-avx.rs b/tests/pass/shims/x86/intrinsics-x86-avx.rs
index b3c2434c0d..9f7c12c439 100644
--- a/tests/pass/shims/x86/intrinsics-x86-avx.rs
+++ b/tests/pass/shims/x86/intrinsics-x86-avx.rs
@@ -829,15 +829,16 @@ unsafe fn test_avx() {
 
     #[target_feature(enable = "avx")]
     unsafe fn test_mm256_permute2f128_ps() {
-        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
-        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
-        let r = _mm256_permute2f128_ps::<0x13>(a, b);
-        let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
+        let a = _mm256_setr_ps(11., 12., 13., 14., 15., 16., 17., 18.);
+        let b = _mm256_setr_ps(21., 22., 23., 24., 25., 26., 27., 28.);
+        let r = _mm256_permute2f128_ps::<0b0001_0011>(a, b);
+        let e = _mm256_setr_ps(25., 26., 27., 28., 15., 16., 17., 18.);
         assert_eq_m256(r, e);
 
-        let r = _mm256_permute2f128_ps::<0x44>(a, b);
-        let e = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
-        assert_eq_m256(r, e);
+        // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
+        let r = _mm256_permute2f128_ps::<0b1001_1011>(a, b);
+        let z = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+        assert_eq_m256(r, z);
     }
     test_mm256_permute2f128_ps();
 
@@ -845,11 +846,12 @@ unsafe fn test_avx() {
     unsafe fn test_mm256_permute2f128_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
-        let r = _mm256_permute2f128_pd::<0x31>(a, b);
+        let r = _mm256_permute2f128_pd::<0b0011_0001>(a, b);
         let e = _mm256_setr_pd(3., 4., 7., 8.);
         assert_eq_m256d(r, e);
 
-        let r = _mm256_permute2f128_pd::<0x44>(a, b);
+        // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
+        let r = _mm256_permute2f128_pd::<0b1011_1001>(a, b);
         let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0);
         assert_eq_m256d(r, e);
     }
@@ -857,13 +859,14 @@ unsafe fn test_avx() {
 
     #[target_feature(enable = "avx")]
     unsafe fn test_mm256_permute2f128_si256() {
-        let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
-        let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
-        let r = _mm256_permute2f128_si256::<0x20>(a, b);
-        let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let a = _mm256_setr_epi32(11, 12, 13, 14, 15, 16, 17, 18);
+        let b = _mm256_setr_epi32(21, 22, 23, 24, 25, 26, 27, 28);
+        let r = _mm256_permute2f128_si256::<0b0010_0000>(a, b);
+        let e = _mm256_setr_epi32(11, 12, 13, 14, 21, 22, 23, 24);
         assert_eq_m256i(r, e);
 
-        let r = _mm256_permute2f128_si256::<0x44>(a, b);
+        // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
+        let r = _mm256_permute2f128_si256::<0b1010_1000>(a, b);
         let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0);
         assert_eq_m256i(r, e);
     }
diff --git a/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs b/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs
index 6ca53c0eb6..a4160977ce 100644
--- a/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs
+++ b/tests/pass/shims/x86/intrinsics-x86-pause-without-sse2.rs
@@ -9,10 +9,5 @@ use std::arch::x86_64::*;
 
 fn main() {
     assert!(!is_x86_feature_detected!("sse2"));
-
-    unsafe {
-        // This is a SSE2 intrinsic, but it behaves as a no-op when SSE2
-        // is not available, so it is always safe to call.
-        _mm_pause();
-    }
+    _mm_pause();
 }

From 38ff9f3b3f2b9c3cb45d25891292dd5614378f9e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 2 Nov 2025 18:03:09 +0100
Subject: [PATCH 4/7] remove miri `_mm256_permute2f128` fallback implementation

it is no longer used (and was also incorrect)
---
 src/shims/x86/avx.rs | 44 --------------------------------------------
 1 file changed, 44 deletions(-)

diff --git a/src/shims/x86/avx.rs b/src/shims/x86/avx.rs
index 269ce3b51b..ec365aa1b4 100644
--- a/src/shims/x86/avx.rs
+++ b/src/shims/x86/avx.rs
@@ -217,50 +217,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     )?;
                 }
             }
-            // Used to implement the _mm256_permute2f128_ps, _mm256_permute2f128_pd and
-            // _mm256_permute2f128_si256 functions. Regardless of the suffix in the name
-            // thay all can be considered to operate on vectors of 128-bit elements.
-            // For each 128-bit element of `dest`, copies one from `left`, `right` or
-            // zero, according to `imm`.
-            "vperm2f128.ps.256" | "vperm2f128.pd.256" | "vperm2f128.si.256" => {
-                let [left, right, imm] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                assert_eq!(dest.layout, left.layout);
-                assert_eq!(dest.layout, right.layout);
-                assert_eq!(dest.layout.size.bits(), 256);
-
-                // Transmute to `[u128; 2]` to process each 128-bit chunk independently.
-                let u128x2_layout =
-                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, 2))?;
-                let left = left.transmute(u128x2_layout, this)?;
-                let right = right.transmute(u128x2_layout, this)?;
-                let dest = dest.transmute(u128x2_layout, this)?;
-
-                let imm = this.read_scalar(imm)?.to_u8()?;
-
-                for i in 0..2 {
-                    let dest = this.project_index(&dest, i)?;
-
-                    let imm = match i {
-                        0 => imm & 0xF,
-                        1 => imm >> 4,
-                        _ => unreachable!(),
-                    };
-                    if imm & 0b100 != 0 {
-                        this.write_scalar(Scalar::from_u128(0), &dest)?;
-                    } else {
-                        let src = match imm {
-                            0b00 => this.project_index(&left, 0)?,
-                            0b01 => this.project_index(&left, 1)?,
-                            0b10 => this.project_index(&right, 0)?,
-                            0b11 => this.project_index(&right, 1)?,
-                            _ => unreachable!(),
-                        };
-                        this.copy_op(&src, &dest)?;
-                    }
-                }
-            }
             // Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps
             // and _mm256_maskload_pd functions.
             // For the element `i`, if the high bit of the `i`-th element of `mask`

From 2c449a40683890f979d7cee89f85e367c249b776 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 11 Oct 2025 03:24:23 +0530
Subject: [PATCH 5/7] Add Miri tests for f16/f128 SIMD operations

---
 tests/pass/intrinsics/portable-simd.rs | 262 ++++++++++++++++++++++++-
 1 file changed, 260 insertions(+), 2 deletions(-)

diff --git a/tests/pass/intrinsics/portable-simd.rs b/tests/pass/intrinsics/portable-simd.rs
index e2cd08733a..b7d2584c58 100644
--- a/tests/pass/intrinsics/portable-simd.rs
+++ b/tests/pass/intrinsics/portable-simd.rs
@@ -6,18 +6,143 @@
     rustc_attrs,
     intrinsics,
     core_intrinsics,
-    repr_simd
+    repr_simd,
+    f16,
+    f128
 )]
-#![allow(incomplete_features, internal_features)]
+#![allow(incomplete_features, internal_features, non_camel_case_types)]
+use std::fmt::{self, Debug, Formatter};
 use std::intrinsics::simd as intrinsics;
 use std::ptr;
 use std::simd::StdFloat;
 use std::simd::prelude::*;
 
+#[repr(simd, packed)]
+#[derive(Copy)]
+struct PackedSimd<T, const N: usize>([T; N]);
+
+impl<T: Copy, const N: usize> Clone for PackedSimd<T, N> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T: PartialEq + Copy, const N: usize> PartialEq for PackedSimd<T, N> {
+    fn eq(&self, other: &Self) -> bool {
+        self.into_array() == other.into_array()
+    }
+}
+
+impl<T: Debug + Copy, const N: usize> Debug for PackedSimd<T, N> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Debug::fmt(&self.into_array(), f)
+    }
+}
+
+type f16x2 = PackedSimd<f16, 2>;
+type f16x4 = PackedSimd<f16, 4>;
+
+type f128x2 = PackedSimd<f128, 2>;
+type f128x4 = PackedSimd<f128, 4>;
+
+impl<T: Copy, const N: usize> PackedSimd<T, N> {
+    fn splat(x: T) -> Self {
+        Self([x; N])
+    }
+    fn from_array(a: [T; N]) -> Self {
+        Self(a)
+    }
+    fn into_array(self) -> [T; N] {
+        // as we have `repr(packed)`, there shouldn't be any padding bytes
+        unsafe { std::mem::transmute_copy(&self) }
+    }
+}
+
 #[rustc_intrinsic]
 #[rustc_nounwind]
 pub unsafe fn simd_shuffle_const_generic<T, U, const IDX: &'static [u32]>(x: T, y: T) -> U;
 
+pub fn simd_ops_f16() {
+    use intrinsics::*;
+
+    // small hack to make type inference better
+    macro_rules! assert_eq {
+        ($a:expr, $b:expr $(,$t:tt)*) => {{
+            let a = $a;
+            let b = $b;
+            if false { let _inference = b == a; }
+            ::std::assert_eq!(a, b, $(,$t)*)
+        }}
+    }
+
+    let a = f16x4::splat(10.0);
+    let b = f16x4::from_array([1.0, 2.0, 3.0, -4.0]);
+
+    unsafe {
+        assert_eq!(simd_neg(b), f16x4::from_array([-1.0, -2.0, -3.0, 4.0]));
+        assert_eq!(simd_add(a, b), f16x4::from_array([11.0, 12.0, 13.0, 6.0]));
+        assert_eq!(simd_sub(a, b), f16x4::from_array([9.0, 8.0, 7.0, 14.0]));
+        assert_eq!(simd_mul(a, b), f16x4::from_array([10.0, 20.0, 30.0, -40.0]));
+        assert_eq!(simd_div(b, a), f16x4::from_array([0.1, 0.2, 0.3, -0.4]));
+        assert_eq!(simd_div(a, f16x4::splat(2.0)), f16x4::splat(5.0));
+        assert_eq!(simd_rem(a, b), f16x4::from_array([0.0, 0.0, 1.0, 2.0]));
+        assert_eq!(simd_fabs(b), f16x4::from_array([1.0, 2.0, 3.0, 4.0]));
+        assert_eq!(
+            simd_fmax(a, simd_mul(b, f16x4::splat(4.0))),
+            f16x4::from_array([10.0, 10.0, 12.0, 10.0])
+        );
+        assert_eq!(
+            simd_fmin(a, simd_mul(b, f16x4::splat(4.0))),
+            f16x4::from_array([4.0, 8.0, 10.0, -16.0])
+        );
+
+        assert_eq!(simd_fma(a, b, a), simd_add(simd_mul(a, b), a));
+        assert_eq!(simd_fma(b, b, a), simd_add(simd_mul(b, b), a));
+        assert_eq!(simd_fma(a, b, b), simd_add(simd_mul(a, b), b));
+        assert_eq!(
+            simd_fma(f16x4::splat(-3.2), b, f16x4::splat(f16::NEG_INFINITY)),
+            f16x4::splat(f16::NEG_INFINITY)
+        );
+
+        assert_eq!(simd_relaxed_fma(a, b, a), simd_add(simd_mul(a, b), a));
+        assert_eq!(simd_relaxed_fma(b, b, a), simd_add(simd_mul(b, b), a));
+        assert_eq!(simd_relaxed_fma(a, b, b), simd_add(simd_mul(a, b), b));
+        assert_eq!(
+            simd_relaxed_fma(f16x4::splat(-3.2), b, f16x4::splat(f16::NEG_INFINITY)),
+            f16x4::splat(f16::NEG_INFINITY)
+        );
+
+        assert_eq!(simd_eq(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, !0, 0, 0]));
+        assert_eq!(simd_ne(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, 0, !0, !0]));
+        assert_eq!(simd_le(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, !0, !0, 0]));
+        assert_eq!(simd_lt(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, 0, !0, 0]));
+        assert_eq!(simd_ge(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, !0, 0, !0]));
+        assert_eq!(simd_gt(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, 0, 0, !0]));
+
+        assert_eq!(simd_reduce_add_ordered(a, 0.0), 40.0f16);
+        assert_eq!(simd_reduce_add_ordered(b, 0.0), 2.0f16);
+        assert_eq!(simd_reduce_mul_ordered(a, 1.0), 10000.0f16);
+        assert_eq!(simd_reduce_mul_ordered(b, 1.0), -24.0f16);
+        assert_eq!(simd_reduce_max(a), 10.0f16);
+        assert_eq!(simd_reduce_max(b), 3.0f16);
+        assert_eq!(simd_reduce_min(a), 10.0f16);
+        assert_eq!(simd_reduce_min(b), -4.0f16);
+
+        assert_eq!(
+            simd_fmax(f16x2::from_array([0.0, f16::NAN]), f16x2::from_array([f16::NAN, 0.0])),
+            f16x2::from_array([0.0, 0.0])
+        );
+        assert_eq!(simd_reduce_max(f16x2::from_array([0.0, f16::NAN])), 0.0f16);
+        assert_eq!(simd_reduce_max(f16x2::from_array([f16::NAN, 0.0])), 0.0f16);
+        assert_eq!(
+            simd_fmin(f16x2::from_array([0.0, f16::NAN]), f16x2::from_array([f16::NAN, 0.0])),
+            f16x2::from_array([0.0, 0.0])
+        );
+        assert_eq!(simd_reduce_min(f16x2::from_array([0.0, f16::NAN])), 0.0f16);
+        assert_eq!(simd_reduce_min(f16x2::from_array([f16::NAN, 0.0])), 0.0f16);
+    }
+}
+
 fn simd_ops_f32() {
     let a = f32x4::splat(10.0);
     let b = f32x4::from_array([1.0, 2.0, 3.0, -4.0]);
@@ -148,6 +273,87 @@ fn simd_ops_f64() {
     assert_eq!(f64x2::from_array([f64::NAN, 0.0]).reduce_min(), 0.0);
 }
 
+pub fn simd_ops_f128() {
+    use intrinsics::*;
+
+    // small hack to make type inference better
+    macro_rules! assert_eq {
+        ($a:expr, $b:expr $(,$t:tt)*) => {{
+            let a = $a;
+            let b = $b;
+            if false { let _inference = b == a; }
+            ::std::assert_eq!(a, b, $(,$t)*)
+        }}
+    }
+
+    let a = f128x4::splat(10.0);
+    let b = f128x4::from_array([1.0, 2.0, 3.0, -4.0]);
+
+    unsafe {
+        assert_eq!(simd_neg(b), f128x4::from_array([-1.0, -2.0, -3.0, 4.0]));
+        assert_eq!(simd_add(a, b), f128x4::from_array([11.0, 12.0, 13.0, 6.0]));
+        assert_eq!(simd_sub(a, b), f128x4::from_array([9.0, 8.0, 7.0, 14.0]));
+        assert_eq!(simd_mul(a, b), f128x4::from_array([10.0, 20.0, 30.0, -40.0]));
+        assert_eq!(simd_div(b, a), f128x4::from_array([0.1, 0.2, 0.3, -0.4]));
+        assert_eq!(simd_div(a, f128x4::splat(2.0)), f128x4::splat(5.0));
+        assert_eq!(simd_rem(a, b), f128x4::from_array([0.0, 0.0, 1.0, 2.0]));
+        assert_eq!(simd_fabs(b), f128x4::from_array([1.0, 2.0, 3.0, 4.0]));
+        assert_eq!(
+            simd_fmax(a, simd_mul(b, f128x4::splat(4.0))),
+            f128x4::from_array([10.0, 10.0, 12.0, 10.0])
+        );
+        assert_eq!(
+            simd_fmin(a, simd_mul(b, f128x4::splat(4.0))),
+            f128x4::from_array([4.0, 8.0, 10.0, -16.0])
+        );
+
+        assert_eq!(simd_fma(a, b, a), simd_add(simd_mul(a, b), a));
+        assert_eq!(simd_fma(b, b, a), simd_add(simd_mul(b, b), a));
+        assert_eq!(simd_fma(a, b, b), simd_add(simd_mul(a, b), b));
+        assert_eq!(
+            simd_fma(f128x4::splat(-3.2), b, f128x4::splat(f128::NEG_INFINITY)),
+            f128x4::splat(f128::NEG_INFINITY)
+        );
+
+        assert_eq!(simd_relaxed_fma(a, b, a), simd_add(simd_mul(a, b), a));
+        assert_eq!(simd_relaxed_fma(b, b, a), simd_add(simd_mul(b, b), a));
+        assert_eq!(simd_relaxed_fma(a, b, b), simd_add(simd_mul(a, b), b));
+        assert_eq!(
+            simd_relaxed_fma(f128x4::splat(-3.2), b, f128x4::splat(f128::NEG_INFINITY)),
+            f128x4::splat(f128::NEG_INFINITY)
+        );
+
+        assert_eq!(simd_eq(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, !0, 0, 0]));
+        assert_eq!(simd_ne(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, 0, !0, !0]));
+        assert_eq!(simd_le(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, !0, !0, 0]));
+        assert_eq!(simd_lt(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, 0, !0, 0]));
+        assert_eq!(simd_ge(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, !0, 0, !0]));
+        assert_eq!(simd_gt(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, 0, 0, !0]));
+
+        assert_eq!(simd_reduce_add_ordered(a, 0.0), 40.0f128);
+        assert_eq!(simd_reduce_add_ordered(b, 0.0), 2.0f128);
+        assert_eq!(simd_reduce_mul_ordered(a, 1.0), 10000.0f128);
+        assert_eq!(simd_reduce_mul_ordered(b, 1.0), -24.0f128);
+        assert_eq!(simd_reduce_max(a), 10.0f128);
+        assert_eq!(simd_reduce_max(b), 3.0f128);
+        assert_eq!(simd_reduce_min(a), 10.0f128);
+        assert_eq!(simd_reduce_min(b), -4.0f128);
+
+        assert_eq!(
+            simd_fmax(f128x2::from_array([0.0, f128::NAN]), f128x2::from_array([f128::NAN, 0.0])),
+            f128x2::from_array([0.0, 0.0])
+        );
+        assert_eq!(simd_reduce_max(f128x2::from_array([0.0, f128::NAN])), 0.0f128);
+        assert_eq!(simd_reduce_max(f128x2::from_array([f128::NAN, 0.0])), 0.0f128);
+        assert_eq!(
+            simd_fmin(f128x2::from_array([0.0, f128::NAN]), f128x2::from_array([f128::NAN, 0.0])),
+            f128x2::from_array([0.0, 0.0])
+        );
+        assert_eq!(simd_reduce_min(f128x2::from_array([0.0, f128::NAN])), 0.0f128);
+        assert_eq!(simd_reduce_min(f128x2::from_array([f128::NAN, 0.0])), 0.0f128);
+    }
+}
+
 fn simd_ops_i32() {
     let a = i32x4::splat(10);
     let b = i32x4::from_array([1, 2, 3, -4]);
@@ -563,6 +769,31 @@ fn simd_gather_scatter() {
 }
 
 fn simd_round() {
+    unsafe {
+        use intrinsics::*;
+
+        assert_eq!(
+            simd_ceil(f16x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f16x4::from_array([1.0, 2.0, 2.0, -4.0])
+        );
+        assert_eq!(
+            simd_floor(f16x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f16x4::from_array([0.0, 1.0, 2.0, -5.0])
+        );
+        assert_eq!(
+            simd_round(f16x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f16x4::from_array([1.0, 1.0, 2.0, -5.0])
+        );
+        assert_eq!(
+            simd_round_ties_even(f16x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f16x4::from_array([1.0, 1.0, 2.0, -4.0])
+        );
+        assert_eq!(
+            simd_trunc(f16x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f16x4::from_array([0.0, 1.0, 2.0, -4.0])
+        );
+    }
+
     assert_eq!(
         f32x4::from_array([0.9, 1.001, 2.0, -4.5]).ceil(),
         f32x4::from_array([1.0, 2.0, 2.0, -4.0])
@@ -604,6 +835,31 @@ fn simd_round() {
         f64x4::from_array([0.9, 1.001, 2.0, -4.5]).trunc(),
         f64x4::from_array([0.0, 1.0, 2.0, -4.0])
     );
+
+    unsafe {
+        use intrinsics::*;
+
+        assert_eq!(
+            simd_ceil(f128x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f128x4::from_array([1.0, 2.0, 2.0, -4.0])
+        );
+        assert_eq!(
+            simd_floor(f128x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f128x4::from_array([0.0, 1.0, 2.0, -5.0])
+        );
+        assert_eq!(
+            simd_round(f128x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f128x4::from_array([1.0, 1.0, 2.0, -5.0])
+        );
+        assert_eq!(
+            simd_round_ties_even(f128x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f128x4::from_array([1.0, 1.0, 2.0, -4.0])
+        );
+        assert_eq!(
+            simd_trunc(f128x4::from_array([0.9, 1.001, 2.0, -4.5])),
+            f128x4::from_array([0.0, 1.0, 2.0, -4.0])
+        );
+    }
 }
 
 fn simd_intrinsics() {
@@ -724,8 +980,10 @@ fn simd_ops_non_pow2() {
 
 fn main() {
     simd_mask();
+    simd_ops_f16();
     simd_ops_f32();
     simd_ops_f64();
+    simd_ops_f128();
     simd_ops_i32();
     simd_ops_non_pow2();
     simd_cast();

From f20407d0b22f00ce3e17d09713fdb4d04951c1a6 Mon Sep 17 00:00:00 2001
From: The Miri Cronjob Bot <miri@cron.bot>
Date: Tue, 4 Nov 2025 04:53:50 +0000
Subject: [PATCH 6/7] Prepare for merging from rust-lang/rust

This updates the rust-version file to 5f9dd05862d2e4bceb3be1031b6c936e35671501.
---
 rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-version b/rust-version
index 0e89b4ab6a..036282b12f 100644
--- a/rust-version
+++ b/rust-version
@@ -1 +1 @@
-c5dabe8cf798123087d094f06417f5a767ca73e8
+5f9dd05862d2e4bceb3be1031b6c936e35671501

From c38128b0d43ae39f93bc8039b7950ab2000ec925 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 4 Nov 2025 08:34:42 +0100
Subject: [PATCH 7/7] wasi is too strange of a target, remove it for now

---
 README.md                       |   1 -
 ci/ci.sh                        |   1 -
 src/shims/env.rs                |   2 +-
 src/shims/foreign_items.rs      |   5 --
 src/shims/mod.rs                |   1 -
 src/shims/tls.rs                |   1 -
 src/shims/wasi/foreign_items.rs | 110 --------------------------------
 src/shims/wasi/mod.rs           |   1 -
 8 files changed, 1 insertion(+), 121 deletions(-)
 delete mode 100644 src/shims/wasi/foreign_items.rs
 delete mode 100644 src/shims/wasi/mod.rs

diff --git a/README.md b/README.md
index 0cbfe0e96a..f6c675839e 100644
--- a/README.md
+++ b/README.md
@@ -220,7 +220,6 @@ degree documented below):
   - `solaris` / `illumos`: maintained by @devnexen. Supports the entire test suite.
   - `freebsd`: maintained by @YohDeadfall and @LorrensP-2158466. Supports the entire test suite.
   - `android`: **maintainer wanted**. Support very incomplete, but a basic "hello world" works.
-  - `wasi`: **maintainer wanted**. Support very incomplete, but a basic "hello world" works.
 - For targets on other operating systems, Miri might fail before even reaching the `main` function.
 
 However, even for targets that we do support, the degree of support for accessing platform APIs
diff --git a/ci/ci.sh b/ci/ci.sh
index bcc110f648..2d27f02749 100755
--- a/ci/ci.sh
+++ b/ci/ci.sh
@@ -153,7 +153,6 @@ case $HOST_TARGET in
     BASIC="empty_main integer heap_alloc libc-mem vec string btreemap" # ensures we have the basics: pre-main code, system allocator
     UNIX="hello panic/panic panic/unwind concurrency/simple atomic libc-mem libc-misc libc-random env num_cpus" # the things that are very similar across all Unixes, and hence easily supported there
     TEST_TARGET=aarch64-linux-android  run_tests_minimal $BASIC $UNIX time hashmap random thread sync concurrency epoll eventfd
-    TEST_TARGET=wasm32-wasip2          run_tests_minimal $BASIC hello wasm
     TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std empty_main wasm # this target doesn't really have std
     TEST_TARGET=thumbv7em-none-eabihf  run_tests_minimal no_std
     ;;
diff --git a/src/shims/env.rs b/src/shims/env.rs
index 689cd3a726..b9fb9192df 100644
--- a/src/shims/env.rs
+++ b/src/shims/env.rs
@@ -51,7 +51,7 @@ impl<'tcx> EnvVars<'tcx> {
         } else if ecx.tcx.sess.target.os == "windows" {
             EnvVars::Windows(WindowsEnvVars::new(ecx, env_vars)?)
         } else {
-            // Used e.g. for wasi
+            // For "none" targets (i.e., without an OS).
             EnvVars::Uninit
         };
         ecx.machine.env_vars = env_vars;
diff --git a/src/shims/foreign_items.rs b/src/shims/foreign_items.rs
index 74a1ac729e..54fe27382e 100644
--- a/src/shims/foreign_items.rs
+++ b/src/shims/foreign_items.rs
@@ -102,7 +102,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let this = self.eval_context_ref();
         match this.tcx.sess.target.os.as_ref() {
             os if this.target_os_is_unix() => shims::unix::foreign_items::is_dyn_sym(name, os),
-            "wasi" => shims::wasi::foreign_items::is_dyn_sym(name),
             "windows" => shims::windows::foreign_items::is_dyn_sym(name),
             _ => false,
         }
@@ -846,10 +845,6 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                         shims::unix::foreign_items::EvalContextExt::emulate_foreign_item_inner(
                             this, link_name, abi, args, dest,
                         ),
-                    "wasi" =>
-                        shims::wasi::foreign_items::EvalContextExt::emulate_foreign_item_inner(
-                            this, link_name, abi, args, dest,
-                        ),
                     "windows" =>
                         shims::windows::foreign_items::EvalContextExt::emulate_foreign_item_inner(
                             this, link_name, abi, args, dest,
diff --git a/src/shims/mod.rs b/src/shims/mod.rs
index 7f7bc3b1cf..e51ace2fd9 100644
--- a/src/shims/mod.rs
+++ b/src/shims/mod.rs
@@ -8,7 +8,6 @@ mod math;
 #[cfg(all(unix, feature = "native-lib"))]
 mod native_lib;
 mod unix;
-mod wasi;
 mod windows;
 mod x86;
 
diff --git a/src/shims/tls.rs b/src/shims/tls.rs
index 1200029692..9dc829d7a1 100644
--- a/src/shims/tls.rs
+++ b/src/shims/tls.rs
@@ -253,7 +253,6 @@ impl<'tcx> TlsDtorsState<'tcx> {
                         }
                         _ => {
                             // No TLS dtor support.
-                            // FIXME: should we do something on wasi?
                             break 'new_state Done;
                         }
                     }
diff --git a/src/shims/wasi/foreign_items.rs b/src/shims/wasi/foreign_items.rs
deleted file mode 100644
index ffc02dc986..0000000000
--- a/src/shims/wasi/foreign_items.rs
+++ /dev/null
@@ -1,110 +0,0 @@
-use rustc_abi::CanonAbi;
-use rustc_middle::ty::Ty;
-use rustc_span::Symbol;
-use rustc_target::callconv::FnAbi;
-
-use crate::shims::alloc::EvalContextExt as _;
-use crate::*;
-
-pub fn is_dyn_sym(_name: &str) -> bool {
-    false
-}
-
-impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
-pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
-    fn emulate_foreign_item_inner(
-        &mut self,
-        link_name: Symbol,
-        abi: &FnAbi<'tcx, Ty<'tcx>>,
-        args: &[OpTy<'tcx>],
-        dest: &MPlaceTy<'tcx>,
-    ) -> InterpResult<'tcx, EmulateItemResult> {
-        let this = self.eval_context_mut();
-        match link_name.as_str() {
-            // Allocation
-            "posix_memalign" => {
-                let [memptr, align, size] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let result = this.posix_memalign(memptr, align, size)?;
-                this.write_scalar(result, dest)?;
-            }
-            "aligned_alloc" => {
-                let [align, size] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let res = this.aligned_alloc(align, size)?;
-                this.write_pointer(res, dest)?;
-            }
-
-            // Standard input/output
-            // FIXME: These shims are hacks that just get basic stdout/stderr working. We can't
-            // constrain them to "std" since std itself uses the wasi crate for this.
-            "get-stdout" => {
-                let [] =
-                    this.check_shim_sig(shim_sig!(extern "C" fn() -> i32), link_name, abi, args)?;
-                this.write_scalar(Scalar::from_i32(1), dest)?; // POSIX FD number for stdout
-            }
-            "get-stderr" => {
-                let [] =
-                    this.check_shim_sig(shim_sig!(extern "C" fn() -> i32), link_name, abi, args)?;
-                this.write_scalar(Scalar::from_i32(2), dest)?; // POSIX FD number for stderr
-            }
-            "[resource-drop]output-stream" => {
-                let [handle] =
-                    this.check_shim_sig(shim_sig!(extern "C" fn(i32) -> ()), link_name, abi, args)?;
-                let handle = this.read_scalar(handle)?.to_i32()?;
-
-                if !(handle == 1 || handle == 2) {
-                    throw_unsup_format!("wasm output-stream: unsupported handle");
-                }
-                // We don't actually close these FDs, so this is a NOP.
-            }
-            "[method]output-stream.blocking-write-and-flush" => {
-                let [handle, buf, len, ret_area] = this.check_shim_sig(
-                    shim_sig!(extern "C" fn(i32, *mut _, usize, *mut _) -> ()),
-                    link_name,
-                    abi,
-                    args,
-                )?;
-                let handle = this.read_scalar(handle)?.to_i32()?;
-                let buf = this.read_pointer(buf)?;
-                let len = this.read_target_usize(len)?;
-                let ret_area = this.read_pointer(ret_area)?;
-
-                if len > 4096 {
-                    throw_unsup_format!(
-                        "wasm output-stream.blocking-write-and-flush: buffer too big"
-                    );
-                }
-                let len = usize::try_from(len).unwrap();
-                let Some(fd) = this.machine.fds.get(handle) else {
-                    throw_unsup_format!(
-                        "wasm output-stream.blocking-write-and-flush: unsupported handle"
-                    );
-                };
-                fd.write(
-                    this.machine.communicate(),
-                    buf,
-                    len,
-                    this,
-                    callback!(
-                        @capture<'tcx> {
-                            len: usize,
-                            ret_area: Pointer,
-                        }
-                        |this, result: Result<usize, IoError>| {
-                            if !matches!(result, Ok(l) if l == len) {
-                                throw_unsup_format!("wasm output-stream.blocking-write-and-flush: returning errors is not supported");
-                            }
-                            // 0 in the first byte of the ret_area indicates success.
-                            let ret = this.ptr_to_mplace(ret_area, this.machine.layouts.u8);
-                            this.write_null(&ret)?;
-                            interp_ok(())
-                    }),
-                )?;
-            }
-
-            _ => return interp_ok(EmulateItemResult::NotSupported),
-        }
-        interp_ok(EmulateItemResult::NeedsReturn)
-    }
-}
diff --git a/src/shims/wasi/mod.rs b/src/shims/wasi/mod.rs
deleted file mode 100644
index 09c6507b24..0000000000
--- a/src/shims/wasi/mod.rs
+++ /dev/null
@@ -1 +0,0 @@
-pub mod foreign_items;