Skip to content

Conversation

@mikolaj-pirog
Copy link
Member

IR has the contract to indicate that contraction is allowed. Testing shouldn't rely on global flag to perform contraction. This is a prerequisite before making backends rely only on the IR to perform contraction. See more here: https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract/80909/5

@llvmbot
Copy link
Member

llvmbot commented Sep 11, 2025

@llvm/pr-subscribers-backend-x86

Author: Mikołaj Piróg (mikolaj-pirog)

Changes

IR has the contract to indicate that contraction is allowed. Testing shouldn't rely on global flag to perform contraction. This is a prerequisite before making backends rely only on the IR to perform contraction. See more here: https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract/80909/5


Patch is 100.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158026.diff

11 Files Affected:

  • (modified) llvm/test/CodeGen/X86/avx512-fma.ll (+31-31)
  • (modified) llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll (+26-26)
  • (modified) llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll (+5-5)
  • (modified) llvm/test/CodeGen/X86/dag-combiner-fma-folding.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/fma-do-not-commute.ll (+3-3)
  • (modified) llvm/test/CodeGen/X86/fma_patterns.ll (+195-195)
  • (modified) llvm/test/CodeGen/X86/fma_patterns_wide.ll (+82-82)
  • (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+3-11)
  • (modified) llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll (+25-25)
diff --git a/llvm/test/CodeGen/X86/avx512-fma.ll b/llvm/test/CodeGen/X86/avx512-fma.ll
index 97f8e5f4ea16c..54343ee771ff7 100644
--- a/llvm/test/CodeGen/X86/avx512-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f  | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx  | FileCheck %s --check-prefix=ALL --check-prefix=SKX
 
 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
 ; ALL-LABEL: test_x86_fmadd_ps_z:
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %res = fadd <16 x float> %x, %a2
+  %x = fmul contract <16 x float> %a0, %a1
+  %res = fadd contract <16 x float> %x, %a2
   ret <16 x float> %res
 }
 
@@ -17,8 +17,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %res = fsub <16 x float> %x, %a2
+  %x = fmul contract <16 x float> %a0, %a1
+  %res = fsub contract <16 x float> %x, %a2
   ret <16 x float> %res
 }
 
@@ -27,8 +27,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %res = fsub <16 x float> %a2, %x
+  %x = fmul contract <16 x float> %a0, %a1
+  %res = fsub contract <16 x float> %a2, %x
   ret <16 x float> %res
 }
 
@@ -37,12 +37,12 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+  %x = fmul contract <16 x float> %a0, %a1
+  %y = fsub contract <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                           float -0.000000e+00>, %x
-  %res = fsub <16 x float> %y, %a2
+  %res = fsub contract <16 x float> %y, %a2
   ret <16 x float> %res
 }
 
@@ -51,8 +51,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <8 x double> %a0, %a1
-  %res = fadd <8 x double> %x, %a2
+  %x = fmul contract <8 x double> %a0, %a1
+  %res = fadd contract <8 x double> %x, %a2
   ret <8 x double> %res
 }
 
@@ -61,8 +61,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <8 x double> %a0, %a1
-  %res = fsub <8 x double> %x, %a2
+  %x = fmul contract <8 x double> %a0, %a1
+  %res = fsub contract <8 x double> %x, %a2
   ret <8 x double> %res
 }
 
@@ -71,8 +71,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; ALL-NEXT:    retq
-  %x = fmul double %a0, %a1
-  %res = fsub double %x, %a2
+  %x = fmul contract double %a0, %a1
+  %res = fsub contract double %x, %a2
   ret double %res
 }
 
@@ -82,8 +82,8 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, ptr %a2_ptr) {
 ; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
 ; ALL-NEXT:    retq
   %a2 = load double , ptr%a2_ptr
-  %x = fmul double %a0, %a1
-  %res = fsub double %x, %a2
+  %x = fmul contract double %a0, %a1
+  %res = fsub contract double %x, %a2
   ret double %res
 }
 
@@ -93,8 +93,8 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, ptr %a2_ptr) {
 ; ALL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
 ; ALL-NEXT:    retq
   %a2 = load double , ptr%a2_ptr
-  %x = fmul double %a0, %a2
-  %res = fsub double %x, %a1
+  %x = fmul contract double %a0, %a2
+  %res = fsub contract double %x, %a1
   ret double %res
 }
 
@@ -103,8 +103,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 ; ALL-NEXT:    retq
-  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
-  %b2 = fadd <16 x float> %b1, %a2
+  %b1 = fmul contract <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  %b2 = fadd contract <16 x float> %b1, %a2
   ret <16 x float> %b2
 }
 
@@ -113,8 +113,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
 ; ALL-NEXT:    retq
-  %b1 = fmul <16 x float> %a1, %a2
-  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  %b1 = fmul contract <16 x float> %a1, %a2
+  %b2 = fadd contract <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   ret <16 x float> %b2
 }
 
@@ -135,8 +135,8 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, pt
 ; SKX-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
 ; SKX-NEXT:    retq
   %a2   = load <16 x float>,ptr%a2_ptrt,align 1
-  %x = fmul <16 x float> %a0, %a2
-  %y = fadd <16 x float> %x, %a1
+  %x = fmul contract <16 x float> %a0, %a2
+  %y = fadd contract <16 x float> %x, %a1
   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
   ret <16 x float> %res
 }
@@ -160,8 +160,8 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, pt
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %a2   = load <16 x float>,ptr%a2_ptrt,align 1
-  %x = fmul <16 x float> %a0, %a2
-  %y = fadd <16 x float> %x, %a1
+  %x = fmul contract <16 x float> %a0, %a2
+  %y = fadd contract <16 x float> %x, %a1
   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
   ret <16 x float> %res
 }
@@ -185,8 +185,8 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, pt
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %a2   = load <16 x float>,ptr%a2_ptrt,align 1
-  %x = fmul <16 x float> %a1, %a0
-  %y = fadd <16 x float> %x, %a2
+  %x = fmul contract <16 x float> %a1, %a0
+  %y = fadd contract <16 x float> %x, %a2
   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
   ret <16 x float> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
index 36b95e744ba14..52e9507d43a1f 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown  --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown  -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
 
 ; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set.
 define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
@@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+  %add.i = fadd contract <8 x half> %3, %acc
   ret <8 x half> %add.i
 }
 
@@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+  %add.i = fadd contract <8 x half> %3, %acc
   ret <8 x half> %add.i
 }
 
@@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+  %add.i = fadd contract <8 x half> %3, %acc
   ret <8 x half> %add.i
 }
 
@@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+ ...
[truncated]

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@mikolaj-pirog mikolaj-pirog requested a review from e-kud September 14, 2025 15:24
@mikolaj-pirog mikolaj-pirog requested a review from e-kud September 15, 2025 16:21
Copy link
Contributor

@e-kud e-kud left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thanks!

@mikolaj-pirog mikolaj-pirog merged commit 175bab3 into llvm:main Sep 16, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 16, 2025

LLVM Buildbot has detected a new failure on builder lldb-x86_64-debian running on lldb-x86_64-debian while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/31225

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-unit :: Core/./LLDBCoreTests/65/118 (871 of 3192)
UNSUPPORTED: lldb-api :: riscv/break-undecoded/TestBreakpointIllegal.py (872 of 3192)
PASS: lldb-api :: functionalities/json/symbol-file/TestSymbolFileJSON.py (873 of 3192)
UNSUPPORTED: lldb-api :: commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py (874 of 3192)
UNSUPPORTED: lldb-api :: commands/register/register/aarch64_sme_z_registers/save_restore/TestSMEZRegistersSaveRestore.py (875 of 3192)
PASS: lldb-api :: functionalities/postmortem/minidump/TestMiniDump.py (876 of 3192)
PASS: lldb-api :: functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py (877 of 3192)
PASS: lldb-shell :: Register/x86-multithread-write.test (878 of 3192)
PASS: lldb-api :: functionalities/breakpoint/breakpoint_with_realpath_and_source_map/TestBreakpoint.py (879 of 3192)
PASS: lldb-api :: commands/expression/TestRegisterExpressionEndian.py (880 of 3192)
FAIL: lldb-api :: functionalities/postmortem/netbsd-core/TestNetBSDCore.py (881 of 3192)
******************** TEST 'lldb-api :: functionalities/postmortem/netbsd-core/TestNetBSDCore.py' FAILED ********************
Script:
--
/usr/bin/python3 /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./lib --env LLVM_INCLUDE_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/include --env LLVM_TOOLS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./bin --arch x86_64 --build-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex --lldb-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/lldb --compiler /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/clang --dsymutil /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./bin --lldb-obj-root /home/worker/2.0.1/lldb-x86_64-debian/build/tools/lldb --lldb-libs-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./lib --cmake-build-type Release -t /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/functionalities/postmortem/netbsd-core -p TestNetBSDCore.py
--
Exit Code: -6

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 175bab32d7f37bfb854b89c0da7e9f4aa427bc70)
  clang revision 175bab32d7f37bfb854b89c0da7e9f4aa427bc70
  llvm revision 175bab32d7f37bfb854b89c0da7e9f4aa427bc70
Skipping the following test categories: ['libc++', 'msvcstl', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
Change dir to: /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/functionalities/postmortem/netbsd-core
runCmd: settings clear --all

output: 

runCmd: settings set symbols.enable-external-lookup false

output: 

runCmd: settings set target.inherit-tcc true

output: 

runCmd: settings set target.disable-aslr false

output: 

runCmd: settings set target.detach-on-error false

output: 


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants