-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Don't rely on global -fp-contract=fast on X86 CodeGen tests #158026
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
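@llvm/pr-subscribers-backend-x86 Author: Mikołaj Piróg (mikolaj-pirog) Changes: IR has the `contract` flag to indicate that contraction is allowed. Testing shouldn't rely on a global flag to perform contraction. This is a prerequisite before making backends rely only on the IR to perform contraction. See more here: https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract/80909/5 Patch is 100.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158026.diff 11 Files Affected: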
diff --git a/llvm/test/CodeGen/X86/avx512-fma.ll b/llvm/test/CodeGen/X86/avx512-fma.ll
index 97f8e5f4ea16c..54343ee771ff7 100644
--- a/llvm/test/CodeGen/X86/avx512-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma.ll
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fadd <16 x float> %x, %a2
+ %x = fmul contract <16 x float> %a0, %a1
+ %res = fadd contract <16 x float> %x, %a2
ret <16 x float> %res
}
@@ -17,8 +17,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16
; ALL: ## %bb.0:
; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %x, %a2
+ %x = fmul contract <16 x float> %a0, %a1
+ %res = fsub contract <16 x float> %x, %a2
ret <16 x float> %res
}
@@ -27,8 +27,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
; ALL: ## %bb.0:
; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %a2, %x
+ %x = fmul contract <16 x float> %a0, %a1
+ %res = fsub contract <16 x float> %a2, %x
ret <16 x float> %res
}
@@ -37,12 +37,12 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
; ALL: ## %bb.0:
; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+ %x = fmul contract <16 x float> %a0, %a1
+ %y = fsub contract <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00>, %x
- %res = fsub <16 x float> %y, %a2
+ %res = fsub contract <16 x float> %y, %a2
ret <16 x float> %res
}
@@ -51,8 +51,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fadd <8 x double> %x, %a2
+ %x = fmul contract <8 x double> %a0, %a1
+ %res = fadd contract <8 x double> %x, %a2
ret <8 x double> %res
}
@@ -61,8 +61,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
; ALL: ## %bb.0:
; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fsub <8 x double> %x, %a2
+ %x = fmul contract <8 x double> %a0, %a1
+ %res = fsub contract <8 x double> %x, %a2
ret <8 x double> %res
}
@@ -71,8 +71,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL: ## %bb.0:
; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; ALL-NEXT: retq
- %x = fmul double %a0, %a1
- %res = fsub double %x, %a2
+ %x = fmul contract double %a0, %a1
+ %res = fsub contract double %x, %a2
ret double %res
}
@@ -82,8 +82,8 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, ptr %a2_ptr) {
; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
; ALL-NEXT: retq
%a2 = load double , ptr%a2_ptr
- %x = fmul double %a0, %a1
- %res = fsub double %x, %a2
+ %x = fmul contract double %a0, %a1
+ %res = fsub contract double %x, %a2
ret double %res
}
@@ -93,8 +93,8 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, ptr %a2_ptr) {
; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; ALL-NEXT: retq
%a2 = load double , ptr%a2_ptr
- %x = fmul double %a0, %a2
- %res = fsub double %x, %a1
+ %x = fmul contract double %a0, %a2
+ %res = fsub contract double %x, %a1
ret double %res
}
@@ -103,8 +103,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; ALL-NEXT: retq
- %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
- %b2 = fadd <16 x float> %b1, %a2
+ %b1 = fmul contract <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+ %b2 = fadd contract <16 x float> %b1, %a2
ret <16 x float> %b2
}
@@ -113,8 +113,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
; ALL-NEXT: retq
- %b1 = fmul <16 x float> %a1, %a2
- %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+ %b1 = fmul contract <16 x float> %a1, %a2
+ %b2 = fadd contract <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
ret <16 x float> %b2
}
@@ -135,8 +135,8 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, pt
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
; SKX-NEXT: retq
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
- %x = fmul <16 x float> %a0, %a2
- %y = fadd <16 x float> %x, %a1
+ %x = fmul contract <16 x float> %a0, %a2
+ %y = fadd contract <16 x float> %x, %a1
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
ret <16 x float> %res
}
@@ -160,8 +160,8 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, pt
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
- %x = fmul <16 x float> %a0, %a2
- %y = fadd <16 x float> %x, %a1
+ %x = fmul contract <16 x float> %a0, %a2
+ %y = fadd contract <16 x float> %x, %a1
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
ret <16 x float> %res
}
@@ -185,8 +185,8 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, pt
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
- %x = fmul <16 x float> %a1, %a0
- %y = fadd <16 x float> %x, %a2
+ %x = fmul contract <16 x float> %a1, %a0
+ %y = fadd contract <16 x float> %x, %a2
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
ret <16 x float> %res
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
index 36b95e744ba14..52e9507d43a1f 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set.
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
@@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
- %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
- %add.i = fadd <32 x half> %3, %acc
+ %add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}
@@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
- %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
- %add.i = fadd <32 x half> %3, %acc
+ %add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}
@@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
- %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+ %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
- %add.i = fadd <16 x half> %3, %acc
+ %add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}
@@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
- %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+ %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
- %add.i = fadd <16 x half> %3, %acc
+ %add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}
@@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
- %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+ %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
- %add.i = fadd <8 x half> %3, %acc
+ %add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}
@@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
- %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+ %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
- %add.i = fadd <8 x half> %3, %acc
+ %add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}
@@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
- %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
+ %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
- %add.i = fadd <32 x half> %3, %acc
+ %add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}
@@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
- %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
+ %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
- %add.i = fadd <32 x half> %3, %acc
+ %add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}
@@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
- %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+ %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
- %add.i = fadd <16 x half> %3, %acc
+ %add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}
@@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
- %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+ %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
- %add.i = fadd <16 x half> %3, %acc
+ %add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}
@@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
- %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+ %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
- %add.i = fadd <8 x half> %3, %acc
+ %add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}
@@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
- %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+ %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
- %add.i = fadd <8 x half> %3, %acc
+ ...
[truncated]
|
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
d33a78f to
1b2e7a5
Compare
1b2e7a5 to
c509ca9
Compare
e-kud
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/31225 Here is the relevant piece of the build log for reference: |
IR has the `contract` flag to indicate that contraction is allowed. Testing shouldn't rely on a global flag to perform contraction. This is a prerequisite before making backends rely only on the IR to perform contraction. See more here: https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract/80909/5