Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 31 additions & 31 deletions llvm/test/CodeGen/X86/avx512-fma.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; Checks that a <16 x float> fmul feeding an fadd is selected as a single
; 512-bit fused multiply-add (the vfmadd213ps CHECK line below).
; Both instructions carry the `contract` fast-math flag, so the fusion is
; permitted by the IR itself rather than by a global -fp-contract=fast option.
; Each SSA value is defined exactly once; the flag-less duplicates of %x and
; %res were redefinitions and have been dropped.
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; ALL-LABEL: test_x86_fmadd_ps_z:
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
  %x = fmul contract <16 x float> %a0, %a1
  %res = fadd contract <16 x float> %x, %a2
  ret <16 x float> %res
}

Expand All @@ -17,8 +17,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16
; ALL: ## %bb.0:
; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %x, %a2
%x = fmul contract <16 x float> %a0, %a1
%res = fsub contract <16 x float> %x, %a2
ret <16 x float> %res
}

Expand All @@ -27,8 +27,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
; ALL: ## %bb.0:
; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %a2, %x
%x = fmul contract <16 x float> %a0, %a1
%res = fsub contract <16 x float> %a2, %x
ret <16 x float> %res
}

Expand All @@ -37,12 +37,12 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
; ALL: ## %bb.0:
; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
%x = fmul contract <16 x float> %a0, %a1
%y = fsub contract <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00>, %x
%res = fsub <16 x float> %y, %a2
%res = fsub contract <16 x float> %y, %a2
ret <16 x float> %res
}

Expand All @@ -51,8 +51,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; ALL-NEXT: retq
%x = fmul <8 x double> %a0, %a1
%res = fadd <8 x double> %x, %a2
%x = fmul contract <8 x double> %a0, %a1
%res = fadd contract <8 x double> %x, %a2
ret <8 x double> %res
}

Expand All @@ -61,8 +61,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
; ALL: ## %bb.0:
; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; ALL-NEXT: retq
%x = fmul <8 x double> %a0, %a1
%res = fsub <8 x double> %x, %a2
%x = fmul contract <8 x double> %a0, %a1
%res = fsub contract <8 x double> %x, %a2
ret <8 x double> %res
}

Expand All @@ -71,8 +71,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; ALL: ## %bb.0:
; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; ALL-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
%x = fmul contract double %a0, %a1
%res = fsub contract double %x, %a2
ret double %res
}

Expand All @@ -82,8 +82,8 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, ptr %a2_ptr) {
; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
; ALL-NEXT: retq
%a2 = load double , ptr%a2_ptr
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
%x = fmul contract double %a0, %a1
%res = fsub contract double %x, %a2
ret double %res
}

Expand All @@ -93,8 +93,8 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, ptr %a2_ptr) {
; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; ALL-NEXT: retq
%a2 = load double , ptr%a2_ptr
%x = fmul double %a0, %a2
%res = fsub double %x, %a1
%x = fmul contract double %a0, %a2
%res = fsub contract double %x, %a1
ret double %res
}

Expand All @@ -103,8 +103,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; ALL-NEXT: retq
%b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b2 = fadd <16 x float> %b1, %a2
%b1 = fmul contract <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b2 = fadd contract <16 x float> %b1, %a2
ret <16 x float> %b2
}

Expand All @@ -113,8 +113,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; ALL: ## %bb.0:
; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
; ALL-NEXT: retq
%b1 = fmul <16 x float> %a1, %a2
%b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b1 = fmul contract <16 x float> %a1, %a2
%b2 = fadd contract <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
ret <16 x float> %b2
}

Expand All @@ -135,8 +135,8 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, pt
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
; SKX-NEXT: retq
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
%x = fmul <16 x float> %a0, %a2
%y = fadd <16 x float> %x, %a1
%x = fmul contract <16 x float> %a0, %a2
%y = fadd contract <16 x float> %x, %a1
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
ret <16 x float> %res
}
Expand All @@ -160,8 +160,8 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, pt
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
%x = fmul <16 x float> %a0, %a2
%y = fadd <16 x float> %x, %a1
%x = fmul contract <16 x float> %a0, %a2
%y = fadd contract <16 x float> %x, %a1
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
ret <16 x float> %res
}
Expand All @@ -185,8 +185,8 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, pt
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
%x = fmul <16 x float> %a1, %a0
%y = fadd <16 x float> %x, %a2
%x = fmul contract <16 x float> %a1, %a0
%y = fadd contract <16 x float> %x, %a2
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
ret <16 x float> %res
}
Expand Down
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ

; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag is set.
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
Expand All @@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
%add.i = fadd <32 x half> %3, %acc
%add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}

Expand All @@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
%add.i = fadd <32 x half> %3, %acc
%add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}

Expand All @@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
%add.i = fadd <16 x half> %3, %acc
%add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}

Expand All @@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
%add.i = fadd <16 x half> %3, %acc
%add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}

Expand All @@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
%add.i = fadd <8 x half> %3, %acc
%add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}

Expand All @@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
%add.i = fadd <8 x half> %3, %acc
%add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}

Expand All @@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
%add.i = fadd <32 x half> %3, %acc
%add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}

Expand All @@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
%3 = bitcast <16 x float> %2 to <32 x half>
%add.i = fadd <32 x half> %3, %acc
%add.i = fadd contract <32 x half> %3, %acc
ret <32 x half> %add.i
}

Expand All @@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
%add.i = fadd <16 x half> %3, %acc
%add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}

Expand All @@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <8 x float> %2 to <16 x half>
%add.i = fadd <16 x half> %3, %acc
%add.i = fadd contract <16 x half> %3, %acc
ret <16 x half> %add.i
}

Expand All @@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
%add.i = fadd <8 x half> %3, %acc
%add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}

Expand All @@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
%3 = bitcast <4 x float> %2 to <8 x half>
%add.i = fadd <8 x half> %3, %acc
%add.i = fadd contract <8 x half> %3, %acc
ret <8 x half> %add.i
}

Expand Down
Loading