6
6
;
7
7
;============================ end_copyright_notice =============================
8
8
9
- ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD16 %s
10
- ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD32 %s
9
+ ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD16,CHECK %s
10
+ ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD32,CHECK %s
11
11
; REQUIRES: llvm_12_or_greater
12
12
13
13
declare i32 @llvm.vector.reduce.add.v96i32 (<96 x i32 >)
@@ -20,6 +20,8 @@ declare float @llvm.vector.reduce.fmax.v96f32(<96 x float>)
20
20
declare i32 @llvm.vector.reduce.add.v14i32 (<14 x i32 >)
21
21
declare i32 @llvm.vector.reduce.add.v73i32 (<73 x i32 >)
22
22
23
+ declare float @llvm.vector.reduce.fadd.v16f32 (float , <16 x float >)
24
+
23
25
define i32 @test_add (<96 x i32 > %src ) {
24
26
; SIMD16-LABEL: @test_add(
25
27
; SIMD16-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.genx.rdregioni.v16i32.v96i32.i16(<96 x i32> [[SRC:%.*]], i32 0, i32 16, i32 1, i16 0, i32 undef)
@@ -456,3 +458,24 @@ define float @test_fmin(<96 x float> %src) {
456
458
%reduce = call reassoc float @llvm.vector.reduce.fmin.v96f32 (<96 x float > %src )
457
459
ret float %reduce
458
460
}
461
+
462
+ ; test_fadd_legal: reassoc fadd reduction of a <16 x float> vector with a
+ ; start value of 0.0. The expectations below use the prefix that both RUN
+ ; lines enable, so a single set of directives covers both -mcpu targets.
+ ; Expected lowering, as spelled out by the directives: the vector is halved
+ ; four times (16 -> 8 -> 4 -> 2 -> 1), each step reading two rdregionf
+ ; halves and combining them with a vector fadd; the final <1 x float> is
+ ; bitcast to float and the start value is added last.
+ define float @test_fadd_legal(<16 x float> %src) {
463
+ ; CHECK-LABEL: @test_fadd_legal(
464
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC:%.*]], i32 0, i32 8, i32 1, i16 0, i32 undef)
465
+ ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC]], i32 0, i32 8, i32 1, i16 32, i32 undef)
466
+ ; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
467
+ ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 0, i32 undef)
468
+ ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 16, i32 undef)
469
+ ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
470
+ ; CHECK-NEXT: [[TMP7:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 0, i32 undef)
471
+ ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 8, i32 undef)
472
+ ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]]
473
+ ; CHECK-NEXT: [[TMP10:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 0, i32 undef)
474
+ ; CHECK-NEXT: [[TMP11:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 4, i32 undef)
475
+ ; CHECK-NEXT: [[TMP12:%.*]] = fadd <1 x float> [[TMP10]], [[TMP11]]
476
+ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x float> [[TMP12]] to float
477
+ ; CHECK-NEXT: [[RES:%.*]] = fadd float [[TMP13]], 0.000000e+00
478
+ ; CHECK-NEXT: ret float [[RES]]
479
+ %reduce = call reassoc float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %src)
480
+ ret float %reduce
481
+ }
0 commit comments