Skip to content

Commit 1be6116

Browse files
vmustya authored and igcbot committed
Fix reduction lowering for small vectors in VC
When the reduction vector size is a power of two and less than 32, the reduction lowering fails with an internal compiler error. This patch fixes the failure by properly handling the no-tail case on small vectors.
1 parent 88cca8b commit 1be6116

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5013,6 +5013,8 @@ bool GenXLowering::lowerReduction(CallInst *CI, Value *Src, Value *Start,
50135013
IGC_ASSERT_EXIT(TailIndex);
50145014
TailWidth = SrcWidth % TailIndex;
50155015
SrcWidth = TailIndex;
5016+
} else {
5017+
TailWidth = 0;
50165018
}
50175019

50185020
for (SrcWidth /= 2; SrcWidth > 0; SrcWidth /= 2) {

IGC/VectorCompiler/test/Lowering/reduce.ll

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
;
77
;============================ end_copyright_notice =============================
88

9-
; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD16 %s
10-
; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD32 %s
9+
; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD16,CHECK %s
10+
; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD32,CHECK %s
1111
; REQUIRES: llvm_12_or_greater
1212

1313
declare i32 @llvm.vector.reduce.add.v96i32(<96 x i32>)
@@ -20,6 +20,8 @@ declare float @llvm.vector.reduce.fmax.v96f32(<96 x float>)
2020
declare i32 @llvm.vector.reduce.add.v14i32(<14 x i32>)
2121
declare i32 @llvm.vector.reduce.add.v73i32(<73 x i32>)
2222

23+
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
24+
2325
define i32 @test_add(<96 x i32> %src) {
2426
; SIMD16-LABEL: @test_add(
2527
; SIMD16-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.genx.rdregioni.v16i32.v96i32.i16(<96 x i32> [[SRC:%.*]], i32 0, i32 16, i32 1, i16 0, i32 undef)
@@ -456,3 +458,24 @@ define float @test_fmin(<96 x float> %src) {
456458
%reduce = call reassoc float @llvm.vector.reduce.fmin.v96f32(<96 x float> %src)
457459
ret float %reduce
458460
}
461+
462+
define float @test_fadd_legal(<16 x float> %src) {
463+
; CHECK-LABEL: @test_fadd_legal(
464+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC:%.*]], i32 0, i32 8, i32 1, i16 0, i32 undef)
465+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC]], i32 0, i32 8, i32 1, i16 32, i32 undef)
466+
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
467+
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 0, i32 undef)
468+
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 16, i32 undef)
469+
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
470+
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 0, i32 undef)
471+
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 8, i32 undef)
472+
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]]
473+
; CHECK-NEXT: [[TMP10:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 0, i32 undef)
474+
; CHECK-NEXT: [[TMP11:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 4, i32 undef)
475+
; CHECK-NEXT: [[TMP12:%.*]] = fadd <1 x float> [[TMP10]], [[TMP11]]
476+
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x float> [[TMP12]] to float
477+
; CHECK-NEXT: [[RES:%.*]] = fadd float [[TMP13]], 0.000000e+00
478+
; CHECK-NEXT: ret float [[RES]]
479+
%reduce = call reassoc float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %src)
480+
ret float %reduce
481+
}

0 commit comments

Comments
 (0)