Fix reduction lowering for small vectors in VC

vmustya · igcbot · commit 1be6116ccc71 · 2024-11-28T03:06:37.000+01:00
When the reduction vector size is a power of two and less than 32, the
reduction lowering emits an internal compiler error. This patch fixes
the failure by properly handling the no-tail case on small vectors.
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp
@@ -5013,6 +5013,8 @@ bool GenXLowering::lowerReduction(CallInst *CI, Value *Src, Value *Start,
     IGC_ASSERT_EXIT(TailIndex);
     TailWidth = SrcWidth % TailIndex;
     SrcWidth = TailIndex;
+  } else {
+    TailWidth = 0;
   }
 
   for (SrcWidth /= 2; SrcWidth > 0; SrcWidth /= 2) {
diff --git a/IGC/VectorCompiler/test/Lowering/reduce.ll b/IGC/VectorCompiler/test/Lowering/reduce.ll
@@ -6,8 +6,8 @@
 ;
 ;============================ end_copyright_notice =============================
 
-; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD16 %s
-; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD32 %s
+; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD16,CHECK %s
+; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD32,CHECK %s
 ; REQUIRES: llvm_12_or_greater
 
 declare i32 @llvm.vector.reduce.add.v96i32(<96 x i32>)
@@ -20,6 +20,8 @@ declare float @llvm.vector.reduce.fmax.v96f32(<96 x float>)
 declare i32 @llvm.vector.reduce.add.v14i32(<14 x i32>)
 declare i32 @llvm.vector.reduce.add.v73i32(<73 x i32>)
 
+declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
+
 define i32 @test_add(<96 x i32> %src) {
 ; SIMD16-LABEL: @test_add(
 ; SIMD16-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.genx.rdregioni.v16i32.v96i32.i16(<96 x i32> [[SRC:%.*]], i32 0, i32 16, i32 1, i16 0, i32 undef)
@@ -456,3 +458,24 @@ define float @test_fmin(<96 x float> %src) {
   %reduce = call reassoc float @llvm.vector.reduce.fmin.v96f32(<96 x float> %src)
   ret float %reduce
 }
+
+define float @test_fadd_legal(<16 x float> %src) {
+; CHECK-LABEL: @test_fadd_legal(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC:%.*]], i32 0, i32 8, i32 1, i16 0, i32 undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC]], i32 0, i32 8, i32 1, i16 32, i32 undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 0, i32 undef)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 16, i32 undef)
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 0, i32 undef)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 8, i32 undef)
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 0, i32 undef)
+; CHECK-NEXT:    [[TMP11:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 4, i32 undef)
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <1 x float> [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x float> [[TMP12]] to float
+; CHECK-NEXT:    [[RES:%.*]] = fadd float [[TMP13]], 0.000000e+00
+; CHECK-NEXT:    ret float [[RES]]
+  %reduce = call reassoc float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %src)
+  ret float %reduce
+}

Original file line number	Diff line number	Diff line change
`@@ -5013,6 +5013,8 @@ bool GenXLowering::lowerReduction(CallInst *CI, Value *Src, Value *Start,`
`5013`	`5013`	`IGC_ASSERT_EXIT(TailIndex);`
`5014`	`5014`	`TailWidth = SrcWidth % TailIndex;`
`5015`	`5015`	`SrcWidth = TailIndex;`
	`5016`	`+ } else {`
	`5017`	`+ TailWidth = 0;`
`5016`	`5018`	`}`
`5017`	`5019`
`5018`	`5020`	`for (SrcWidth /= 2; SrcWidth > 0; SrcWidth /= 2) {`