Skip to content

Commit 300f146

Browse files
esukhov and pszymich
authored and committed
Fix for IGCVectorizer insertpoint
For small blocks that consist of only two special-case instructions, a PHI and a terminator (for example BR or RET), the insert point returned is now the last PHI instead of the first non-PHI instruction, which in such a block is the terminator. (cherry picked from commit 0b9518e)
1 parent 22dc83a commit 300f146

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,8 @@ Instruction *IGCVectorizer::getInsertPointForVector(VecArr &Arr) {
460460
// if insert point is PHI, shift it to the first nonPHI to be safe
461461
if (llvm::isa<llvm::PHINode>(InsertPoint))
462462
InsertPoint = InsertPoint->getParent()->getFirstNonPHI();
463+
if (InsertPoint->isTerminator())
464+
InsertPoint = InsertPoint->getPrevNonDebugInstruction();
463465

464466
return InsertPoint;
465467
}
Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt -S --igc-vectorizer -dce --regkey=VectorizerDepWindowMultiplier=6 < %s 2>&1 | FileCheck %s
10+
11+
; CHECK-LABEL: bb3:
12+
; CHECK-NEXT: [[PHI:%.*]] = phi float
13+
; CHECK-NEXT: [[VECTOR_0:%.*]] = insertelement <8 x float> undef, float [[PHI]], i32 0
14+
; CHECK-NEXT: [[VECTOR_1:%.*]] = insertelement <8 x float> [[VECTOR_0]], float [[PHI]], i32 1
15+
; CHECK-NEXT: [[VECTOR_2:%.*]] = insertelement <8 x float> [[VECTOR_1]], float [[PHI]], i32 2
16+
; CHECK-NEXT: [[VECTOR_3:%.*]] = insertelement <8 x float> [[VECTOR_2]], float [[PHI]], i32 3
17+
; CHECK-NEXT: [[VECTOR_4:%.*]] = insertelement <8 x float> [[VECTOR_3]], float [[PHI]], i32 4
18+
; CHECK-NEXT: [[VECTOR_5:%.*]] = insertelement <8 x float> [[VECTOR_4]], float [[PHI]], i32 5
19+
; CHECK-NEXT: [[VECTOR_6:%.*]] = insertelement <8 x float> [[VECTOR_5]], float [[PHI]], i32 6
20+
; CHECK-NEXT: [[VECTOR_7:%.*]] = insertelement <8 x float> [[VECTOR_6]], float [[PHI]], i32 7
21+
; CHECK-NEXT: br i1 {{%.*}}, label {{%.*}}, label {{%.*}}
22+
23+
24+
; Regression input for the vectorizer's insert-point selection.
;
; bb3 contains only a PHI (%tmp4) followed by a conditional branch, so the
; first non-PHI instruction of that block is its terminator.
; bb6 multiplies %tmp4 by 0.0 eight times, packs the products into an
; <8 x float> through an insertelement chain, bitcasts the vector to
; <8 x i32> and passes it to the LSC2DBlockWrite intrinsic.
;
; The expectations at the top of this file require the vectorized
; insertelement chain to appear in bb3, between the PHI and the branch.
define spir_kernel void @barney() {
25+
bb:
26+
%tmp = fcmp une float 0.000000e+00, 0.000000e+00
27+
br label %bb1
28+
29+
bb1: ; preds = %bb
30+
br i1 false, label %bb3, label %bb2
31+
32+
bb2: ; preds = %bb1
33+
br label %bb3
34+
35+
bb3: ; preds = %bb2, %bb1
36+
%tmp4 = phi float [ 0.000000e+00, %bb1 ], [ 0.000000e+00, %bb2 ]
37+
br i1 %tmp, label %bb5, label %bb6
38+
39+
bb5: ; preds = %bb3
40+
br label %bb6
41+
42+
bb6: ; preds = %bb5, %bb3
43+
%tmp7 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
44+
%tmp8 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
45+
%tmp9 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
46+
%tmp10 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
47+
%tmp11 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
48+
%tmp12 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
49+
%tmp13 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
50+
%tmp14 = fmul reassoc nsz arcp contract float 0.000000e+00, %tmp4
51+
%tmp15 = insertelement <8 x float> zeroinitializer, float %tmp7, i64 0
52+
%tmp16 = insertelement <8 x float> %tmp15, float %tmp8, i64 1
53+
%tmp17 = insertelement <8 x float> %tmp16, float %tmp9, i64 2
54+
%tmp18 = insertelement <8 x float> %tmp17, float %tmp10, i64 3
55+
%tmp19 = insertelement <8 x float> %tmp18, float %tmp11, i64 4
56+
%tmp20 = insertelement <8 x float> %tmp19, float %tmp12, i64 5
57+
%tmp21 = insertelement <8 x float> %tmp20, float %tmp13, i64 6
58+
%tmp22 = insertelement <8 x float> %tmp21, float %tmp14, i64 7
59+
%tmp23 = bitcast <8 x float> %tmp22 to <8 x i32>
60+
call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %tmp23)
61+
ret void
62+
}
63+
64+
declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
65+
66+
!igc.functions = !{!0}
67+
68+
!0 = distinct !{void ()* @barney, !1}
69+
!1 = distinct !{!2}
70+
!2 = distinct !{!"sub_group_size", i32 16}

0 commit comments

Comments
 (0)