Skip to content

Commit e32522c

Browse files
committed
[SLPVectorizer] Do not assume extractelement idx is a ConstantInt.
The index of an ExtractElementInst is not guaranteed to be a ConstantInt. It can be any integer value. Check explicitly for ConstantInts. The new test cases illustrate scenarios where we crash without this patch. I've also added another test case to check that the matching of extractelement vector operands works. Reviewers: RKSimon, ABataev, dtemirbulatov, vporpo. Reviewed By: ABataev. Differential Revision: https://reviews.llvm.org/D74758
1 parent aa3e99d commit e32522c

File tree

2 files changed

+155
-6
lines changed

2 files changed

+155
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -832,13 +832,12 @@ class BoUpSLP {
832832

833833
// Extracts from consecutive indexes of the same vector get a better score as
834834
// the extracts could be optimized away.
835-
auto *Ex1 = dyn_cast<ExtractElementInst>(V1);
836-
auto *Ex2 = dyn_cast<ExtractElementInst>(V2);
837-
if (Ex1 && Ex2 && Ex1->getVectorOperand() == Ex2->getVectorOperand() &&
838-
cast<ConstantInt>(Ex1->getIndexOperand())->getZExtValue() + 1 ==
839-
cast<ConstantInt>(Ex2->getIndexOperand())->getZExtValue()) {
835+
Value *EV;
836+
ConstantInt *Ex1Idx, *Ex2Idx;
837+
if (match(V1, m_ExtractElement(m_Value(EV), m_ConstantInt(Ex1Idx))) &&
838+
match(V2, m_ExtractElement(m_Deferred(EV), m_ConstantInt(Ex2Idx))) &&
839+
Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue())
840840
return VLOperands::ScoreConsecutiveExtracts;
841-
}
842841

843842
auto *I1 = dyn_cast<Instruction>(V1);
844843
auto *I2 = dyn_cast<Instruction>(V2);

llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,3 +494,153 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
494494
store double %add1, double *%sidx1, align 8
495495
ret void
496496
}
497+
498+
499+
define i1 @ExtractIdxNotConstantInt1(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
500+
; CHECK-LABEL: @ExtractIdxNotConstantInt1(
501+
; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 undef
502+
; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
503+
; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
504+
; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
505+
; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
506+
; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
507+
; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
508+
; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
509+
; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
510+
; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
511+
; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
512+
; CHECK-NEXT: ret i1 [[CMP_I185]]
513+
;
514+
%vecext.i291.i166 = extractelement <4 x float> %vec, i64 undef
515+
%sub14.i167 = fsub float undef, %vecext.i291.i166
516+
%fm = fmul float %a, %sub14.i167
517+
%sub25.i168 = fsub float %fm, %b
518+
%vecext.i276.i169 = extractelement <4 x float> %vec, i64 %idx2
519+
%add36.i173 = fadd float %sub25.i168, 10.0
520+
%mul72.i179 = fmul float %c, %vecext.i276.i169
521+
%add78.i180 = fsub float %mul72.i179, 30.0
522+
%add79.i181 = fadd float 2.0, %add78.i180
523+
%mul123.i184 = fmul float %add36.i173, %add79.i181
524+
%cmp.i185 = fcmp ogt float %mul123.i184, 0.000000e+00
525+
ret i1 %cmp.i185
526+
}
527+
528+
529+
define i1 @ExtractIdxNotConstantInt2(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
530+
; CHECK-LABEL: @ExtractIdxNotConstantInt2(
531+
; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 1
532+
; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
533+
; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
534+
; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
535+
; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
536+
; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
537+
; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
538+
; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
539+
; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
540+
; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
541+
; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
542+
; CHECK-NEXT: ret i1 [[CMP_I185]]
543+
;
544+
%vecext.i291.i166 = extractelement <4 x float> %vec, i64 1
545+
%sub14.i167 = fsub float undef, %vecext.i291.i166
546+
%fm = fmul float %a, %sub14.i167
547+
%sub25.i168 = fsub float %fm, %b
548+
%vecext.i276.i169 = extractelement <4 x float> %vec, i64 %idx2
549+
%add36.i173 = fadd float %sub25.i168, 10.0
550+
%mul72.i179 = fmul float %c, %vecext.i276.i169
551+
%add78.i180 = fsub float %mul72.i179, 30.0
552+
%add79.i181 = fadd float 2.0, %add78.i180
553+
%mul123.i184 = fmul float %add36.i173, %add79.i181
554+
%cmp.i185 = fcmp ogt float %mul123.i184, 0.000000e+00
555+
ret i1 %cmp.i185
556+
}
557+
558+
559+
define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
560+
; CHECK-LABEL: @foo(
561+
; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
562+
; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
563+
; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
564+
; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
565+
; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
566+
; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
567+
; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
568+
; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
569+
; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
570+
; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
571+
; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
572+
; CHECK-NEXT: ret i1 [[CMP_I185]]
573+
;
574+
%vecext.i291.i166 = extractelement <4 x float> %vec, i64 0
575+
%sub14.i167 = fsub float undef, %vecext.i291.i166
576+
%fm = fmul float %a, %sub14.i167
577+
%sub25.i168 = fsub float %fm, %b
578+
%vecext.i276.i169 = extractelement <4 x float> %vec, i64 1
579+
%add36.i173 = fadd float %sub25.i168, 10.0
580+
%mul72.i179 = fmul float %c, %vecext.i276.i169
581+
%add78.i180 = fsub float %mul72.i179, 30.0
582+
%add79.i181 = fadd float 2.0, %add78.i180
583+
%mul123.i184 = fmul float %add36.i173, %add79.i181
584+
%cmp.i185 = fcmp ogt float %mul123.i184, 0.000000e+00
585+
ret i1 %cmp.i185
586+
}
587+
588+
; Same as @ChecksExtractScores, but the extractelement vector operands do not match.
589+
define void @ChecksExtractScores_different_vectors(double* %storeArray, double* %array, <2 x double> *%vecPtr1, <2 x double>* %vecPtr2, <2 x double>* %vecPtr3, <2 x double>* %vecPtr4) {
590+
; CHECK-LABEL: @ChecksExtractScores_different_vectors(
591+
; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
592+
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
593+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
594+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
595+
; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
596+
; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
597+
; CHECK-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
598+
; CHECK-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
599+
; CHECK-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
600+
; CHECK-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
601+
; CHECK-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
602+
; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
603+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[EXTRB0]], i32 0
604+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
605+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
606+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
607+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
608+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
609+
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
610+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> undef, double [[EXTRA0]], i32 0
611+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
612+
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
613+
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]
614+
; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
615+
; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
616+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
617+
; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
618+
; CHECK-NEXT: ret void
619+
;
620+
%idx0 = getelementptr inbounds double, double* %array, i64 0
621+
%idx1 = getelementptr inbounds double, double* %array, i64 1
622+
%loadA0 = load double, double* %idx0, align 4
623+
%loadA1 = load double, double* %idx1, align 4
624+
625+
%loadVec = load <2 x double>, <2 x double>* %vecPtr1, align 4
626+
%loadVec2 = load <2 x double>, <2 x double>* %vecPtr2, align 4
627+
%extrA0 = extractelement <2 x double> %loadVec, i32 0
628+
%extrA1 = extractelement <2 x double> %loadVec2, i32 1
629+
%loadVec3= load <2 x double>, <2 x double>* %vecPtr3, align 4
630+
%loadVec4 = load <2 x double>, <2 x double>* %vecPtr4, align 4
631+
%extrB0 = extractelement <2 x double> %loadVec3, i32 0
632+
%extrB1 = extractelement <2 x double> %loadVec4, i32 1
633+
634+
%mul0 = fmul double %extrA0, %loadA0
635+
%mul1 = fmul double %extrA1, %loadA0
636+
%mul3 = fmul double %extrB0, %loadA1
637+
%mul4 = fmul double %extrB1, %loadA1
638+
%add0 = fadd double %mul0, %mul3
639+
%add1 = fadd double %mul1, %mul4
640+
641+
%sidx0 = getelementptr inbounds double, double* %storeArray, i64 0
642+
%sidx1 = getelementptr inbounds double, double* %storeArray, i64 1
643+
store double %add0, double *%sidx0, align 8
644+
store double %add1, double *%sidx1, align 8
645+
ret void
646+
}

0 commit comments

Comments
 (0)