@@ -494,3 +494,153 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
494494 store double %add1 , double *%sidx1 , align 8
495495 ret void
496496}
497+
498+
; Scalar float chain fed by two extractelement instructions on %vec: the first
; uses an undef index, the second uses the non-constant argument %idx2.
; The assertions below expect the same scalar instructions in the same order,
; i.e. the function is expected to come out unchanged.
499+ define i1 @ExtractIdxNotConstantInt1 (float %a , float %b , float %c , <4 x float > %vec , i64 %idx2 ) {
500+ ; CHECK-LABEL: @ExtractIdxNotConstantInt1(
501+ ; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 undef
502+ ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
503+ ; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
504+ ; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
505+ ; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
506+ ; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
507+ ; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
508+ ; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
509+ ; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
510+ ; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
511+ ; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
512+ ; CHECK-NEXT: ret i1 [[CMP_I185]]
513+ ;
514+ %vecext.i291.i166 = extractelement <4 x float > %vec , i64 undef
515+ %sub14.i167 = fsub float undef , %vecext.i291.i166
516+ %fm = fmul float %a , %sub14.i167
517+ %sub25.i168 = fsub float %fm , %b
518+ %vecext.i276.i169 = extractelement <4 x float > %vec , i64 %idx2
519+ %add36.i173 = fadd float %sub25.i168 , 10 .0
520+ %mul72.i179 = fmul float %c , %vecext.i276.i169
521+ %add78.i180 = fsub float %mul72.i179 , 30 .0
522+ %add79.i181 = fadd float 2 .0 , %add78.i180
523+ %mul123.i184 = fmul float %add36.i173 , %add79.i181
524+ %cmp.i185 = fcmp ogt float %mul123.i184 , 0 .000000e+00
525+ ret i1 %cmp.i185
526+ }
527+
528+
; Variant of the scalar float chain in which the first extractelement on %vec
; uses the constant index 1 while the second still uses the non-constant
; argument %idx2. The assertions below expect the same scalar instructions in
; the same order, i.e. the function is expected to come out unchanged.
529+ define i1 @ExtractIdxNotConstantInt2 (float %a , float %b , float %c , <4 x float > %vec , i64 %idx2 ) {
530+ ; CHECK-LABEL: @ExtractIdxNotConstantInt2(
531+ ; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 1
532+ ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
533+ ; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
534+ ; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
535+ ; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
536+ ; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
537+ ; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
538+ ; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
539+ ; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
540+ ; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
541+ ; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
542+ ; CHECK-NEXT: ret i1 [[CMP_I185]]
543+ ;
544+ %vecext.i291.i166 = extractelement <4 x float > %vec , i64 1
545+ %sub14.i167 = fsub float undef , %vecext.i291.i166
546+ %fm = fmul float %a , %sub14.i167
547+ %sub25.i168 = fsub float %fm , %b
548+ %vecext.i276.i169 = extractelement <4 x float > %vec , i64 %idx2
549+ %add36.i173 = fadd float %sub25.i168 , 10 .0
550+ %mul72.i179 = fmul float %c , %vecext.i276.i169
551+ %add78.i180 = fsub float %mul72.i179 , 30 .0
552+ %add79.i181 = fadd float 2 .0 , %add78.i180
553+ %mul123.i184 = fmul float %add36.i173 , %add79.i181
554+ %cmp.i185 = fcmp ogt float %mul123.i184 , 0 .000000e+00
555+ ret i1 %cmp.i185
556+ }
557+
558+
; Same scalar float chain as the two functions above, but both extractelement
; instructions on %vec use constant indices (0 and 1); %idx2 is unused.
; The assertions below expect the same scalar instructions in the same order,
; i.e. the function is expected to come out unchanged.
559+ define i1 @foo (float %a , float %b , float %c , <4 x float > %vec , i64 %idx2 ) {
560+ ; CHECK-LABEL: @foo(
561+ ; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
562+ ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
563+ ; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
564+ ; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
565+ ; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
566+ ; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
567+ ; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
568+ ; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
569+ ; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
570+ ; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
571+ ; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
572+ ; CHECK-NEXT: ret i1 [[CMP_I185]]
573+ ;
574+ %vecext.i291.i166 = extractelement <4 x float > %vec , i64 0
575+ %sub14.i167 = fsub float undef , %vecext.i291.i166
576+ %fm = fmul float %a , %sub14.i167
577+ %sub25.i168 = fsub float %fm , %b
578+ %vecext.i276.i169 = extractelement <4 x float > %vec , i64 1
579+ %add36.i173 = fadd float %sub25.i168 , 10 .0
580+ %mul72.i179 = fmul float %c , %vecext.i276.i169
581+ %add78.i180 = fsub float %mul72.i179 , 30 .0
582+ %add79.i181 = fadd float 2 .0 , %add78.i180
583+ %mul123.i184 = fmul float %add36.i173 , %add79.i181
584+ %cmp.i185 = fcmp ogt float %mul123.i184 , 0 .000000e+00
585+ ret i1 %cmp.i185
586+ }
587+
588+ ; Same as @ChecksExtractScores, but the extractelement vector operands do not match.
; Each of the four extractelement instructions reads from a different loaded
; vector (%loadVec, %loadVec2, %loadVec3, %loadVec4). The expected output
; replaces the two scalar loads from %array with one <2 x double> load, gathers
; the extracted scalars with insertelement, performs the fmul/fadd on
; <2 x double> values, and writes the result with a single vector store.
589+ define void @ChecksExtractScores_different_vectors (double * %storeArray , double * %array , <2 x double > *%vecPtr1 , <2 x double >* %vecPtr2 , <2 x double >* %vecPtr3 , <2 x double >* %vecPtr4 ) {
590+ ; CHECK-LABEL: @ChecksExtractScores_different_vectors(
591+ ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
592+ ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
593+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
594+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
595+ ; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
596+ ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
597+ ; CHECK-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
598+ ; CHECK-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
599+ ; CHECK-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
600+ ; CHECK-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
601+ ; CHECK-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
602+ ; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
603+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[EXTRB0]], i32 0
604+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
605+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
606+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
607+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
608+ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
609+ ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
610+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> undef, double [[EXTRA0]], i32 0
611+ ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
612+ ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
613+ ; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]
614+ ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
615+ ; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
616+ ; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
617+ ; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
618+ ; CHECK-NEXT: ret void
619+ ;
620+ %idx0 = getelementptr inbounds double , double * %array , i64 0
621+ %idx1 = getelementptr inbounds double , double * %array , i64 1
622+ %loadA0 = load double , double * %idx0 , align 4
623+ %loadA1 = load double , double * %idx1 , align 4
624+
625+ %loadVec = load <2 x double >, <2 x double >* %vecPtr1 , align 4
626+ %loadVec2 = load <2 x double >, <2 x double >* %vecPtr2 , align 4
627+ %extrA0 = extractelement <2 x double > %loadVec , i32 0
628+ %extrA1 = extractelement <2 x double > %loadVec2 , i32 1
629+ %loadVec3 = load <2 x double >, <2 x double >* %vecPtr3 , align 4
630+ %loadVec4 = load <2 x double >, <2 x double >* %vecPtr4 , align 4
631+ %extrB0 = extractelement <2 x double > %loadVec3 , i32 0
632+ %extrB1 = extractelement <2 x double > %loadVec4 , i32 1
633+
634+ %mul0 = fmul double %extrA0 , %loadA0
635+ %mul1 = fmul double %extrA1 , %loadA0
636+ %mul3 = fmul double %extrB0 , %loadA1
637+ %mul4 = fmul double %extrB1 , %loadA1
638+ %add0 = fadd double %mul0 , %mul3
639+ %add1 = fadd double %mul1 , %mul4
640+
641+ %sidx0 = getelementptr inbounds double , double * %storeArray , i64 0
642+ %sidx1 = getelementptr inbounds double , double * %storeArray , i64 1
643+ store double %add0 , double *%sidx0 , align 8
644+ store double %add1 , double *%sidx1 , align 8
645+ ret void
646+ }
0 commit comments