@@ -621,8 +621,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
621621; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8
622622; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
623623; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
624- ; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0
625- ; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
626624; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0
627625; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer
628626; I32-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -644,14 +642,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
644642; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1
645643; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1
646644; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1
647- ; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0
648- ; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1
649- ; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2
650- ; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3
651- ; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4
652- ; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5
653- ; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6
654- ; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7
655645; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]]
656646; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
657647; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
@@ -677,22 +667,21 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
677667; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6
678668; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7
679669; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer
680- ; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]]
681- ; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0
670+ ; I32-NEXT: [[TMP53:%.*]] = mul i64 [[TMP11]], [[START]]
671+ ; I32-NEXT: [[TMP55:%.*]] = mul i64 [[TMP12]], [[START]]
672+ ; I32-NEXT: [[TMP57:%.*]] = mul i64 [[TMP13]], [[START]]
673+ ; I32-NEXT: [[TMP59:%.*]] = mul i64 [[TMP14]], [[START]]
674+ ; I32-NEXT: [[TMP61:%.*]] = mul i64 [[TMP15]], [[START]]
675+ ; I32-NEXT: [[TMP63:%.*]] = mul i64 [[TMP16]], [[START]]
676+ ; I32-NEXT: [[TMP65:%.*]] = mul i64 [[TMP17]], [[START]]
677+ ; I32-NEXT: [[TMP67:%.*]] = mul i64 [[TMP18]], [[START]]
682678; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]]
683- ; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1
684679; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]]
685- ; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2
686680; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]]
687- ; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3
688681; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]]
689- ; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4
690682; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]]
691- ; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5
692683; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]]
693- ; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6
694684; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]]
695- ; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7
696685; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]]
697686; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0
698687; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1
@@ -774,7 +763,222 @@ exit:
774763 ret void
775764}
776765
777- attributes #0 = { "target-cpu" ="znver3" }
; Test input: a value loaded in loop.header (%l8) is used to form an address
; in a different block (loop.latch), where it indexes %src.1.
; The branch in loop.header depends only on the loop-invariant %x, and the
; 'then' block is empty, so both paths reach loop.latch.
; The exit test compares the pre-increment %iv against 100, so the scalar
; loop body executes for %iv = 0..100.
; Expected output, per the check lines below: under the I64 prefix the vector
; body covers 8 indices per iteration and performs the fsub on four
; <2 x double> parts; under the I32 prefix it covers 4 indices with a single
; <4 x double> fsub. In both, the loads, stores and address arithmetic stay
; scalar per lane, and the middle block branches unconditionally to the
; scalar preheader.
766+ define void @address_use_in_different_block (ptr noalias %dst , ptr %src.0 , ptr %src.1 , i32 %x ) #0 {
767+ ; I64-LABEL: define void @address_use_in_different_block(
768+ ; I64-SAME: ptr noalias [[DST:%.*]], ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
769+ ; I64-NEXT: [[ENTRY:.*:]]
770+ ; I64-NEXT: [[X_POS:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
771+ ; I64-NEXT: [[OFFSET:%.*]] = zext i32 [[X_POS]] to i64
772+ ; I64-NEXT: br label %[[VECTOR_PH:.*]]
773+ ; I64: [[VECTOR_PH]]:
774+ ; I64-NEXT: br label %[[VECTOR_BODY:.*]]
775+ ; I64: [[VECTOR_BODY]]:
776+ ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
777+ ; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
778+ ; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
779+ ; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
780+ ; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
781+ ; I64-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
782+ ; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
783+ ; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
784+ ; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
785+ ; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
786+ ; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
787+ ; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
788+ ; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
789+ ; I64-NEXT: [[TMP12:%.*]] = mul i64 [[TMP4]], [[OFFSET]]
790+ ; I64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP5]], [[OFFSET]]
791+ ; I64-NEXT: [[TMP14:%.*]] = mul i64 [[TMP6]], [[OFFSET]]
792+ ; I64-NEXT: [[TMP15:%.*]] = mul i64 [[TMP7]], [[OFFSET]]
793+ ; I64-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP8]]
794+ ; I64-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP9]]
795+ ; I64-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP10]]
796+ ; I64-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP11]]
797+ ; I64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP12]]
798+ ; I64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP13]]
799+ ; I64-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP14]]
800+ ; I64-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP15]]
801+ ; I64-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP16]], align 4
802+ ; I64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP17]], align 4
803+ ; I64-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP18]], align 4
804+ ; I64-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP19]], align 4
805+ ; I64-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP20]], align 4
806+ ; I64-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP21]], align 4
807+ ; I64-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP22]], align 4
808+ ; I64-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP23]], align 4
809+ ; I64-NEXT: [[TMP32:%.*]] = sext i32 [[TMP24]] to i64
810+ ; I64-NEXT: [[TMP33:%.*]] = sext i32 [[TMP25]] to i64
811+ ; I64-NEXT: [[TMP34:%.*]] = sext i32 [[TMP26]] to i64
812+ ; I64-NEXT: [[TMP35:%.*]] = sext i32 [[TMP27]] to i64
813+ ; I64-NEXT: [[TMP36:%.*]] = sext i32 [[TMP28]] to i64
814+ ; I64-NEXT: [[TMP37:%.*]] = sext i32 [[TMP29]] to i64
815+ ; I64-NEXT: [[TMP38:%.*]] = sext i32 [[TMP30]] to i64
816+ ; I64-NEXT: [[TMP39:%.*]] = sext i32 [[TMP31]] to i64
817+ ; I64-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP32]]
818+ ; I64-NEXT: [[TMP41:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP33]]
819+ ; I64-NEXT: [[TMP42:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP34]]
820+ ; I64-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP35]]
821+ ; I64-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP36]]
822+ ; I64-NEXT: [[TMP45:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP37]]
823+ ; I64-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP38]]
824+ ; I64-NEXT: [[TMP47:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP39]]
825+ ; I64-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 -8
826+ ; I64-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[TMP41]], i64 -8
827+ ; I64-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP42]], i64 -8
828+ ; I64-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[TMP43]], i64 -8
829+ ; I64-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP44]], i64 -8
830+ ; I64-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[TMP45]], i64 -8
831+ ; I64-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP46]], i64 -8
832+ ; I64-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP47]], i64 -8
833+ ; I64-NEXT: [[TMP56:%.*]] = load double, ptr [[TMP48]], align 8
834+ ; I64-NEXT: [[TMP57:%.*]] = load double, ptr [[TMP49]], align 8
835+ ; I64-NEXT: [[TMP58:%.*]] = insertelement <2 x double> poison, double [[TMP56]], i32 0
836+ ; I64-NEXT: [[TMP59:%.*]] = insertelement <2 x double> [[TMP58]], double [[TMP57]], i32 1
837+ ; I64-NEXT: [[TMP60:%.*]] = load double, ptr [[TMP50]], align 8
838+ ; I64-NEXT: [[TMP61:%.*]] = load double, ptr [[TMP51]], align 8
839+ ; I64-NEXT: [[TMP62:%.*]] = insertelement <2 x double> poison, double [[TMP60]], i32 0
840+ ; I64-NEXT: [[TMP63:%.*]] = insertelement <2 x double> [[TMP62]], double [[TMP61]], i32 1
841+ ; I64-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP52]], align 8
842+ ; I64-NEXT: [[TMP65:%.*]] = load double, ptr [[TMP53]], align 8
843+ ; I64-NEXT: [[TMP66:%.*]] = insertelement <2 x double> poison, double [[TMP64]], i32 0
844+ ; I64-NEXT: [[TMP67:%.*]] = insertelement <2 x double> [[TMP66]], double [[TMP65]], i32 1
845+ ; I64-NEXT: [[TMP68:%.*]] = load double, ptr [[TMP54]], align 8
846+ ; I64-NEXT: [[TMP69:%.*]] = load double, ptr [[TMP55]], align 8
847+ ; I64-NEXT: [[TMP70:%.*]] = insertelement <2 x double> poison, double [[TMP68]], i32 0
848+ ; I64-NEXT: [[TMP71:%.*]] = insertelement <2 x double> [[TMP70]], double [[TMP69]], i32 1
849+ ; I64-NEXT: [[TMP72:%.*]] = fsub <2 x double> zeroinitializer, [[TMP59]]
850+ ; I64-NEXT: [[TMP73:%.*]] = fsub <2 x double> zeroinitializer, [[TMP63]]
851+ ; I64-NEXT: [[TMP74:%.*]] = fsub <2 x double> zeroinitializer, [[TMP67]]
852+ ; I64-NEXT: [[TMP75:%.*]] = fsub <2 x double> zeroinitializer, [[TMP71]]
853+ ; I64-NEXT: [[TMP76:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP8]]
854+ ; I64-NEXT: [[TMP77:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP9]]
855+ ; I64-NEXT: [[TMP78:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP10]]
856+ ; I64-NEXT: [[TMP79:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP11]]
857+ ; I64-NEXT: [[TMP80:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP12]]
858+ ; I64-NEXT: [[TMP81:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP13]]
859+ ; I64-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP14]]
860+ ; I64-NEXT: [[TMP83:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP15]]
861+ ; I64-NEXT: [[TMP84:%.*]] = extractelement <2 x double> [[TMP72]], i32 0
862+ ; I64-NEXT: store double [[TMP84]], ptr [[TMP76]], align 8
863+ ; I64-NEXT: [[TMP85:%.*]] = extractelement <2 x double> [[TMP72]], i32 1
864+ ; I64-NEXT: store double [[TMP85]], ptr [[TMP77]], align 8
865+ ; I64-NEXT: [[TMP86:%.*]] = extractelement <2 x double> [[TMP73]], i32 0
866+ ; I64-NEXT: store double [[TMP86]], ptr [[TMP78]], align 8
867+ ; I64-NEXT: [[TMP87:%.*]] = extractelement <2 x double> [[TMP73]], i32 1
868+ ; I64-NEXT: store double [[TMP87]], ptr [[TMP79]], align 8
869+ ; I64-NEXT: [[TMP88:%.*]] = extractelement <2 x double> [[TMP74]], i32 0
870+ ; I64-NEXT: store double [[TMP88]], ptr [[TMP80]], align 8
871+ ; I64-NEXT: [[TMP89:%.*]] = extractelement <2 x double> [[TMP74]], i32 1
872+ ; I64-NEXT: store double [[TMP89]], ptr [[TMP81]], align 8
873+ ; I64-NEXT: [[TMP90:%.*]] = extractelement <2 x double> [[TMP75]], i32 0
874+ ; I64-NEXT: store double [[TMP90]], ptr [[TMP82]], align 8
875+ ; I64-NEXT: [[TMP91:%.*]] = extractelement <2 x double> [[TMP75]], i32 1
876+ ; I64-NEXT: store double [[TMP91]], ptr [[TMP83]], align 8
877+ ; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
878+ ; I64-NEXT: [[TMP92:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
879+ ; I64-NEXT: br i1 [[TMP92]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
880+ ; I64: [[MIDDLE_BLOCK]]:
881+ ; I64-NEXT: br label %[[SCALAR_PH:.*]]
882+ ; I64: [[SCALAR_PH]]:
883+ ;
884+ ; I32-LABEL: define void @address_use_in_different_block(
885+ ; I32-SAME: ptr noalias [[DST:%.*]], ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
886+ ; I32-NEXT: [[ENTRY:.*:]]
887+ ; I32-NEXT: [[X_POS:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
888+ ; I32-NEXT: [[OFFSET:%.*]] = zext i32 [[X_POS]] to i64
889+ ; I32-NEXT: br label %[[VECTOR_PH:.*]]
890+ ; I32: [[VECTOR_PH]]:
891+ ; I32-NEXT: br label %[[VECTOR_BODY:.*]]
892+ ; I32: [[VECTOR_BODY]]:
893+ ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
894+ ; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
895+ ; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
896+ ; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
897+ ; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
898+ ; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
899+ ; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
900+ ; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
901+ ; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
902+ ; I32-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP4]]
903+ ; I32-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP5]]
904+ ; I32-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP6]]
905+ ; I32-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP7]]
906+ ; I32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4
907+ ; I32-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4
908+ ; I32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 4
909+ ; I32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
910+ ; I32-NEXT: [[TMP16:%.*]] = sext i32 [[TMP12]] to i64
911+ ; I32-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64
912+ ; I32-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64
913+ ; I32-NEXT: [[TMP19:%.*]] = sext i32 [[TMP15]] to i64
914+ ; I32-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP16]]
915+ ; I32-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP17]]
916+ ; I32-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP18]]
917+ ; I32-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP19]]
918+ ; I32-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP20]], i64 -8
919+ ; I32-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 -8
920+ ; I32-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 -8
921+ ; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP23]], i64 -8
922+ ; I32-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP24]], align 8
923+ ; I32-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP25]], align 8
924+ ; I32-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP26]], align 8
925+ ; I32-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP27]], align 8
926+ ; I32-NEXT: [[TMP32:%.*]] = insertelement <4 x double> poison, double [[TMP28]], i32 0
927+ ; I32-NEXT: [[TMP33:%.*]] = insertelement <4 x double> [[TMP32]], double [[TMP29]], i32 1
928+ ; I32-NEXT: [[TMP34:%.*]] = insertelement <4 x double> [[TMP33]], double [[TMP30]], i32 2
929+ ; I32-NEXT: [[TMP35:%.*]] = insertelement <4 x double> [[TMP34]], double [[TMP31]], i32 3
930+ ; I32-NEXT: [[TMP36:%.*]] = fsub <4 x double> zeroinitializer, [[TMP35]]
931+ ; I32-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]]
932+ ; I32-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP5]]
933+ ; I32-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP6]]
934+ ; I32-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP7]]
935+ ; I32-NEXT: [[TMP41:%.*]] = extractelement <4 x double> [[TMP36]], i32 0
936+ ; I32-NEXT: store double [[TMP41]], ptr [[TMP37]], align 8
937+ ; I32-NEXT: [[TMP42:%.*]] = extractelement <4 x double> [[TMP36]], i32 1
938+ ; I32-NEXT: store double [[TMP42]], ptr [[TMP38]], align 8
939+ ; I32-NEXT: [[TMP43:%.*]] = extractelement <4 x double> [[TMP36]], i32 2
940+ ; I32-NEXT: store double [[TMP43]], ptr [[TMP39]], align 8
941+ ; I32-NEXT: [[TMP44:%.*]] = extractelement <4 x double> [[TMP36]], i32 3
942+ ; I32-NEXT: store double [[TMP44]], ptr [[TMP40]], align 8
943+ ; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
944+ ; I32-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
945+ ; I32-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
946+ ; I32: [[MIDDLE_BLOCK]]:
947+ ; I32-NEXT: br label %[[SCALAR_PH:.*]]
948+ ; I32: [[SCALAR_PH]]:
949+ ;
950+ entry:
951+ %x.pos = call i32 @llvm.smax.i32 (i32 %x , i32 0 ) ; clamp %x to be non-negative
952+ %offset = zext i32 %x.pos to i64 ; loop-invariant multiplier applied to %iv
953+ br label %loop.header
954+
955+ loop.header:
956+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop.latch ]
957+ %7 = mul i64 %iv , %offset ; element index shared by the %src.0 load and the %dst store
958+ %gep.src.0 = getelementptr i32 , ptr %src.0 , i64 %7
959+ %l8 = load i32 , ptr %gep.src.0 , align 4 ; loaded here, consumed as an index in loop.latch
960+ %c = icmp sgt i32 %x , 0 ; depends only on the argument %x, so it is loop-invariant
961+ br i1 %c , label %loop.latch , label %then
962+
963+ then:
964+ br label %loop.latch ; empty block, goes straight to the latch
965+
966+ loop.latch:
967+ %l.ext = sext i32 %l8 to i64
968+ %gep.src.1 = getelementptr double , ptr %src.1 , i64 %l.ext ; address derived from the value loaded in loop.header
969+ %13 = getelementptr i8 , ptr %gep.src.1 , i64 -8 ; step back 8 bytes, i.e. one double element
970+ %l.2 = load double , ptr %13 , align 8
971+ %sub = fsub double 0 .000000e+00 , %l.2 ; computes 0.0 - %l.2
972+ %gep.dst = getelementptr double , ptr %dst , i64 %7
973+ store double %sub , ptr %gep.dst , align 8
974+ %iv.next = add i64 %iv , 1
975+ %ec = icmp eq i64 %iv , 100 ; tests %iv before the increment, so the iteration with %iv == 100 still runs
976+ br i1 %ec , label %exit , label %loop.header
977+
978+ exit:
979+ ret void
980+ }
981+
778982attributes #0 = { "target-cpu" ="znver2" }
779983
780984!0 = distinct !{!0 , !1 }
0 commit comments