@@ -1013,6 +1013,133 @@ for.inc9: ; preds = %for.end
1013
1013
for.end11: ; preds = %for.cond
1014
1014
ret void
1015
1015
}
1016
+
1017
+ ; This test contains an example of a SAXPY loop manually unrolled by five:
1018
+ ;
1019
+ ; void saxpy(long n, float a, float *x, float *y) {
1020
+ ; for (long i = 0; i < n; i += 5) {
1021
+ ; y[i] += a * x[i];
1022
+ ; y[i + 1] += a * x[i + 1];
1023
+ ; y[i + 2] += a * x[i + 2];
1024
+ ; y[i + 3] += a * x[i + 3];
1025
+ ; y[i + 4] += a * x[i + 4];
1026
+ ; }
1027
+ ; }
1028
+ ;
1029
+ define void @saxpy_5 (i64 %n , float %a , ptr readonly %x , ptr noalias %y ) {
1030
+ ; CHECK-LABEL: define void @saxpy_5(
1031
+ ; CHECK-SAME: i64 [[N:%.*]], float [[A:%.*]], ptr readonly captures(none) [[X:%.*]], ptr noalias captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
1032
+ ; CHECK-NEXT: [[ENTRY:.*:]]
1033
+ ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 0
1034
+ ; CHECK-NEXT: br i1 [[TMP0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
1035
+ ; CHECK: [[LOOP_PREHEADER]]:
1036
+ ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[N]], -1
1037
+ ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 5
1038
+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1039
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6
1040
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_PREHEADER11:.*]], label %[[VECTOR_PH:.*]]
1041
+ ; CHECK: [[VECTOR_PH]]:
1042
+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775806
1043
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 5
1044
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[A]], i64 0
1045
+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <10 x i32> zeroinitializer
1046
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1047
+ ; CHECK: [[VECTOR_BODY]]:
1048
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1049
+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
1050
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[OFFSET_IDX]]
1051
+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <10 x float>, ptr [[TMP6]], align 4
1052
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[OFFSET_IDX]]
1053
+ ; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <10 x float>, ptr [[TMP7]], align 4
1054
+ ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <10 x float> [[WIDE_VEC]], [[TMP5]]
1055
+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <10 x float> [[WIDE_VEC5]], [[TMP8]]
1056
+ ; CHECK-NEXT: store <10 x float> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
1057
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1058
+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1059
+ ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1060
+ ; CHECK: [[MIDDLE_BLOCK]]:
1061
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1062
+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT]], label %[[LOOP_PREHEADER11]]
1063
+ ; CHECK: [[LOOP_PREHEADER11]]:
1064
+ ; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
1065
+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
1066
+ ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> zeroinitializer
1067
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
1068
+ ; CHECK: [[LOOP]]:
1069
+ ; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[I1_PH]], %[[LOOP_PREHEADER11]] ]
1070
+ ; CHECK-NEXT: [[XGEP1:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[I1]]
1071
+ ; CHECK-NEXT: [[YGEP1:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[I1]]
1072
+ ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, ptr [[XGEP1]], align 4
1073
+ ; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP11]]
1074
+ ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[YGEP1]], align 4
1075
+ ; CHECK-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP13]]
1076
+ ; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[YGEP1]], align 4
1077
+ ; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i64 [[I1]], 4
1078
+ ; CHECK-NEXT: [[XGEP5:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[I5]]
1079
+ ; CHECK-NEXT: [[X5:%.*]] = load float, ptr [[XGEP5]], align 4
1080
+ ; CHECK-NEXT: [[AX5:%.*]] = fmul fast float [[X5]], [[A]]
1081
+ ; CHECK-NEXT: [[YGEP5:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[I5]]
1082
+ ; CHECK-NEXT: [[Y5:%.*]] = load float, ptr [[YGEP5]], align 4
1083
+ ; CHECK-NEXT: [[AXPY5:%.*]] = fadd fast float [[Y5]], [[AX5]]
1084
+ ; CHECK-NEXT: store float [[AXPY5]], ptr [[YGEP5]], align 4
1085
+ ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I1]], 5
1086
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], [[I_NEXT]]
1087
+ ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
1088
+ ; CHECK: [[EXIT]]:
1089
+ ; CHECK-NEXT: ret void
1090
+ ;
1091
+ entry:
1092
+ %0 = icmp sgt i64 %n , 0 ; guard: the loop is entered only when n > 0 (signed compare)
1093
+ br i1 %0 , label %loop , label %exit
1094
+
1095
+ loop:
1096
+ %i1 = phi i64 [ %i.next , %loop ], [ 0 , %entry ] ; i: 0 when coming from %entry, %i.next on the back edge
1097
+ %xgep1 = getelementptr inbounds nuw float , ptr %x , i64 %i1 ; &x[i]
1098
+ %x1 = load float , ptr %xgep1 , align 4
1099
+ %ax1 = fmul fast float %x1 , %a ; a * x[i]
1100
+ %ygep1 = getelementptr inbounds nuw float , ptr %y , i64 %i1 ; &y[i]
1101
+ %y1 = load float , ptr %ygep1 , align 4
1102
+ %axpy1 = fadd fast float %y1 , %ax1 ; y[i] + a * x[i]
1103
+ store float %axpy1 , ptr %ygep1 , align 4 ; written back to the same y slot that was loaded
1104
+ %i2 = add nuw nsw i64 %i1 , 1 ; second unrolled copy: same sequence at index i + 1
1105
+ %xgep2 = getelementptr inbounds nuw float , ptr %x , i64 %i2
1106
+ %x2 = load float , ptr %xgep2 , align 4
1107
+ %ax2 = fmul fast float %x2 , %a
1108
+ %ygep2 = getelementptr inbounds nuw float , ptr %y , i64 %i2
1109
+ %y2 = load float , ptr %ygep2 , align 4
1110
+ %axpy2 = fadd fast float %y2 , %ax2
1111
+ store float %axpy2 , ptr %ygep2 , align 4
1112
+ %i3 = add nuw nsw i64 %i1 , 2 ; third unrolled copy: index i + 2
1113
+ %xgep3 = getelementptr inbounds nuw float , ptr %x , i64 %i3
1114
+ %x3 = load float , ptr %xgep3 , align 4
1115
+ %ax3 = fmul fast float %x3 , %a
1116
+ %ygep3 = getelementptr inbounds nuw float , ptr %y , i64 %i3
1117
+ %y3 = load float , ptr %ygep3 , align 4
1118
+ %axpy3 = fadd fast float %y3 , %ax3
1119
+ store float %axpy3 , ptr %ygep3 , align 4
1120
+ %i4 = add nuw nsw i64 %i1 , 3 ; fourth unrolled copy: index i + 3
1121
+ %xgep4 = getelementptr inbounds nuw float , ptr %x , i64 %i4
1122
+ %x4 = load float , ptr %xgep4 , align 4
1123
+ %ax4 = fmul fast float %x4 , %a
1124
+ %ygep4 = getelementptr inbounds nuw float , ptr %y , i64 %i4
1125
+ %y4 = load float , ptr %ygep4 , align 4
1126
+ %axpy4 = fadd fast float %y4 , %ax4
1127
+ store float %axpy4 , ptr %ygep4 , align 4
1128
+ %i5 = add nuw nsw i64 %i1 , 4 ; fifth unrolled copy: index i + 4
1129
+ %xgep5 = getelementptr inbounds nuw float , ptr %x , i64 %i5
1130
+ %x5 = load float , ptr %xgep5 , align 4
1131
+ %ax5 = fmul fast float %x5 , %a
1132
+ %ygep5 = getelementptr inbounds nuw float , ptr %y , i64 %i5
1133
+ %y5 = load float , ptr %ygep5 , align 4
1134
+ %axpy5 = fadd fast float %y5 , %ax5
1135
+ store float %axpy5 , ptr %ygep5 , align 4
1136
+ %i.next = add nuw nsw i64 %i1 , 5 ; step by five, one per unrolled copy above
1137
+ %cmp = icmp sgt i64 %n , %i.next ; continue while i.next < n (signed compare)
1138
+ br i1 %cmp , label %loop , label %exit
1139
+
1140
+ exit:
1141
+ ret void
1142
+ }
1016
1143
;.
1017
1144
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1018
1145
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1023,4 +1150,6 @@ for.end11: ; preds = %for.cond
1023
1150
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1024
1151
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
1025
1152
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1153
+ ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
1154
+ ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
1026
1155
;.
0 commit comments