Skip to content

Commit 6551f7f

Browse files
authored
[LV] Add test for vectorisation of SAXPY unrolled by 5 (NFC). (#153039)
This test contains a vectorisation example of a loop based on SAXPY manually unrolled by five, as discussed in #148808.
1 parent aa5bec0 commit 6551f7f

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed

llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,133 @@ for.inc9: ; preds = %for.end
10131013
for.end11: ; preds = %for.cond
10141014
ret void
10151015
}
1016+
1017+
; This test contains an example of a SAXPY loop manually unrolled by five:
1018+
;
1019+
; void saxpy(long n, float a, float *x, float *y) {
1020+
; for (long i = 0; i < n; i += 5) {
1021+
; y[i] += a * x[i];
1022+
; y[i + 1] += a * x[i + 1];
1023+
; y[i + 2] += a * x[i + 2];
1024+
; y[i + 3] += a * x[i + 3];
1025+
; y[i + 4] += a * x[i + 4];
1026+
; }
1027+
; }
1028+
;
1029+
; @saxpy_5: scalar input IR for y[i..i+4] += a * x[i..i+4], stepping i by 5.
; What the CHECK lines below expect from the optimized output:
;   * a vector loop that loads/stores <10 x float> per iteration (INDEX scaled
;     by 5 to form the element offset, INDEX advancing by 2), and
;   * a remainder loop in which the first four elements are handled as
;     <4 x float> operations while the fifth element stays scalar.
; NOTE(review): the RUN line producing this output is outside this excerpt.
; NOTE(review): the CHECK lines look autogenerated (presumably by
; update_test_checks.py) - regenerate rather than hand-edit; confirm against
; the file header.
define void @saxpy_5(i64 %n, float %a, ptr readonly %x, ptr noalias %y) {
1030+
; CHECK-LABEL: define void @saxpy_5(
1031+
; CHECK-SAME: i64 [[N:%.*]], float [[A:%.*]], ptr readonly captures(none) [[X:%.*]], ptr noalias captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
1032+
; CHECK-NEXT: [[ENTRY:.*:]]
1033+
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 0
1034+
; CHECK-NEXT: br i1 [[TMP0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
1035+
; CHECK: [[LOOP_PREHEADER]]:
1036+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[N]], -1
1037+
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 5
1038+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1039+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6
1040+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_PREHEADER11:.*]], label %[[VECTOR_PH:.*]]
1041+
; CHECK: [[VECTOR_PH]]:
1042+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775806
1043+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 5
1044+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[A]], i64 0
1045+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <10 x i32> zeroinitializer
1046+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1047+
; CHECK: [[VECTOR_BODY]]:
1048+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1049+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
1050+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[OFFSET_IDX]]
1051+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <10 x float>, ptr [[TMP6]], align 4
1052+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[OFFSET_IDX]]
1053+
; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <10 x float>, ptr [[TMP7]], align 4
1054+
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <10 x float> [[WIDE_VEC]], [[TMP5]]
1055+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <10 x float> [[WIDE_VEC5]], [[TMP8]]
1056+
; CHECK-NEXT: store <10 x float> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
1057+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1058+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1059+
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1060+
; CHECK: [[MIDDLE_BLOCK]]:
1061+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1062+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT]], label %[[LOOP_PREHEADER11]]
1063+
; CHECK: [[LOOP_PREHEADER11]]:
1064+
; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
1065+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
1066+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> zeroinitializer
1067+
; CHECK-NEXT: br label %[[LOOP:.*]]
1068+
; CHECK: [[LOOP]]:
1069+
; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[I1_PH]], %[[LOOP_PREHEADER11]] ]
1070+
; CHECK-NEXT: [[XGEP1:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[I1]]
1071+
; CHECK-NEXT: [[YGEP1:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[I1]]
1072+
; CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, ptr [[XGEP1]], align 4
1073+
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP11]]
1074+
; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[YGEP1]], align 4
1075+
; CHECK-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP13]]
1076+
; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[YGEP1]], align 4
1077+
; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i64 [[I1]], 4
1078+
; CHECK-NEXT: [[XGEP5:%.*]] = getelementptr inbounds nuw float, ptr [[X]], i64 [[I5]]
1079+
; CHECK-NEXT: [[X5:%.*]] = load float, ptr [[XGEP5]], align 4
1080+
; CHECK-NEXT: [[AX5:%.*]] = fmul fast float [[X5]], [[A]]
1081+
; CHECK-NEXT: [[YGEP5:%.*]] = getelementptr inbounds nuw float, ptr [[Y]], i64 [[I5]]
1082+
; CHECK-NEXT: [[Y5:%.*]] = load float, ptr [[YGEP5]], align 4
1083+
; CHECK-NEXT: [[AXPY5:%.*]] = fadd fast float [[Y5]], [[AX5]]
1084+
; CHECK-NEXT: store float [[AXPY5]], ptr [[YGEP5]], align 4
1085+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I1]], 5
1086+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], [[I_NEXT]]
1087+
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
1088+
; CHECK: [[EXIT]]:
1089+
; CHECK-NEXT: ret void
1090+
;
1091+
entry:
1092+
; Guard: enter the loop only when n > 0 (signed); otherwise go straight to exit.
%0 = icmp sgt i64 %n, 0
1093+
br i1 %0, label %loop, label %exit
1094+
1095+
loop:
1096+
; %i1 is the element index: 0 on entry, %i.next (= %i1 + 5) on the back edge.
%i1 = phi i64 [ %i.next, %loop ], [ 0, %entry ]
1097+
; Element i: y[i] += a * x[i]
%xgep1 = getelementptr inbounds nuw float, ptr %x, i64 %i1
1098+
%x1 = load float, ptr %xgep1, align 4
1099+
%ax1 = fmul fast float %x1, %a
1100+
%ygep1 = getelementptr inbounds nuw float, ptr %y, i64 %i1
1101+
%y1 = load float, ptr %ygep1, align 4
1102+
%axpy1 = fadd fast float %y1, %ax1
1103+
store float %axpy1, ptr %ygep1, align 4
1104+
; Element i + 1: y[i + 1] += a * x[i + 1]
%i2 = add nuw nsw i64 %i1, 1
1105+
%xgep2 = getelementptr inbounds nuw float, ptr %x, i64 %i2
1106+
%x2 = load float, ptr %xgep2, align 4
1107+
%ax2 = fmul fast float %x2, %a
1108+
%ygep2 = getelementptr inbounds nuw float, ptr %y, i64 %i2
1109+
%y2 = load float, ptr %ygep2, align 4
1110+
%axpy2 = fadd fast float %y2, %ax2
1111+
store float %axpy2, ptr %ygep2, align 4
1112+
; Element i + 2: y[i + 2] += a * x[i + 2]
%i3 = add nuw nsw i64 %i1, 2
1113+
%xgep3 = getelementptr inbounds nuw float, ptr %x, i64 %i3
1114+
%x3 = load float, ptr %xgep3, align 4
1115+
%ax3 = fmul fast float %x3, %a
1116+
%ygep3 = getelementptr inbounds nuw float, ptr %y, i64 %i3
1117+
%y3 = load float, ptr %ygep3, align 4
1118+
%axpy3 = fadd fast float %y3, %ax3
1119+
store float %axpy3, ptr %ygep3, align 4
1120+
; Element i + 3: y[i + 3] += a * x[i + 3]
%i4 = add nuw nsw i64 %i1, 3
1121+
%xgep4 = getelementptr inbounds nuw float, ptr %x, i64 %i4
1122+
%x4 = load float, ptr %xgep4, align 4
1123+
%ax4 = fmul fast float %x4, %a
1124+
%ygep4 = getelementptr inbounds nuw float, ptr %y, i64 %i4
1125+
%y4 = load float, ptr %ygep4, align 4
1126+
%axpy4 = fadd fast float %y4, %ax4
1127+
store float %axpy4, ptr %ygep4, align 4
1128+
; Element i + 4: y[i + 4] += a * x[i + 4]
%i5 = add nuw nsw i64 %i1, 4
1129+
%xgep5 = getelementptr inbounds nuw float, ptr %x, i64 %i5
1130+
%x5 = load float, ptr %xgep5, align 4
1131+
%ax5 = fmul fast float %x5, %a
1132+
%ygep5 = getelementptr inbounds nuw float, ptr %y, i64 %i5
1133+
%y5 = load float, ptr %ygep5, align 4
1134+
%axpy5 = fadd fast float %y5, %ax5
1135+
store float %axpy5, ptr %ygep5, align 4
1136+
; Advance by the unroll factor (5) and loop again while n > i.next (signed).
%i.next = add nuw nsw i64 %i1, 5
1137+
%cmp = icmp sgt i64 %n, %i.next
1138+
br i1 %cmp, label %loop, label %exit
1139+
1140+
exit:
1141+
ret void
1142+
}
10161143
;.
10171144
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
10181145
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1023,4 +1150,6 @@ for.end11: ; preds = %for.cond
10231150
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
10241151
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
10251152
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1153+
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
1154+
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
10261155
;.

0 commit comments

Comments
 (0)