Commit 463c02c

[Matrix] Add test identifying GVN and dead store elimination opportunities for matrix intrinsics
1 parent a827487 commit 463c02c

Showing 3 changed files with 227 additions and 0 deletions.
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -aa-pipeline=basic-aa -passes=gvn -S < %s | FileCheck %s

; BasicAA should prove that loads from sufficiently large static offsets
; don't overlap with matrix loads with a statically known size.
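; In @non_overlapping_strided_load below, the store writes 2 columns of 4
; doubles with a stride of 8, so it conservatively accesses at most
; stride * columns = 16 doubles starting at %p (assuming the strided access
; is rounded up to a contiguous range). The loads start at %p + 16 doubles,
; so the store cannot clobber them and the second load is redundant.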
define <8 x double> @non_overlapping_strided_load(ptr %p) {
; CHECK-LABEL: define <8 x double> @non_overlapping_strided_load(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P_OFFSET:%.*]] = getelementptr inbounds double, ptr [[P]], i64 16
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[P_OFFSET]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[P]], i64 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %p.offset = getelementptr inbounds double, ptr %p, i64 16
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %p, i64 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
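; In @overlapping_strided_load the offset is only 15 doubles, which falls
; inside the store's conservative 16-double range, so the second load may
; be clobbered and has to be preserved.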
define <8 x double> @overlapping_strided_load(ptr %p) {
; CHECK-LABEL: define <8 x double> @overlapping_strided_load(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P_OFFSET:%.*]] = getelementptr inbounds double, ptr [[P]], i64 15
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[P_OFFSET]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[P]], i64 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[P_OFFSET]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %p.offset = getelementptr inbounds double, ptr %p, i64 15
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %p, i64 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v8f64.i64(<8 x double>, ptr, i64, i1, i32, i32)
Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=dse -S < %s | FileCheck %s
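; The second store writes exactly the same bytes as the first, so DSE can
; remove the first store.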
define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_unstrided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 4, i1 false, i32 4, i32 2)
  ret void
}
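; The second store (stride 100) does not cover all bytes written by the
; first (stride 200), so neither store is dead.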
define void @live_strided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @live_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 200, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 200, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 100, i1 false, i32 4, i32 2)
  ret void
}
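; Both stores write the same locations (same pointer, stride, and shape),
; so the first one is dead.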
define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 100, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 100, i1 false, i32 4, i32 2)
  ret void
}
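; The stride is only known at runtime, but both stores use the same value,
; so they write the same locations and the first store is dead.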
define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i64 %stride) {
; CHECK-LABEL: define void @dead_dynamically_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[STRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 %stride, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 %stride, i1 false, i32 4, i32 2)
  ret void
}
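; Two unrelated runtime strides may address different memory, so both
; stores have to be kept.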
define void @live_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i64 %stride, i64 %stride.2) {
; CHECK-LABEL: define void @live_dynamically_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[STRIDE:%.*]], i64 [[STRIDE_2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 [[STRIDE_2]], i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 %stride, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> %l, ptr %dst, i64 %stride.2, i1 false, i32 4, i32 2)
  ret void
}
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v8f64.i64(<8 x double>, ptr, i64, i1, i32, i32)
Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=gvn -S < %s | FileCheck %s
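; Two identical loads with no intervening store: GVN can replace the
; second load with the result of the first.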
define <8 x double> @redundant_unstrided_load(ptr %src) {
; CHECK-LABEL: define <8 x double> @redundant_unstrided_load(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  %s = fadd contract <8 x double> %l, %l.2
  ret <8 x double> %s
}
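; The same holds for strided loads, as long as both loads use the same
; stride.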
define <8 x double> @redundant_strided_load(ptr %src) {
; CHECK-LABEL: define <8 x double> @redundant_strided_load(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  %s = fadd contract <8 x double> %l, %l.2
  ret <8 x double> %s
}
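; The loads use different strides and shapes, so GVN conservatively treats
; them as distinct and keeps both.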
define <8 x double> @necessary_unstrided_load(ptr %src) {
; CHECK-LABEL: define <8 x double> @necessary_unstrided_load(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 2, i1 false, i32 2, i32 4)
; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L_2]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 2, i1 false, i32 2, i32 4)
  %s = fadd contract <8 x double> %l, %l.2
  ret <8 x double> %s
}
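; Different strides select different columns from %src, so the loads read
; different memory and are not redundant.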
define <8 x double> @necessary_strided_load(ptr %src) {
; CHECK-LABEL: define <8 x double> @necessary_strided_load(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L_2]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 100, i1 false, i32 4, i32 2)
  %s = fadd contract <8 x double> %l, %l.2
  ret <8 x double> %s
}
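; A runtime stride still permits CSE, as long as both loads use the same
; stride value.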
define <8 x double> @redundant_dynamically_strided_load(ptr %src, i32 %stride) {
; CHECK-LABEL: define <8 x double> @redundant_dynamically_strided_load(
; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride, i1 false, i32 4, i32 2)
  %s = fadd contract <8 x double> %l, %l.2
  ret <8 x double> %s
}
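; With two unrelated runtime strides the loads may read different memory,
; so both have to stay.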
define <8 x double> @necessary_dynamically_strided_load(ptr %src, i32 %stride, i32 %stride.2) {
; CHECK-LABEL: define <8 x double> @necessary_dynamically_strided_load(
; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]], i32 [[STRIDE_2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 [[STRIDE_2]], i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L_2]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride.2, i1 false, i32 4, i32 2)
  %s = fadd contract <8 x double> %l, %l.2
  ret <8 x double> %s
}
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
