Commit 1eaff3a

[Matrix] Add tests identifying GVN and DSE opportunities for matrix store / load intrinsics
Parent: 46a866a · Commit: 1eaff3a

File tree

3 files changed: +326 -0 lines changed

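
For readers of the diffs below: the matrix intrinsics used throughout take their operands as (pointer, column stride in elements, volatile flag, rows, columns), per the LLVM LangRef. The following sketch is illustrative only and is not part of the commit:

; Illustrative sketch: load a 4 x 2 column-major matrix of doubles from %p,
; with consecutive columns starting 8 doubles apart.
define <8 x double> @example_load_4x2(ptr %p) {
  %m = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p, i32 8, i1 false, i32 4, i32 2)
  ret <8 x double> %m
}

declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)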
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
; RUN: opt %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s

; BasicAA should prove that loads from sufficiently large static offsets
; don't overlap with matrix stores with a statically known size.

define <8 x double> @non_overlapping_strided_load(ptr %src) {
entry:
  %src.offset = getelementptr inbounds double, ptr %src, i32 12
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}

; CHECK-LABEL: Function: non_overlapping_strided_load:
; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)

define <8 x double> @overlapping_strided_load(ptr %src) {
entry:
  %src.offset = getelementptr inbounds double, ptr %src, i32 11
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}

; CHECK-LABEL: Function: overlapping_strided_load:
; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)

declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
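
The two functions above differ only in the load base: with 4 rows, 2 columns, and a column stride of 8 doubles, the store at %src writes doubles [0, 4) and [8, 12), so a load based at %src plus 12 doubles cannot touch it, while a base of 11 doubles overlaps the second column at double 11. If BasicAA reported NoModRef for the non-overlapping pair, GVN could forward %l to %l.2. The sketch below shows that hypothetical post-GVN form; it is not output produced by this commit and reuses the declarations above:

define <8 x double> @non_overlapping_strided_load(ptr %src) {
entry:
  %src.offset = getelementptr inbounds double, ptr %src, i32 12
  ; The single remaining load; the store below would be known not to clobber it.
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
  ; %l.2 has been forwarded to %l, so the add uses %l twice.
  %s = fadd <8 x double> %l, %l
  ret <8 x double> %s
}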
Lines changed: 198 additions & 0 deletions
@@ -0,0 +1,198 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=dse -S %s | FileCheck %s

define void @live_non_matrix_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @live_non_matrix_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DST_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 12
; CHECK-NEXT: store double 4.200000e+01, ptr [[DST]], align 8
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %dst.offset = getelementptr inbounds double, ptr %src, i32 12
  store double 42.0, ptr %dst
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_unstrided_store_non_matrix_load(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = load double, ptr %src
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_strided_store_non_matrix_load(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  %l = load double, ptr %src
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i32 %stride) {
; CHECK-LABEL: define void @dead_dynamically_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_dynamically_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst, i32 %stride) {
; CHECK-LABEL: define void @dead_dynamically_strided_store_non_matrix_load(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
  %l = load double, ptr %src
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_unstrided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_non_matrix_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_non_matrix_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DST_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 6
; CHECK-NEXT: store double 4.200000e+01, ptr [[DST]], align 8
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  %dst.offset = getelementptr inbounds double, ptr %src, i32 6
  store double 42.0, ptr %dst
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  ret void
}

define void @dead_matrix_store_non_matrix_overwrite_unstrided(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_matrix_store_non_matrix_overwrite_unstrided(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: store <8 x double> zeroinitializer, ptr [[DST]], align 64
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  store <8 x double> zeroinitializer, ptr %dst
  ret void
}

define void @dead_matrix_store_non_matrix_overwrite_strided(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_matrix_store_non_matrix_overwrite_strided(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: store <16 x double> zeroinitializer, ptr [[DST]], align 128
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2)
  store <16 x double> zeroinitializer, ptr %dst
  ret void
}

define void @live_matrix_store_non_matrix_overwrite_unstrided(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @live_matrix_store_non_matrix_overwrite_unstrided(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[DST]], align 32
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  store <4 x double> zeroinitializer, ptr %dst
  ret void
}

define void @live_matrix_store_non_matrix_overwrite_strided(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @live_matrix_store_non_matrix_overwrite_strided(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: store <8 x double> zeroinitializer, ptr [[DST]], align 64
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2)
  store <8 x double> zeroinitializer, ptr %dst
  ret void
}

declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
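
In the dead_strided_store and dead_unstrided_store tests above, the leading zeroinitializer store to %dst is completely overwritten by a later matrix store with the same base, stride, and shape, and the only intervening access reads %src, which is noalias with %dst. The checks show the first store surviving, which is the missed DSE opportunity this commit documents. Below is a hypothetical post-DSE form for @dead_strided_store, reusing the declarations above (a sketch, not output of the current pass):

define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) {
entry:
  ; The initial zeroinitializer store to %dst has been removed: it is fully
  ; shadowed by the store of %l, which writes the same 4 x 2 region.
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  ret void
}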
