Skip to content

Commit 0d89928

Browse files
committed
[Matrix] Add tests identifying GVN and DSE opportunities for matrix store / load intrinsics
1 parent aa43577 commit 0d89928

File tree

3 files changed

+179
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
2+
3+
; BasicAA should prove that loads from sufficiently large static offsets
4+
; don't overlap with matrix loads with a statically known size.
5+
6+
; 4 x 2 double matrix with a stride of 8 elements: each access covers two
; 4-double columns starting 8 elements apart. The load is at a 16-element
; offset from %src, past everything the store to %src can touch, so BasicAA
; should be able to prove the accesses do not overlap.
define <8 x double> @non_overlapping_strided_load(ptr %src) {
entry:
  ; Opaque-pointer GEP ("ptr", not the legacy "double*") to match the rest of the file.
  %src.offset = getelementptr inbounds double, ptr %src, i32 16
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  ; Fully mangled intrinsic name, consistent with the declaration and the CHECK lines.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
15+
16+
; CHECK-LABEL: Function: non_overlapping_strided_load:
17+
; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
18+
; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
19+
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
20+
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
21+
; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
22+
; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
23+
24+
; Same shape as @non_overlapping_strided_load, but with i128 stride/offset
; values too large for 64-bit offset reasoning to wrap: offset 0x200000000
; elements with stride 0x100000000, so the store to %src and the loads from
; %src.offset are disjoint.
define <8 x double> @non_overlapping_strided_load_i128(ptr %src) {
entry:
  ; Opaque-pointer GEP ("ptr", not the legacy "double*") to match the rest of the file.
  %src.offset = getelementptr inbounds double, ptr %src, i128 u0x200000000
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
  ; Fully mangled intrinsic name, consistent with the declaration and the CHECK lines.
  call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 u0x100000000, i1 false, i32 4, i32 2)
  ; Suffix fixed from the ".i28" typo to ".i128" to match the declaration and %l.
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
33+
34+
; CHECK-LABEL: Function: non_overlapping_strided_load_i128:
35+
; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
36+
; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
37+
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
38+
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
39+
; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
40+
; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
41+
42+
; Variant of @non_overlapping_strided_load with the offset reduced to 15
; elements — one short of the offset proved disjoint above — so the
; conservatively-computed ranges of the store and the loads may overlap.
define <8 x double> @overlapping_strided_load(ptr %src) {
entry:
  ; Opaque-pointer GEP ("ptr", not the legacy "double*") to match the rest of the file.
  %src.offset = getelementptr inbounds double, ptr %src, i32 15
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  ; Fully mangled intrinsic name, consistent with the declaration and the CHECK lines.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
51+
52+
; CHECK-LABEL: Function: overlapping_strided_load:
53+
; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
54+
; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
55+
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
56+
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
57+
; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
58+
; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
59+
60+
; i128 overlapping variant: the load offset (0x100000000 elements) equals the
; stride, so the load's first column coincides with the store's second column.
define <8 x double> @overlapping_strided_load_i128(ptr %src) {
entry:
  ; Opaque-pointer GEP ("ptr", not the legacy "double*") to match the rest of the file.
  %src.offset = getelementptr inbounds double, ptr %src, i128 u0x100000000
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
  ; Fully mangled intrinsic name, consistent with the declaration.
  call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 u0x100000000, i1 false, i32 4, i32 2)
  ; Suffix fixed from the ".i28" typo to ".i128" to match the declaration and %l.
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
69+
70+
; CHECK-LABEL: Function: overlapping_strided_load_i128:
; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
77+
78+
; Overloaded matrix intrinsic declarations used above; the i32 and i128
; variants differ only in the type of the stride argument.
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
79+
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr, i128, i1, i32, i32)
80+
declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
81+
declare void @llvm.matrix.column.major.store.v8f64.i128(<8 x double>, ptr, i128, i1, i32, i32)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=dse -S < %s | FileCheck %s
3+
4+
; The first store to %dst writes exactly the locations the second store
; overwrites, with no intervening read of %dst (%src is noalias), so DSE could
; remove it; the CHECK lines record that it currently survives.
define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_unstrided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  ; Fully mangled intrinsic name, consistent with the load calls, the
  ; declaration, and the CHECK lines above.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2)
  ret void
}
19+
20+
; As @dead_unstrided_store, but with large static strides (100 for the stores,
; 200 for the load). Both stores to %dst use the same stride/shape, so the
; first is dead; the CHECK lines record that DSE currently keeps it.
define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: define void @dead_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  ; Fully mangled intrinsic name, consistent with the load calls, the
  ; declaration, and the CHECK lines above.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2)
  ret void
}
35+
36+
; As above, but the store stride is a runtime value. Both stores use the same
; %stride SSA value, so they cover identical locations and the first is still
; dead; the CHECK lines record that DSE currently keeps it.
define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i32 %stride) {
; CHECK-LABEL: define void @dead_dynamically_strided_store(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
; CHECK-NEXT: ret void
;
entry:
  ; Fully mangled intrinsic name, consistent with the load calls, the
  ; declaration, and the CHECK lines above.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
  ret void
}
51+
52+
; Declarations for the overloaded matrix load/store intrinsics used above.
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
53+
declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=gvn -S < %s | FileCheck %s
3+
4+
; %l.2 reloads from %src.offset (8 elements past %src); the intervening store
; to %src writes 8 doubles starting at %src with stride 4, so GVN could forward
; %l to %l.2. The CHECK lines record that the second load currently remains.
define <8 x double> @redundant_unstrided_load(ptr %src) {
; CHECK-LABEL: define <8 x double> @redundant_unstrided_load(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  ; Opaque-pointer GEP ("ptr", not the legacy "double*"), matching the CHECK line above.
  %src.offset = getelementptr inbounds double, ptr %src, i32 8
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2)
  ; Fully mangled intrinsic name, consistent with the declaration and the CHECK lines.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 4, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
23+
24+
; Strided variant: the reload from %src.offset (16 elements past %src, stride 8)
; does not overlap the store's accessed range, so %l.2 is redundant with %l;
; the CHECK lines record that GVN currently keeps the second load.
define <8 x double> @redundant_strided_load(ptr %src) {
; CHECK-LABEL: define <8 x double> @redundant_strided_load(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16
; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2)
; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]]
; CHECK-NEXT: ret <8 x double> [[S]]
;
entry:
  ; Opaque-pointer GEP ("ptr", not the legacy "double*"), matching the CHECK line above.
  %src.offset = getelementptr inbounds double, ptr %src, i32 16
  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  ; Fully mangled intrinsic name, consistent with the declaration and the CHECK lines.
  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
  %s = fadd <8 x double> %l, %l.2
  ret <8 x double> %s
}
43+
44+
; Declarations for the overloaded matrix load/store intrinsics used above.
declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
45+
declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)

0 commit comments

Comments
 (0)