Skip to content

Commit 4c446b3

Browse files
authored
[CIR][CIRGen][Builtin][Neon] Lower neon_vext_v and neon_vextq_v (#951)
as title. There are two highlights of the PR 1. The PR introduced a new test file to cover neon intrinsics that move data, which is a big category. This would the 5th neon test file. And we're committed to keep total number of neon test files within 6. This file uses another opt option instcombine, which makes test LLVM code more concise, and our -fclangir generated LLVM code would be identical to OG with this. It looks like OG did some instcombine optimization. 2. `getIntFromMLIRValue` helper function could be substituted by [`mlir::cir::IntAttr getConstOpIntAttr` in CIRGenAtomic.cpp](https://github.com/llvm/clangir/blob/24b24557c98d1c031572a567b658cfb6254f8a89/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp#L337). The function `mlir::cir::IntAttr getConstOpIntAttr` is doing more than `getIntFromMLIRValue`, and there is FIXME in the comment, so not sure if we should just use `mlir::cir::IntAttr getConstOpIntAttr`, either is fine with me.
1 parent 3d43e93 commit 4c446b3

File tree

2 files changed

+235
-0
lines changed

2 files changed

+235
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2197,6 +2197,14 @@ mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
21972197
}
21982198
}
21992199

2200+
/// Get integer from a mlir::Value that is an int constant or a constant op.
2201+
static int64_t getIntValueFromConstOp(mlir::Value val) {
2202+
auto constOp = mlir::cast<mlir::cir::ConstantOp>(val.getDefiningOp());
2203+
return (mlir::cast<mlir::cir::IntAttr>(constOp.getValue()))
2204+
.getValue()
2205+
.getSExtValue();
2206+
}
2207+
22002208
mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
22012209
unsigned builtinID, unsigned llvmIntrinsic, unsigned altLLVMIntrinsic,
22022210
const char *nameHint, unsigned modifier, const CallExpr *e,
@@ -2241,6 +2249,18 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
22412249
// In CIR, integral cast op supports vector of int type truncating.
22422250
return builder.createIntCast(ops[0], ty);
22432251
}
2252+
case NEON::BI__builtin_neon_vext_v:
2253+
case NEON::BI__builtin_neon_vextq_v: {
2254+
int cv = getIntValueFromConstOp(ops[2]);
2255+
llvm::SmallVector<int64_t, 16> indices;
2256+
for (unsigned i = 0, e = vTy.getSize(); i != e; ++i)
2257+
indices.push_back(i + cv);
2258+
2259+
ops[0] = builder.createBitcast(ops[0], ty);
2260+
ops[1] = builder.createBitcast(ops[1], ty);
2261+
return builder.createVecShuffle(getLoc(e->getExprLoc()), ops[0], ops[1],
2262+
indices);
2263+
}
22442264
}
22452265

22462266
// This second switch is for the intrinsics that might have a more generic
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
2+
// RUN: -fclangir -disable-O0-optnone -fno-clangir-call-conv-lowering \
3+
// RUN: -flax-vector-conversions=none -emit-cir -o %t.cir %s
4+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
7+
// RUN: -fclangir -disable-O0-optnone -fno-clangir-call-conv-lowering \
8+
// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \
9+
// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
10+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
11+
12+
// REQUIRES: aarch64-registered-target || arm-registered-target
13+
14+
// This test file contains test cases for the intrinsics that move data between
15+
// registers and vectors, such as mov, get, set, and ext. We dedicate this file
16+
// to them becuase they are many. The file neon.c covers some such intrinsics
17+
// that are not in this file.
18+
19+
#include <arm_neon.h>
20+
21+
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
22+
return vext_s8(a, b, 2);
23+
24+
// CIR-LABEL: vext_s8
25+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 8>)
26+
// CIR-SAME: [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i,
27+
// CIR-SAME: #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i,
28+
// CIR-SAME: #cir.int<8> : !s32i, #cir.int<9> : !s32i] : !cir.vector<!s8i x 8>
29+
30+
// LLVM: {{.*}}test_vext_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
31+
// LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]],
32+
// LLVM-SAME: <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
33+
// LLVM: ret <8 x i8> [[RES]]
34+
}
35+
36+
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
37+
return vextq_s8(a, b, 2);
38+
39+
// CIR-LABEL: vextq_s8
40+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 16>)
41+
// CIR-SAME: [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i,
42+
// CIR-SAME: #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i,
43+
// CIR-SAME: #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i,
44+
// CIR-SAME: #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i,
45+
// CIR-SAME: #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i,
46+
// CIR-SAME: #cir.int<17> : !s32i] : !cir.vector<!s8i x 16>
47+
48+
// LLVM: {{.*}}test_vextq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
49+
// LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]],
50+
// LLVM-SAME: <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9
51+
// LLVM-SAME: i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
52+
// LLVM: ret <16 x i8> [[RES]]
53+
}
54+
55+
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
56+
return vext_s16(a, b, 3);
57+
58+
// CIR-LABEL: vext_s16
59+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s16i x 4>)
60+
// CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i,
61+
// CIR-SAME: #cir.int<6> : !s32i] : !cir.vector<!s16i x 4>
62+
63+
// LLVM: {{.*}}test_vext_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
64+
// LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]],
65+
// LLVM-SAME: <4 x i32> <i32 3, i32 4, i32 5, i32 6>
66+
// LLVM: ret <4 x i16> [[RES]]
67+
}
68+
69+
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
70+
return vextq_s16(a, b, 3);
71+
72+
// CIR-LABEL: vextq_s16
73+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s16i x 8>)
74+
// CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i,
75+
// CIR-SAME: #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i,
76+
// CIR-SAME: #cir.int<9> : !s32i, #cir.int<10> : !s32i] : !cir.vector<!s16i x 8>
77+
78+
// LLVM: {{.*}}test_vextq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
79+
// LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]],
80+
// LLVM-SAME: <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
81+
// LLVM: ret <8 x i16> [[RES]]
82+
}
83+
84+
85+
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
86+
return vext_u16(a, b, 3);
87+
88+
// CIR-LABEL: vext_u16
89+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!u16i x 4>)
90+
// CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i,
91+
// CIR-SAME: #cir.int<6> : !s32i] : !cir.vector<!u16i x 4>
92+
93+
// LLVM: {{.*}}test_vext_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
94+
// LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]],
95+
// LLVM-SAME: <4 x i32> <i32 3, i32 4, i32 5, i32 6>
96+
// LLVM: ret <4 x i16> [[RES]]
97+
}
98+
99+
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
100+
return vextq_u16(a, b, 3);
101+
102+
// CIR-LABEL: vextq_u16
103+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!u16i x 8>)
104+
// CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i,
105+
// CIR-SAME: #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i,
106+
// CIR-SAME: #cir.int<9> : !s32i, #cir.int<10> : !s32i] : !cir.vector<!u16i x 8>
107+
108+
// LLVM: {{.*}}test_vextq_u16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
109+
// LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]],
110+
// LLVM-SAME: <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
111+
// LLVM: ret <8 x i16> [[RES]]
112+
}
113+
114+
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
115+
return vext_s32(a, b, 1);
116+
117+
// CIR-LABEL: vext_s32
118+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s32i x 2>)
119+
// CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 2>
120+
121+
// LLVM: {{.*}}test_vext_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
122+
// LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]],
123+
// LLVM-SAME: <2 x i32> <i32 1, i32 2>
124+
// LLVM: ret <2 x i32> [[RES]]
125+
}
126+
127+
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
128+
return vextq_s32(a, b, 1);
129+
130+
// CIR-LABEL: vextq_s32
131+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s32i x 4>)
132+
// CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i,
133+
// CIR-SAME: #cir.int<3> : !s32i, #cir.int<4> : !s32i] : !cir.vector<!s32i x 4>
134+
135+
// LLVM: {{.*}}test_vextq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
136+
// LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]],
137+
// LLVM-SAME: <4 x i32> <i32 1, i32 2, i32 3, i32 4>
138+
// LLVM: ret <4 x i32> [[RES]]
139+
}
140+
141+
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
142+
return vext_s64(a, b, 0);
143+
144+
// CIR-LABEL: vext_s64
145+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s64i x 1>)
146+
// CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!s64i x 1>
147+
148+
// LLVM: {{.*}}test_vext_s64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
149+
// LLVM: ret <1 x i64> [[A]]
150+
}
151+
152+
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
153+
return vextq_s64(a, b, 1);
154+
155+
// CIR-LABEL: vextq_s64
156+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s64i x 2>)
157+
// CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s64i x 2>
158+
159+
// LLVM: {{.*}}test_vextq_s64(<2 x i64>{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
160+
// LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]],
161+
// LLVM-SAME: <2 x i32> <i32 1, i32 2>
162+
// LLVM: ret <2 x i64> [[RES]]
163+
}
164+
165+
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
166+
return vext_f32(a, b, 1);
167+
168+
// CIR-LABEL: vext_f32
169+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.float x 2>)
170+
// CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.float x 2>
171+
172+
// LLVM: {{.*}}test_vext_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
173+
// LLVM: [[RES:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]],
174+
// LLVM-SAME: <2 x i32> <i32 1, i32 2>
175+
// LLVM: ret <2 x float> [[RES]]
176+
}
177+
178+
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
179+
return vextq_f32(a, b, 1);
180+
181+
// CIR-LABEL: vextq_f32
182+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.float x 4>)
183+
// CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i,
184+
// CIR-SAME: #cir.int<4> : !s32i] : !cir.vector<!cir.float x 4>
185+
186+
// LLVM: {{.*}}test_vextq_f32(<4 x float>{{.*}}[[A:%.*]], <4 x float>{{.*}}[[B:%.*]])
187+
// LLVM: [[RES:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]],
188+
// LLVM-SAME: <4 x i32> <i32 1, i32 2, i32 3, i32 4>
189+
// LLVM: ret <4 x float> [[RES]]
190+
}
191+
192+
193+
float64x1_t test_vext_f64(float64x1_t a, float64x1_t b) {
194+
return vext_f64(a, b, 0);
195+
196+
// CIR-LABEL: vext_f64
197+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.double x 1>)
198+
// CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!cir.double x 1>
199+
200+
// LLVM: {{.*}}test_vext_f64(<1 x double>{{.*}}[[A:%.*]], <1 x double>{{.*}}[[B:%.*]])
201+
// LLVM: ret <1 x double> [[A]]
202+
}
203+
204+
float64x2_t test_vextq_f64(float64x2_t a, float64x2_t b) {
205+
return vextq_f64(a, b, 1);
206+
207+
// CIR-LABEL: vextq_f64
208+
// CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.double x 2>)
209+
// CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.double x 2>
210+
211+
// LLVM: {{.*}}test_vextq_f64(<2 x double>{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
212+
// LLVM: [[RES:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]],
213+
// LLVM-SAME: <2 x i32> <i32 1, i32 2>
214+
// LLVM: ret <2 x double> [[RES]]
215+
}

0 commit comments

Comments
 (0)