|
6 | 6 | // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
|
7 | 7 |
|
8 | 8 | typedef int vi4 __attribute__((ext_vector_type(4)));
|
| 9 | +typedef int vi6 __attribute__((ext_vector_type(6))); |
9 | 10 | typedef unsigned int uvi4 __attribute__((ext_vector_type(4)));
|
10 | 11 | typedef int vi3 __attribute__((ext_vector_type(3)));
|
11 | 12 | typedef int vi2 __attribute__((ext_vector_type(2)));
|
@@ -988,3 +989,87 @@ void foo14() {
|
988 | 989 | // OGCG: %[[TMP_B:.*]] = load <4 x float>, ptr %[[VEC_B]], align 16
|
989 | 990 | // OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]]
|
990 | 991 | // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32>
|
| 992 | + |
| 993 | +void foo15() { /* Exercises the two-operand form of __builtin_shufflevector on 4 x int vectors; the CIR checks below expect cir.vec.shuffle.dynamic. */ |
| 994 | + vi4 a; |
| 995 | + vi4 b; |
| 996 | + vi4 r = __builtin_shufflevector(a, b); /* Per the LLVM/OGCG checks below: b is AND-ed with splat(3) and each lane is then used as an index into a. */ |
| 997 | +} |
| 998 | + |
| 999 | +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> |
| 1000 | +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> |
| 1001 | +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> |
| 1002 | + |
| 1003 | +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 |
| 1004 | +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 |
| 1005 | +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) |
| 1006 | +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 |
| 1007 | +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] |
| 1008 | +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 |
| 1009 | +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 |
| 1010 | +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] |
| 1011 | +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 |
| 1012 | +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 |
| 1013 | +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] |
| 1014 | +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 |
| 1015 | +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 |
| 1016 | +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] |
| 1017 | +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 |
| 1018 | + |
| 1019 | +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 |
| 1020 | +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 |
| 1021 | +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) |
| 1022 | +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 |
| 1023 | +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] |
| 1024 | +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 |
| 1025 | +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 |
| 1026 | +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] |
| 1027 | +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 |
| 1028 | +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 |
| 1029 | +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] |
| 1030 | +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 |
| 1031 | +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 |
| 1032 | +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] |
| 1033 | +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 |
| 1034 | + |
| 1035 | +void foo16() { /* Exercises the two-operand form of __builtin_shufflevector on 6 x int vectors (a non-power-of-two lane count); the CIR checks below expect cir.vec.shuffle.dynamic. */ |
| 1036 | + vi6 a; |
| 1037 | + vi6 b; |
| 1038 | + vi6 r = __builtin_shufflevector(a, b); /* Per the checks below: b is AND-ed with splat(7) before indexing a. NOTE(review): the LLVM/OGCG checks only cover lanes 0-3 of the 6 lanes; confirm whether lanes 4 and 5 should be checked too. */ |
| 1039 | +} |
| 1040 | + |
| 1041 | +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> |
| 1042 | +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> |
| 1043 | +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i> |
| 1044 | + |
| 1045 | +// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 |
| 1046 | +// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 |
| 1047 | +// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) |
| 1048 | +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 |
| 1049 | +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] |
| 1050 | +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 |
| 1051 | +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 |
| 1052 | +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] |
| 1053 | +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 |
| 1054 | +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 |
| 1055 | +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] |
| 1056 | +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 |
| 1057 | +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 |
| 1058 | +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] |
| 1059 | +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 |
| 1060 | + |
| 1061 | +// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 |
| 1062 | +// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 |
| 1063 | +// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) |
| 1064 | +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 |
| 1065 | +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] |
| 1066 | +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 |
| 1067 | +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 |
| 1068 | +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] |
| 1069 | +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 |
| 1070 | +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 |
| 1071 | +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] |
| 1072 | +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 |
| 1073 | +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 |
| 1074 | +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] |
| 1075 | +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 |
0 commit comments