// from the C code in LLVMIntrinsicEmulation/bitreverse.c with a custom clang
// that was modified to disable llvm.bitreverse.* intrinsic generation.
//
// A similar command was run on LLVMIntrinsicEmulation/small_bitreverse.c to
// produce functions to reverse 2-bit and 4-bit types.
//
// Manual modification was done to avoid coercing vector types into scalar
// types. For example, the original LLVM IR:
//
// ...
// ret <4 x i8> %or12
// }
// GEN_CONST<N>(BASE_TYPE, VAL): build the text of an N-element splat constant
// as a compile-time string literal.
//
//   N == 1 -> "VAL"                          (BASE_TYPE is accepted but unused,
//                                             so every variant has the same
//                                             two-argument signature)
//   N  > 1 -> "<BASE_TYPE VAL,BASE_TYPE VAL,...>" with exactly N elements
//
// Both arguments are turned into text with the `#` operator, so they are
// stringified exactly as spelled at the point of expansion (e.g. `-6` becomes
// "-6").
//
// The parameter list must follow the macro name with no intervening
// whitespace; otherwise the preprocessor treats the definition as an
// object-like macro whose replacement text begins with "(".
#define GEN_CONST1(BASE_TYPE, VAL) #VAL
#define GEN_CONST2(BASE_TYPE, VAL)                                             \
  "<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL ">"
#define GEN_CONST3(BASE_TYPE, VAL)                                             \
  "<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL ">"
#define GEN_CONST4(BASE_TYPE, VAL)                                             \
  "<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL ">"
#define GEN_CONST8(BASE_TYPE, VAL)                                             \
  "<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL ">"
#define GEN_CONST16(BASE_TYPE, VAL)                                            \
  "<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL      \
  "," #BASE_TYPE " " #VAL ">"
84+
85+ // clang-format off
86+ #define MAKE_BITREVERSE_2BIT (SUFFIX ,TYPE_STR ,NUM_ELTS ,BASE_TYPE ) \
87+ static const char LLVMBitreverse ## SUFFIX[]{" \n\
88+ define " TYPE_STR " @llvm_bitreverse_" #SUFFIX "(" TYPE_STR " %A) { \n\
89+ entry: \n\
90+ %and = shl " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE,1) " \n\
91+ %shr4 = lshr " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE,1) " \n\
92+ %or = or disjoint " TYPE_STR " %and, %shr4 \n\
93+ ret " TYPE_STR " %or \n\
94+ } \n\
95+ "}
96+
97+ MAKE_BITREVERSE_2BIT (i2 , "i2" , 1 , i2 );
98+ MAKE_BITREVERSE_2BIT (v2i2 , "<2 x i2>" , 2 , i2 );
99+ MAKE_BITREVERSE_2BIT (v3i2 , "<3 x i2>" , 3 , i2 );
100+ MAKE_BITREVERSE_2BIT (v4i2 , "<4 x i2>" , 4 , i2 );
101+ MAKE_BITREVERSE_2BIT (v8i2 , "<8 x i2>" , 8 , i2 );
102+ MAKE_BITREVERSE_2BIT (v16i2 , "<16 x i2>" , 16 , i2 );
103+
104+ #define MAKE_BITREVERSE_4BIT (SUFFIX ,TYPE_STR ,NUM_ELTS ,BASE_TYPE ) \
105+ static const char LLVMBitreverse ## SUFFIX[]{" \n\
106+ define " TYPE_STR " @llvm_bitreverse_" #SUFFIX "(" TYPE_STR " %A) { \n\
107+ entry: \n\
108+ %and = shl " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 2) " \n\
109+ %shr = lshr " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 2) " \n\
110+ %or = or disjoint " TYPE_STR " %and, %shr \n\
111+ %and2 = shl " TYPE_STR " %or, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 1) " \n\
112+ %shl3 = and " TYPE_STR " %and2, " GEN_CONST ## NUM_ELTS(BASE_TYPE,-6) " \n\
113+ %shr4 = lshr " TYPE_STR " %or, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 1) " \n\
114+ %and5 = and " TYPE_STR " %shr4, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 5) " \n\
115+ %or6 = or disjoint " TYPE_STR " %shl3, %and5 \n\
116+ ret " TYPE_STR " %or6 \n\
117+ } \n\
118+ "}
119+
120+ MAKE_BITREVERSE_4BIT (i4 , "i4" , 1 , i4 );
121+ MAKE_BITREVERSE_4BIT (v2i4 , "<2 x i4>" , 2 , i4 );
122+ MAKE_BITREVERSE_4BIT (v3i4 , "<3 x i4>" , 3 , i4 );
123+ MAKE_BITREVERSE_4BIT (v4i4 , "<4 x i4>" , 4 , i4 );
124+ MAKE_BITREVERSE_4BIT (v8i4 , "<8 x i4>" , 8 , i4 );
125+ MAKE_BITREVERSE_4BIT (v16i4 , "<16 x i4>" , 16 , i4 );
126+ // clang-format on
62127
63- static const char LLVMBitreverseScalari8 []{R "(
128+ static const char LLVMBitreversei8 []{R "(
64129define zeroext i8 @llvm_bitreverse_i8 (i8 %A ) {
65130entry :
66131 %and = shl i8 %A , 4
@@ -80,7 +145,7 @@ define zeroext i8 @llvm_bitreverse_i8(i8 %A) {
80145}
81146)"};
82147
83- static const char LLVMBitreverseScalari16 []{R "(
148+ static const char LLVMBitreversei16 []{R "(
84149define zeroext i16 @llvm_bitreverse_i16 (i16 %A ) {
85150entry :
86151 %and = shl i16 %A , 8
@@ -105,7 +170,7 @@ define zeroext i16 @llvm_bitreverse_i16(i16 %A) {
105170}
106171)"};
107172
108- static const char LLVMBitreverseScalari32 []{R "(
173+ static const char LLVMBitreversei32 []{R "(
109174define i32 @llvm_bitreverse_i32 (i32 %A ) {
110175entry :
111176 %and = shl i32 %A , 16
@@ -135,7 +200,7 @@ define i32 @llvm_bitreverse_i32(i32 %A) {
135200}
136201)"};
137202
138- static const char LLVMBitreverseScalari64 []{R "(
203+ static const char LLVMBitreversei64 []{R "(
139204define i64 @llvm_bitreverse_i64 (i64 %A ) {
140205entry :
141206 %and = shl i64 %A , 32
@@ -170,7 +235,7 @@ define i64 @llvm_bitreverse_i64(i64 %A) {
170235}
171236)"};
172237
173- static const char LLVMBitreverseV2i8 []{R "(
238+ static const char LLVMBitreversev2i8 []{R "(
174239define < 2 x i8 > @llvm_bitreverse_v2i8 (< 2 x i8 > %A ) {
175240entry :
176241 %shl = shl < 2 x i8 > %A , < i8 4 , i8 4 >
@@ -190,7 +255,7 @@ define <2 x i8> @llvm_bitreverse_v2i8(<2 x i8> %A) {
190255}
191256)"};
192257
193- static const char LLVMBitreverseV2i16 []{R "(
258+ static const char LLVMBitreversev2i16 []{R "(
194259define < 2 x i16 > @llvm_bitreverse_v2i16 (< 2 x i16 > %A ) {
195260entry :
196261 %shl = shl < 2 x i16 > %A , < i16 8 , i16 8 >
@@ -215,7 +280,7 @@ define <2 x i16> @llvm_bitreverse_v2i16(<2 x i16> %A) {
215280}
216281)"};
217282
218- static const char LLVMBitreverseV2i32 []{R "(
283+ static const char LLVMBitreversev2i32 []{R "(
219284define < 2 x i32 > @llvm_bitreverse_v2i32 (< 2 x i32 > %A ) {
220285entry :
221286 %shl = shl < 2 x i32 > %A , < i32 16 , i32 16 >
@@ -245,7 +310,7 @@ define <2 x i32> @llvm_bitreverse_v2i32(<2 x i32> %A) {
245310}
246311)"};
247312
248- static const char LLVMBitreverseV2i64 []{R "(
313+ static const char LLVMBitreversev2i64 []{R "(
249314define < 2 x i64 > @llvm_bitreverse_v2i64 (< 2 x i64 > %A ) {
250315entry :
251316 %shl = shl < 2 x i64 > %A , < i64 32 , i64 32 >
@@ -280,7 +345,7 @@ define <2 x i64> @llvm_bitreverse_v2i64(<2 x i64> %A) {
280345}
281346)"};
282347
283- static const char LLVMBitreverseV3i8 []{R "(
348+ static const char LLVMBitreversev3i8 []{R "(
284349define < 3 x i8 > @llvm_bitreverse_v3i8 (< 3 x i8 > %A ) {
285350entry :
286351 %shl = shl < 3 x i8 > %A , < i8 4 , i8 4 , i8 4 >
@@ -300,7 +365,7 @@ define <3 x i8> @llvm_bitreverse_v3i8(<3 x i8> %A) {
300365}
301366)"};
302367
303- static const char LLVMBitreverseV3i16 []{R "(
368+ static const char LLVMBitreversev3i16 []{R "(
304369define < 3 x i16 > @llvm_bitreverse_v3i16 (< 3 x i16 > %A ) {
305370entry :
306371 %shl = shl < 3 x i16 > %A , < i16 8 , i16 8 , i16 8 >
@@ -325,7 +390,7 @@ define <3 x i16> @llvm_bitreverse_v3i16(<3 x i16> %A) {
325390}
326391)"};
327392
328- static const char LLVMBitreverseV3i32 []{R "(
393+ static const char LLVMBitreversev3i32 []{R "(
329394define < 3 x i32 > @llvm_bitreverse_v3i32 (< 3 x i32 > %A ) {
330395entry :
331396 %shl = shl < 3 x i32 > %A , < i32 16 , i32 16 , i32 16 >
@@ -355,7 +420,7 @@ define <3 x i32> @llvm_bitreverse_v3i32(<3 x i32> %A) {
355420}
356421)"};
357422
358- static const char LLVMBitreverseV3i64 []{R "(
423+ static const char LLVMBitreversev3i64 []{R "(
359424define < 3 x i64 > @llvm_bitreverse_v3i64 (< 3 x i64 > %A ) {
360425entry :
361426 %shl = shl < 3 x i64 > %A , < i64 32 , i64 32 , i64 32 >
@@ -390,7 +455,7 @@ define <3 x i64> @llvm_bitreverse_v3i64(<3 x i64> %A) {
390455}
391456)"};
392457
393- static const char LLVMBitreverseV4i8 []{R "(
458+ static const char LLVMBitreversev4i8 []{R "(
394459define < 4 x i8 > @llvm_bitreverse_v4i8 (< 4 x i8 > %A ) {
395460entry :
396461 %shl = shl < 4 x i8 > %A , < i8 4 , i8 4 , i8 4 , i8 4 >
@@ -410,7 +475,7 @@ define <4 x i8> @llvm_bitreverse_v4i8(<4 x i8> %A) {
410475}
411476)"};
412477
413- static const char LLVMBitreverseV4i16 []{R "(
478+ static const char LLVMBitreversev4i16 []{R "(
414479define < 4 x i16 > @llvm_bitreverse_v4i16 (< 4 x i16 > %A ) {
415480entry :
416481 %shl = shl < 4 x i16 > %A , < i16 8 , i16 8 , i16 8 , i16 8 >
@@ -435,7 +500,7 @@ define <4 x i16> @llvm_bitreverse_v4i16(<4 x i16> %A) {
435500}
436501)"};
437502
438- static const char LLVMBitreverseV4i32 []{R "(
503+ static const char LLVMBitreversev4i32 []{R "(
439504define < 4 x i32 > @llvm_bitreverse_v4i32 (< 4 x i32 > %A ) {
440505entry :
441506 %shl = shl < 4 x i32 > %A , < i32 16 , i32 16 , i32 16 , i32 16 >
@@ -465,7 +530,7 @@ define <4 x i32> @llvm_bitreverse_v4i32(<4 x i32> %A) {
465530}
466531)"};
467532
468- static const char LLVMBitreverseV4i64 []{R "(
533+ static const char LLVMBitreversev4i64 []{R "(
469534define < 4 x i64 > @llvm_bitreverse_v4i64 (< 4 x i64 > %A ) {
470535entry :
471536 %shl = shl < 4 x i64 > %A , < i64 32 , i64 32 , i64 32 , i64 32 >
@@ -500,7 +565,7 @@ define <4 x i64> @llvm_bitreverse_v4i64(<4 x i64> %A) {
500565}
501566)"};
502567
503- static const char LLVMBitreverseV8i8 []{R "(
568+ static const char LLVMBitreversev8i8 []{R "(
504569define < 8 x i8 > @llvm_bitreverse_v8i8 (< 8 x i8 > %A ) {
505570entry :
506571 %shl = shl < 8 x i8 > %A , < i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 >
@@ -520,7 +585,7 @@ define <8 x i8> @llvm_bitreverse_v8i8(<8 x i8> %A) {
520585}
521586)"};
522587
523- static const char LLVMBitreverseV8i16 []{R "(
588+ static const char LLVMBitreversev8i16 []{R "(
524589define < 8 x i16 > @llvm_bitreverse_v8i16 (< 8 x i16 > %A ) {
525590entry :
526591 %shl = shl < 8 x i16 > %A , < i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 >
@@ -545,7 +610,7 @@ define <8 x i16> @llvm_bitreverse_v8i16(<8 x i16> %A) {
545610}
546611)"};
547612
548- static const char LLVMBitreverseV8i32 []{R "(
613+ static const char LLVMBitreversev8i32 []{R "(
549614define < 8 x i32 > @llvm_bitreverse_v8i32 (< 8 x i32 > %A ) {
550615entry :
551616 %shl = shl < 8 x i32 > %A , < i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 >
@@ -575,7 +640,7 @@ define <8 x i32> @llvm_bitreverse_v8i32(<8 x i32> %A) {
575640}
576641)"};
577642
578- static const char LLVMBitreverseV8i64 []{R "(
643+ static const char LLVMBitreversev8i64 []{R "(
579644define < 8 x i64 > @llvm_bitreverse_v8i64 (< 8 x i64 > %A ) {
580645entry :
581646 %shl = shl < 8 x i64 > %A , < i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 >
@@ -610,7 +675,7 @@ define <8 x i64> @llvm_bitreverse_v8i64(<8 x i64> %A) {
610675}
611676)"};
612677
613- static const char LLVMBitreverseV16i8 []{R "(
678+ static const char LLVMBitreversev16i8 []{R "(
614679define < 16 x i8 > @llvm_bitreverse_v16i8 (< 16 x i8 > %A ) {
615680entry :
616681 %shl = shl < 16 x i8 > %A , < i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 , i8 4 >
@@ -630,7 +695,7 @@ define <16 x i8> @llvm_bitreverse_v16i8(<16 x i8> %A) {
630695}
631696)"};
632697
633- static const char LLVMBitreverseV16i16 []{R "(
698+ static const char LLVMBitreversev16i16 []{R "(
634699define < 16 x i16 > @llvm_bitreverse_v16i16 (< 16 x i16 > %A ) {
635700entry :
636701 %shl = shl < 16 x i16 > %A , < i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 , i16 8 >
@@ -655,7 +720,7 @@ define <16 x i16> @llvm_bitreverse_v16i16(<16 x i16> %A) {
655720}
656721)"};
657722
658- static const char LLVMBitreverseV16i32 []{R "(
723+ static const char LLVMBitreversev16i32 []{R "(
659724define < 16 x i32 > @llvm_bitreverse_v16i32 (< 16 x i32 > %A ) {
660725entry :
661726 %shl = shl < 16 x i32 > %A , < i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 >
@@ -685,7 +750,7 @@ define <16 x i32> @llvm_bitreverse_v16i32(<16 x i32> %A) {
685750}
686751)"};
687752
688- static const char LLVMBitreverseV16i64 []{R "(
753+ static const char LLVMBitreversev16i64 []{R "(
689754define < 16 x i64 > @llvm_bitreverse_v16i64 (< 16 x i64 > %A ) {
690755entry :
691756 %shl = shl < 16 x i64 > %A , < i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 , i64 32 >
0 commit comments