Skip to content

Commit b42f884

Browse files
authored
[CIR][CIRGen] Support wide string literals (#399)
This commit adds codegen support for wide string literals, including `wchar_t` string literals, `char16_t` string literals, and `char32_t` string literals. I'm not following the proposal in #374: the clang frontend doesn't record the original literal text; it only records the encoded code units for wide string literals. So I believe that a dedicated string attribute with an encoding tag, as described in #374, may not be as helpful as I thought.
1 parent 0bda8cb commit b42f884

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,8 +1103,37 @@ CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *E) {
11031103
return builder.getString(Str, eltTy, finalSize);
11041104
}
11051105

1106-
assert(0 && "not implemented");
1107-
return {};
1106+
auto arrayTy =
1107+
getTypes().ConvertType(E->getType()).dyn_cast<mlir::cir::ArrayType>();
1108+
assert(arrayTy && "string literals must be emitted as an array type");
1109+
1110+
auto arrayEltTy = arrayTy.getEltType().dyn_cast<mlir::cir::IntType>();
1111+
assert(arrayEltTy &&
1112+
"string literal elements must be emitted as integral type");
1113+
1114+
auto arraySize = arrayTy.getSize();
1115+
auto literalSize = E->getLength();
1116+
1117+
// Collect the code units.
1118+
SmallVector<uint32_t, 32> elementValues;
1119+
elementValues.reserve(arraySize);
1120+
for (unsigned i = 0; i < literalSize; ++i)
1121+
elementValues.push_back(E->getCodeUnit(i));
1122+
elementValues.resize(arraySize);
1123+
1124+
// If every code unit of the string is zero, emit a #cir.zero instead.
1125+
if (std::all_of(elementValues.begin(), elementValues.end(),
1126+
[](uint32_t x) { return x == 0; }))
1127+
return builder.getZeroAttr(arrayTy);
1128+
1129+
// Otherwise emit a constant array holding the characters.
1130+
SmallVector<mlir::Attribute, 32> elements;
1131+
elements.reserve(arraySize);
1132+
for (uint64_t i = 0; i < arraySize; ++i)
1133+
elements.push_back(mlir::cir::IntAttr::get(arrayEltTy, elementValues[i]));
1134+
1135+
auto elementsAttr = mlir::ArrayAttr::get(builder.getContext(), elements);
1136+
return builder.getConstArray(elementsAttr, arrayTy);
11081137
}
11091138

11101139
// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir-enable -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --input-file=%t.cir %s
3+
4+
const char16_t *test_utf16() {
5+
return u"你好世界";
6+
}
7+
8+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.const_array<[#cir.int<20320> : !u16i, #cir.int<22909> : !u16i, #cir.int<19990> : !u16i, #cir.int<30028> : !u16i, #cir.int<0> : !u16i]> : !cir.array<!u16i x 5>
9+
10+
const char32_t *test_utf32() {
11+
return U"你好世界";
12+
}
13+
14+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.const_array<[#cir.int<20320> : !u32i, #cir.int<22909> : !u32i, #cir.int<19990> : !u32i, #cir.int<30028> : !u32i, #cir.int<0> : !u32i]> : !cir.array<!u32i x 5>
15+
16+
const char16_t *test_zero16() {
17+
return u"\0\0\0\0";
18+
}
19+
20+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.zero : !cir.array<!u16i x 5>
21+
22+
const char32_t *test_zero32() {
23+
return U"\0\0\0\0";
24+
}
25+
26+
// CHECK: cir.global "private" constant internal @{{.+}} = #cir.zero : !cir.array<!u32i x 5>

0 commit comments

Comments
 (0)