@@ -6,7 +6,7 @@ const mem = std.mem;
66
77/// Returns how many bytes the UTF-8 representation would require
88/// for the given codepoint.
9- pub fn utf8CodepointSequenceLength (c : u32 ) ! u3 {
9+ pub fn utf8CodepointSequenceLength (c : u21 ) ! u3 {
1010 if (c < 0x80 ) return @as (u3 , 1 );
1111 if (c < 0x800 ) return @as (u3 , 2 );
1212 if (c < 0x10000 ) return @as (u3 , 3 );
@@ -32,7 +32,7 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
3232/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
3333/// Errors: if c cannot be encoded in UTF-8.
3434/// Returns: the number of bytes written to out.
35- pub fn utf8Encode (c : u32 , out : []u8 ) ! u3 {
35+ pub fn utf8Encode (c : u21 , out : []u8 ) ! u3 {
3636 const length = try utf8CodepointSequenceLength (c );
3737 assert (out .len >= length );
3838 switch (length ) {
@@ -68,9 +68,9 @@ const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error
6868/// bytes.len must be equal to utf8ByteSequenceLength(bytes[0]) catch unreachable.
6969/// If you already know the length at comptime, you can call one of
7070/// utf8Decode2,utf8Decode3,utf8Decode4 directly instead of this function.
71- pub fn utf8Decode (bytes : []const u8 ) Utf8DecodeError ! u32 {
71+ pub fn utf8Decode (bytes : []const u8 ) Utf8DecodeError ! u21 {
7272 return switch (bytes .len ) {
73- 1 = > @as (u32 , bytes [0 ]),
73+ 1 = > @as (u21 , bytes [0 ]),
7474 2 = > utf8Decode2 (bytes ),
7575 3 = > utf8Decode3 (bytes ),
7676 4 = > utf8Decode4 (bytes ),
@@ -82,10 +82,10 @@ const Utf8Decode2Error = error{
8282 Utf8ExpectedContinuation ,
8383 Utf8OverlongEncoding ,
8484};
85- pub fn utf8Decode2 (bytes : []const u8 ) Utf8Decode2Error ! u32 {
85+ pub fn utf8Decode2 (bytes : []const u8 ) Utf8Decode2Error ! u21 {
8686 assert (bytes .len == 2 );
8787 assert (bytes [0 ] & 0b11100000 == 0b11000000 );
88- var value : u32 = bytes [0 ] & 0b00011111 ;
88+ var value : u21 = bytes [0 ] & 0b00011111 ;
8989
9090 if (bytes [1 ] & 0b11000000 != 0b10000000 ) return error .Utf8ExpectedContinuation ;
9191 value <<= 6 ;
@@ -101,10 +101,10 @@ const Utf8Decode3Error = error{
101101 Utf8OverlongEncoding ,
102102 Utf8EncodesSurrogateHalf ,
103103};
104- pub fn utf8Decode3 (bytes : []const u8 ) Utf8Decode3Error ! u32 {
104+ pub fn utf8Decode3 (bytes : []const u8 ) Utf8Decode3Error ! u21 {
105105 assert (bytes .len == 3 );
106106 assert (bytes [0 ] & 0b11110000 == 0b11100000 );
107- var value : u32 = bytes [0 ] & 0b00001111 ;
107+ var value : u21 = bytes [0 ] & 0b00001111 ;
108108
109109 if (bytes [1 ] & 0b11000000 != 0b10000000 ) return error .Utf8ExpectedContinuation ;
110110 value <<= 6 ;
@@ -125,10 +125,10 @@ const Utf8Decode4Error = error{
125125 Utf8OverlongEncoding ,
126126 Utf8CodepointTooLarge ,
127127};
128- pub fn utf8Decode4 (bytes : []const u8 ) Utf8Decode4Error ! u32 {
128+ pub fn utf8Decode4 (bytes : []const u8 ) Utf8Decode4Error ! u21 {
129129 assert (bytes .len == 4 );
130130 assert (bytes [0 ] & 0b11111000 == 0b11110000 );
131- var value : u32 = bytes [0 ] & 0b00000111 ;
131+ var value : u21 = bytes [0 ] & 0b00000111 ;
132132
133133 if (bytes [1 ] & 0b11000000 != 0b10000000 ) return error .Utf8ExpectedContinuation ;
134134 value <<= 6 ;
@@ -224,11 +224,11 @@ pub const Utf8Iterator = struct {
224224 return it .bytes [it .i - cp_len .. it .i ];
225225 }
226226
227- pub fn nextCodepoint (it : * Utf8Iterator ) ? u32 {
227+ pub fn nextCodepoint (it : * Utf8Iterator ) ? u21 {
228228 const slice = it .nextCodepointSlice () orelse return null ;
229229
230230 switch (slice .len ) {
231- 1 = > return @as (u32 , slice [0 ]),
231+ 1 = > return @as (u21 , slice [0 ]),
232232 2 = > return utf8Decode2 (slice ) catch unreachable ,
233233 3 = > return utf8Decode3 (slice ) catch unreachable ,
234234 4 = > return utf8Decode4 (slice ) catch unreachable ,
@@ -248,19 +248,19 @@ pub const Utf16LeIterator = struct {
248248 };
249249 }
250250
251- pub fn nextCodepoint (it : * Utf16LeIterator ) ! ? u32 {
251+ pub fn nextCodepoint (it : * Utf16LeIterator ) ! ? u21 {
252252 assert (it .i <= it .bytes .len );
253253 if (it .i == it .bytes .len ) return null ;
254- const c0 : u32 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
255- if (c0 & ~ @as (u32 , 0x03ff ) == 0xd800 ) {
254+ const c0 : u21 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
255+ if (c0 & ~ @as (u21 , 0x03ff ) == 0xd800 ) {
256256 // surrogate pair
257257 it .i += 2 ;
258258 if (it .i >= it .bytes .len ) return error .DanglingSurrogateHalf ;
259- const c1 : u32 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
260- if (c1 & ~ @as (u32 , 0x03ff ) != 0xdc00 ) return error .ExpectedSecondSurrogateHalf ;
259+ const c1 : u21 = mem .readIntSliceLittle (u16 , it .bytes [it .i .. it .i + 2 ]);
260+ if (c1 & ~ @as (u21 , 0x03ff ) != 0xdc00 ) return error .ExpectedSecondSurrogateHalf ;
261261 it .i += 2 ;
262262 return 0x10000 + (((c0 & 0x03ff ) << 10 ) | (c1 & 0x03ff ));
263- } else if (c0 & ~ @as (u32 , 0x03ff ) == 0xdc00 ) {
263+ } else if (c0 & ~ @as (u21 , 0x03ff ) == 0xdc00 ) {
264264 return error .UnexpectedSecondSurrogateHalf ;
265265 } else {
266266 it .i += 2 ;
@@ -304,10 +304,10 @@ fn testUtf8EncodeError() void {
304304 testErrorEncode (0xd800 , array [0.. ], error .Utf8CannotEncodeSurrogateHalf );
305305 testErrorEncode (0xdfff , array [0.. ], error .Utf8CannotEncodeSurrogateHalf );
306306 testErrorEncode (0x110000 , array [0.. ], error .CodepointTooLarge );
307- testErrorEncode (0xffffffff , array [0.. ], error .CodepointTooLarge );
307+ testErrorEncode (0x1fffff , array [0.. ], error .CodepointTooLarge );
308308}
309309
// Test helper: asserts that encoding `codePoint` into `array` with utf8Encode
// fails with exactly `expectedErr`.
// NOTE(review): this span is a diff hunk. The `-` line is the old signature
// (codePoint: u32) and the `+` line is the new one (codePoint: u21); the body
// lines below are unchanged context.
310- fn testErrorEncode (codePoint : u32 , array : []u8 , expectedErr : anyerror ) void {
310+ fn testErrorEncode (codePoint : u21 , array : []u8 , expectedErr : anyerror ) void {
// The error union returned by utf8Encode is handed straight to expectError.
311311 testing .expectError (expectedErr , utf8Encode (codePoint , array ));
312312}
313313
@@ -455,11 +455,11 @@ fn testError(bytes: []const u8, expected_err: anyerror) void {
455455 testing .expectError (expected_err , testDecode (bytes ));
456456}
457457
// Test helper: asserts that decoding `bytes` via testDecode yields
// `expected_codepoint`.
// NOTE(review): this span is a diff hunk. The `-` line is the old signature
// (expected_codepoint: u32) and the `+` line is the new one (u21); the body
// lines below are unchanged context.
458- fn testValid (bytes : []const u8 , expected_codepoint : u32 ) void {
458+ fn testValid (bytes : []const u8 , expected_codepoint : u21 ) void {
// A decode error is treated as impossible here (`catch unreachable`), so this
// helper is only meant for inputs that are expected to decode successfully.
459459 testing .expect ((testDecode (bytes ) catch unreachable ) == expected_codepoint );
460460}
461461
462- fn testDecode (bytes : []const u8 ) ! u32 {
462+ fn testDecode (bytes : []const u8 ) ! u21 {
463463 const length = try utf8ByteSequenceLength (bytes [0 ]);
464464 if (bytes .len < length ) return error .UnexpectedEof ;
465465 testing .expect (bytes .len == length );
0 commit comments