Skip to content

Commit ccc9ebf

Browse files
jacobly0 authored and andrewrk committed
std: slightly improve codegen of std.unicode.utf8ValidateSlice
1 parent b4d4d19 commit ccc9ebf

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

lib/std/unicode.zig

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -201,21 +201,18 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
201201
pub fn utf8ValidateSlice(input: []const u8) bool {
202202
var remaining = input;
203203

204-
const V_len = std.simd.suggestVectorSize(usize) orelse 1;
205-
const V = @Vector(V_len, usize);
206-
const u8s_in_vector = @sizeOf(usize) * V_len;
204+
const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
205+
const Chunk = @Vector(chunk_len, u8);
207206

208207
// Fast path. Check for and skip ASCII characters at the start of the input.
209-
while (remaining.len >= u8s_in_vector) {
210-
const chunk: V = @bitCast(remaining[0..u8s_in_vector].*);
211-
const swapped = mem.littleToNative(V, chunk);
212-
const reduced = @reduce(.Or, swapped);
213-
const mask: usize = @bitCast([1]u8{0x80} ** @sizeOf(usize));
214-
if (reduced & mask != 0) {
215-
// Found a non ASCII byte
208+
while (remaining.len >= chunk_len) {
209+
const chunk: Chunk = remaining[0..chunk_len].*;
210+
const mask: Chunk = @splat(0x80);
211+
if (@reduce(.Or, chunk & mask == mask)) {
212+
// found a non ASCII byte
216213
break;
217214
}
218-
remaining = remaining[u8s_in_vector..];
215+
remaining = remaining[chunk_len..];
219216
}
220217

221218
// default lowest and highest continuation byte

0 commit comments

Comments
 (0)