@@ -2,6 +2,7 @@ const std = @import("./std.zig");
22const assert = std .debug .assert ;
33const testing = std .testing ;
44const mem = std .mem ;
5+ const builtin = @import ("builtin" );
56
67/// Use this to replace an unknown, unrecognized, or unrepresentable character.
78///
@@ -756,8 +757,34 @@ pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8
756757 // optimistically guess that it will all be ascii.
757758 var result = try std .ArrayList (u8 ).initCapacity (allocator , utf16le .len );
758759 errdefer result .deinit ();
759- var out_index : usize = 0 ;
760- var it = Utf16LeIterator .init (utf16le );
760+
761+ var remaining = utf16le ;
762+ if (builtin .zig_backend != .stage2_x86_64 ) {
763+ const chunk_len = std .simd .suggestVectorSize (u16 ) orelse 1 ;
764+ const Chunk = @Vector (chunk_len , u16 );
765+
766+ // Fast path. Check for and encode ASCII characters at the start of the input.
767+ while (remaining .len >= chunk_len ) {
768+ const chunk : Chunk = remaining [0.. chunk_len ].* ;
769+ const mask : Chunk = @splat (std .mem .nativeToLittle (u16 , 0x7F ));
770+ if (@reduce (.Or , chunk | mask != mask )) {
771+ // found a non-ASCII code unit
772+ break ;
773+ }
774+ const chunk_byte_len = chunk_len * 2 ;
775+ const chunk_bytes : @Vector (chunk_byte_len , u8 ) = (std .mem .sliceAsBytes (remaining )[0.. chunk_byte_len ]).* ;
776+ const deinterlaced_bytes = std .simd .deinterlace (2 , chunk_bytes );
777+ const ascii_bytes : [chunk_len ]u8 = deinterlaced_bytes [0 ];
778+ // We allocated enough space to encode every UTF-16 code unit
779+ // as ASCII, so if the entire string is ASCII then we are
780+ // guaranteed to have enough space allocated
781+ result .appendSliceAssumeCapacity (& ascii_bytes );
782+ remaining = remaining [chunk_len .. ];
783+ }
784+ }
785+
786+ var out_index : usize = result .items .len ;
787+ var it = Utf16LeIterator .init (remaining );
761788 while (try it .nextCodepoint ()) | codepoint | {
762789 const utf8_len = utf8CodepointSequenceLength (codepoint ) catch unreachable ;
763790 try result .resize (result .items .len + utf8_len );
@@ -773,8 +800,34 @@ pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]
773800 // optimistically guess that it will all be ascii (and allocate space for the null terminator)
774801 var result = try std .ArrayList (u8 ).initCapacity (allocator , utf16le .len + 1 );
775802 errdefer result .deinit ();
776- var out_index : usize = 0 ;
777- var it = Utf16LeIterator .init (utf16le );
803+
804+ var remaining = utf16le ;
805+ if (builtin .zig_backend != .stage2_x86_64 ) {
806+ const chunk_len = std .simd .suggestVectorSize (u16 ) orelse 1 ;
807+ const Chunk = @Vector (chunk_len , u16 );
808+
809+ // Fast path. Check for and encode ASCII characters at the start of the input.
810+ while (remaining .len >= chunk_len ) {
811+ const chunk : Chunk = remaining [0.. chunk_len ].* ;
812+ const mask : Chunk = @splat (std .mem .nativeToLittle (u16 , 0x7F ));
813+ if (@reduce (.Or , chunk | mask != mask )) {
814+ // found a non-ASCII code unit
815+ break ;
816+ }
817+ const chunk_byte_len = chunk_len * 2 ;
818+ const chunk_bytes : @Vector (chunk_byte_len , u8 ) = (std .mem .sliceAsBytes (remaining )[0.. chunk_byte_len ]).* ;
819+ const deinterlaced_bytes = std .simd .deinterlace (2 , chunk_bytes );
820+ const ascii_bytes : [chunk_len ]u8 = deinterlaced_bytes [0 ];
821+ // We allocated enough space to encode every UTF-16 code unit
822+ // as ASCII, so if the entire string is ASCII then we are
823+ // guaranteed to have enough space allocated
824+ result .appendSliceAssumeCapacity (& ascii_bytes );
825+ remaining = remaining [chunk_len .. ];
826+ }
827+ }
828+
829+ var out_index = result .items .len ;
830+ var it = Utf16LeIterator .init (remaining );
778831 while (try it .nextCodepoint ()) | codepoint | {
779832 const utf8_len = utf8CodepointSequenceLength (codepoint ) catch unreachable ;
780833 try result .resize (result .items .len + utf8_len );
@@ -788,7 +841,31 @@ pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]
788841/// Returns end byte index into utf8.
789842pub fn utf16leToUtf8 (utf8 : []u8 , utf16le : []const u16 ) ! usize {
790843 var end_index : usize = 0 ;
791- var it = Utf16LeIterator .init (utf16le );
844+
845+ var remaining = utf16le ;
846+ if (builtin .zig_backend != .stage2_x86_64 ) {
847+ const chunk_len = std .simd .suggestVectorSize (u16 ) orelse 1 ;
848+ const Chunk = @Vector (chunk_len , u16 );
849+
850+ // Fast path. Check for and encode ASCII characters at the start of the input.
851+ while (remaining .len >= chunk_len ) {
852+ const chunk : Chunk = remaining [0.. chunk_len ].* ;
853+ const mask : Chunk = @splat (std .mem .nativeToLittle (u16 , 0x7F ));
854+ if (@reduce (.Or , chunk | mask != mask )) {
855+ // found a non-ASCII code unit
856+ break ;
857+ }
858+ const chunk_byte_len = chunk_len * 2 ;
859+ const chunk_bytes : @Vector (chunk_byte_len , u8 ) = (std .mem .sliceAsBytes (remaining )[0.. chunk_byte_len ]).* ;
860+ const deinterlaced_bytes = std .simd .deinterlace (2 , chunk_bytes );
861+ const ascii_bytes : [chunk_len ]u8 = deinterlaced_bytes [0 ];
862+ @memcpy (utf8 [end_index .. end_index + chunk_len ], & ascii_bytes );
863+ end_index += chunk_len ;
864+ remaining = remaining [chunk_len .. ];
865+ }
866+ }
867+
868+ var it = Utf16LeIterator .init (remaining );
792869 while (try it .nextCodepoint ()) | codepoint | {
793870 end_index += try utf8Encode (codepoint , utf8 [end_index .. ]);
794871 }
@@ -863,7 +940,27 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
863940 var result = try std .ArrayList (u16 ).initCapacity (allocator , utf8 .len + 1 );
864941 errdefer result .deinit ();
865942
866- const view = try Utf8View .init (utf8 );
943+ var remaining = utf8 ;
944+ if (builtin .zig_backend != .stage2_x86_64 ) {
945+ const chunk_len = std .simd .suggestVectorSize (u8 ) orelse 1 ;
946+ const Chunk = @Vector (chunk_len , u8 );
947+
948+ // Fast path. Check for and encode ASCII characters at the start of the input.
949+ while (remaining .len >= chunk_len ) {
950+ const chunk : Chunk = remaining [0.. chunk_len ].* ;
951+ const mask : Chunk = @splat (0x80 );
952+ if (@reduce (.Or , chunk & mask == mask )) {
953+ // found a non-ASCII code unit
954+ break ;
955+ }
956+ const zeroes : Chunk = @splat (0 );
957+ const utf16_chunk : [chunk_len * 2 ]u8 align (@alignOf (u16 )) = std .simd .interlace (.{ chunk , zeroes });
958+ result .appendSliceAssumeCapacity (std .mem .bytesAsSlice (u16 , & utf16_chunk ));
959+ remaining = remaining [chunk_len .. ];
960+ }
961+ }
962+
963+ const view = try Utf8View .init (remaining );
867964 var it = view .iterator ();
868965 while (it .nextCodepoint ()) | codepoint | {
869966 if (codepoint < 0x10000 ) {
@@ -886,11 +983,33 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
886983/// Assumes there is enough space for the output.
887984pub fn utf8ToUtf16Le (utf16le : []u16 , utf8 : []const u8 ) ! usize {
888985 var dest_i : usize = 0 ;
986+
987+ var remaining = utf8 ;
988+ if (builtin .zig_backend != .stage2_x86_64 ) {
989+ const chunk_len = std .simd .suggestVectorSize (u8 ) orelse 1 ;
990+ const Chunk = @Vector (chunk_len , u8 );
991+
992+ // Fast path. Check for and encode ASCII characters at the start of the input.
993+ while (remaining .len >= chunk_len ) {
994+ const chunk : Chunk = remaining [0.. chunk_len ].* ;
995+ const mask : Chunk = @splat (0x80 );
996+ if (@reduce (.Or , chunk & mask == mask )) {
997+ // found a non-ASCII code unit
998+ break ;
999+ }
1000+ const zeroes : Chunk = @splat (0 );
1001+ const utf16_bytes : [chunk_len * 2 ]u8 align (@alignOf (u16 )) = std .simd .interlace (.{ chunk , zeroes });
1002+ @memcpy (utf16le [dest_i .. ][0.. chunk_len ], std .mem .bytesAsSlice (u16 , & utf16_bytes ));
1003+ dest_i += chunk_len ;
1004+ remaining = remaining [chunk_len .. ];
1005+ }
1006+ }
1007+
8891008 var src_i : usize = 0 ;
890- while (src_i < utf8 .len ) {
891- const n = utf8ByteSequenceLength (utf8 [src_i ]) catch return error .InvalidUtf8 ;
1009+ while (src_i < remaining .len ) {
1010+ const n = utf8ByteSequenceLength (remaining [src_i ]) catch return error .InvalidUtf8 ;
8921011 const next_src_i = src_i + n ;
893- const codepoint = utf8Decode (utf8 [src_i .. next_src_i ]) catch return error .InvalidUtf8 ;
1012+ const codepoint = utf8Decode (remaining [src_i .. next_src_i ]) catch return error .InvalidUtf8 ;
8941013 if (codepoint < 0x10000 ) {
8951014 const short = @as (u16 , @intCast (codepoint ));
8961015 utf16le [dest_i ] = mem .nativeToLittle (u16 , short );
0 commit comments