@@ -564,125 +564,149 @@ fn ReadConsoleWithUtf16ToUtf8Conversion(hConsoleInput: HANDLE, buffer: []u8) Rea
564564 error .ConsoleHandleLimitReached = > @panic ("Reached maximum number of 64 console handles." ),
565565 else = > return error .Unexpected ,
566566 };
567- // The temporary buffer can be huge, so keep it away from stack
568- var heap_allocator : std.heap.HeapAllocator = std .heap .HeapAllocator .init ();
569- defer heap_allocator .deinit ();
570- const allocator : std.mem.Allocator = heap_allocator .allocator ();
571- var temp_buffer : []u8 = allocator .alloc (u8 , buffer .len ) catch @panic ("Out of memory." );
572- defer allocator .free (temp_buffer );
573-
567+ var temp_buffer : [1024 ]u8 = undefined ;
574568 var bytes_read : DWORD = 0 ;
575569 var reached_end_of_line : bool = false ;
576-
577- // Try flushing leftover UTF-8 bytes first (one codepoint at most)
578- if (handle_data .utf8_buffer .bytes_used != 0 ) {
579- // LF will only appear at the first byte and there will be only one byte in the buffer
580- if (handle_data .utf8_buffer .data [0 ] == 0x0A ) {
581- assert (handle_data .utf8_buffer .bytes_used == 1 );
582- reached_end_of_line = true ;
583- }
584- // Is there enough space for all bytes in UTF-8 buffer?
585- const has_enough_space : bool = buffer .len >= handle_data .utf8_buffer .bytes_used ;
586- const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else buffer .len ;
587- for (0.. max_bytes_to_read ) | index | {
588- temp_buffer [index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
589- // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
590- handle_data .utf8_buffer .front_index +%= 1 ;
591- }
592- bytes_read += @truncate (max_bytes_to_read );
593- handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
594- if (has_enough_space ) {
595- // UTF-8 buffer is now empty, we can safely reset front_index to zero
596- handle_data .utf8_buffer .front_index = 0 ;
597- } else {
598- return ReadConsoleProcessUtf8Buffer (buffer , temp_buffer , bytes_read , false );
599- }
600- // LF ends a console read immediately
601- if (reached_end_of_line ) {
602- return ReadConsoleProcessUtf8Buffer (buffer , temp_buffer , bytes_read , false );
603- }
604- }
605- assert (handle_data .utf8_buffer .front_index == 0 );
570+ const TruncateState = enum {
571+ do_not_truncate ,
572+ truncate_after_SUB ,
573+ truncate_all ,
574+ };
575+ var truncate_state : TruncateState = .do_not_truncate ;
606576 while (bytes_read < buffer .len ) {
607- // Read only one code unit each loop
608- var utf16_code_unit : u16 = undefined ;
609- var utf16_code_units_read : DWORD = undefined ;
610- if (kernel32 .ReadConsoleW (hConsoleInput , & utf16_code_unit , 1 , & utf16_code_units_read , null ) == FALSE ) {
611- switch (kernel32 .GetLastError ()) {
612- .INVALID_HANDLE = > return error .NotOpenForReading ,
613- else = > | err | return unexpectedError (err ),
614- }
577+ const remaining_buffer : []u8 = buffer [bytes_read .. buffer .len ];
578+ var has_enough_space_in_remaining_buffer : bool = undefined ;
579+ var bytes_read_into_temp_buffer : DWORD = 0 ;
580+ var truncate_index : DWORD = undefined ;
581+ // If a SUB character was encountered in a previous iteration, truncate everything read in this iteration
582+ if (truncate_state == .truncate_after_SUB ) {
583+ truncate_state = .truncate_all ;
615584 }
616- if (utf16_code_unit == 0x000D ) {
617- // CR should always be followed by an LF, so just discard it
618- continue ;
619- } else if (utf16_code_unit >= 0xD800 and utf16_code_unit <= 0xDBFF ) {
620- // When a high surrogate is encountered, store it into the UTF-16 buffer
621- assert (handle_data .utf16_buffer .code_units_used == 0 );
622- handle_data .utf16_buffer .data [0 ] = utf16_code_unit ;
623- handle_data .utf16_buffer .code_units_used = 1 ;
624- continue ;
625- } else if (utf16_code_unit >= 0xDC00 and utf16_code_unit <= 0xDFFF ) {
626- // When a low surrogate is encountered, assemble surrogate pair and convert to UTF-8
627- if (! (utf16_code_units_read == 1 and
628- handle_data .utf16_buffer .data [0 ] >= 0xD800 and handle_data .utf16_buffer .data [0 ] <= 0xDBFF )) {
629- unreachable ;
585+ // Try flushing leftover UTF-8 bytes first (one codepoint at most)
586+ if (handle_data .utf8_buffer .bytes_used != 0 ) {
587+ if (handle_data .utf8_buffer .data [0 ] == 0x0A ) {
588+ assert (handle_data .utf8_buffer .bytes_used == 1 );
589+ reached_end_of_line = true ;
590+ } else if (handle_data .utf8_buffer .data [0 ] == 0x1A ) {
591+ assert (handle_data .utf8_buffer .bytes_used == 1 );
592+ // Truncate after SUB character in this loop if we never truncated in previous loops
593+ if (truncate_state == .do_not_truncate ) {
594+ truncate_state = .truncate_after_SUB ;
595+ truncate_index = 1 ;
596+ }
597+ }
598+ // Is there enough space for all bytes in UTF-8 buffer?
599+ const has_enough_space : bool = remaining_buffer .len >= handle_data .utf8_buffer .bytes_used ;
600+ const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else remaining_buffer .len ;
601+ for (0.. max_bytes_to_read ) | index | {
602+ temp_buffer [index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
603+ // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
604+ handle_data .utf8_buffer .front_index +%= 1 ;
605+ }
606+ bytes_read_into_temp_buffer += @truncate (max_bytes_to_read );
607+ handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
608+ if (has_enough_space ) {
609+ // UTF-8 buffer is now empty, we can safely reset front_index to zero
610+ handle_data .utf8_buffer .front_index = 0 ;
611+ } else {
612+ switch (truncate_state ) {
613+ .truncate_all = > {},
614+ else = > @memcpy (remaining_buffer [0.. bytes_read_into_temp_buffer ], temp_buffer [0.. bytes_read_into_temp_buffer ]),
615+ }
616+ bytes_read += bytes_read_into_temp_buffer ;
617+ break ;
630618 }
631- handle_data .utf16_buffer .data [1 ] = utf16_code_unit ;
632- handle_data .utf16_buffer .code_units_used = 0 ;
633- const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , & handle_data .utf16_buffer .data ) catch return error .Unexpected ;
634- assert (utf8_bytes == 4 );
635- handle_data .utf8_buffer .bytes_used = 4 ;
636- } else {
637- assert (handle_data .utf16_buffer .code_units_used == 0 );
638- const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , @as (* [1 ]u16 , & utf16_code_unit )) catch return error .Unexpected ;
639- handle_data .utf8_buffer .bytes_used = @truncate (utf8_bytes );
640619 // LF ends a console read immediately
641- if (handle_data .utf8_buffer .bytes_used == 1 and handle_data .utf8_buffer .data [0 ] == 0x0A ) {
642- reached_end_of_line = true ;
620+ if (reached_end_of_line ) {
621+ switch (truncate_state ) {
622+ .truncate_all = > {},
623+ else = > @memcpy (remaining_buffer [0.. bytes_read_into_temp_buffer ], temp_buffer [0.. bytes_read_into_temp_buffer ]),
624+ }
625+ bytes_read += bytes_read_into_temp_buffer ;
626+ break ;
643627 }
644628 }
645- // Is there enough space for all bytes in UTF-8 buffer?
646- const has_enough_space : bool = buffer .len >= bytes_read + handle_data .utf8_buffer .bytes_used ;
647- const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else buffer .len - bytes_read ;
648- for (0.. max_bytes_to_read ) | index | {
649- temp_buffer [bytes_read + index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
650- // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
651- handle_data .utf8_buffer .front_index +%= 1 ;
652- }
653- bytes_read += @truncate (max_bytes_to_read );
654- handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
655- if (has_enough_space ) {
656- // UTF-8 buffer is now empty, we can safely reset front_index to zero
657- handle_data .utf8_buffer .front_index = 0 ;
658- } else {
659- break ;
629+ assert (handle_data .utf8_buffer .front_index == 0 );
630+ while (bytes_read_into_temp_buffer < temp_buffer .len ) {
631+ // Read only one code unit each loop
632+ var utf16_code_unit : u16 = undefined ;
633+ var utf16_code_units_read : DWORD = undefined ;
634+ if (kernel32 .ReadConsoleW (hConsoleInput , & utf16_code_unit , 1 , & utf16_code_units_read , null ) == FALSE ) {
635+ switch (kernel32 .GetLastError ()) {
636+ .INVALID_HANDLE = > return error .NotOpenForReading ,
637+ else = > | err | return unexpectedError (err ),
638+ }
639+ }
640+ if (utf16_code_unit == 0x000D ) {
641+ // CR should always be followed by an LF, so just discard it
642+ continue ;
643+ } else if (utf16_code_unit >= 0xD800 and utf16_code_unit <= 0xDBFF ) {
644+ // When a high surrogate is encountered, store it into the UTF-16 buffer
645+ assert (handle_data .utf16_buffer .code_units_used == 0 );
646+ handle_data .utf16_buffer .data [0 ] = utf16_code_unit ;
647+ handle_data .utf16_buffer .code_units_used = 1 ;
648+ continue ;
649+ } else if (utf16_code_unit >= 0xDC00 and utf16_code_unit <= 0xDFFF ) {
650+ // When a low surrogate is encountered, assemble surrogate pair and convert to UTF-8
651+ if (! (utf16_code_units_read == 1 and handle_data .utf16_buffer .data [0 ] >= 0xD800 and handle_data .utf16_buffer .data [0 ] <= 0xDBFF )) {
652+ unreachable ;
653+ }
654+ handle_data .utf16_buffer .data [1 ] = utf16_code_unit ;
655+ handle_data .utf16_buffer .code_units_used = 0 ;
656+ const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , & handle_data .utf16_buffer .data ) catch return error .Unexpected ;
657+ assert (utf8_bytes == 4 );
658+ handle_data .utf8_buffer .bytes_used = 4 ;
659+ } else {
660+ assert (handle_data .utf16_buffer .code_units_used == 0 );
661+ const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , @as (* [1 ]u16 , & utf16_code_unit )) catch return error .Unexpected ;
662+ handle_data .utf8_buffer .bytes_used = @truncate (utf8_bytes );
663+ if (handle_data .utf8_buffer .bytes_used == 1 ) {
664+ if (handle_data .utf8_buffer .data [0 ] == 0x0A ) {
665+ reached_end_of_line = true ;
666+ } else if (handle_data .utf8_buffer .data [0 ] == 0x1A ) {
667+ if (truncate_state == .do_not_truncate ) {
668+ truncate_state = .truncate_after_SUB ;
669+ truncate_index = bytes_read_into_temp_buffer + 1 ;
670+ }
671+ }
672+ }
673+ }
674+ // Is there enough space for all bytes in UTF-8 buffer?
675+ has_enough_space_in_remaining_buffer = remaining_buffer .len >= bytes_read_into_temp_buffer + handle_data .utf8_buffer .bytes_used ;
676+ const has_enough_space : bool = has_enough_space_in_remaining_buffer and temp_buffer .len >= bytes_read_into_temp_buffer + handle_data .utf8_buffer .bytes_used ;
677+ const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else remaining_buffer .len - bytes_read_into_temp_buffer ;
678+ for (0.. max_bytes_to_read ) | index | {
679+ temp_buffer [bytes_read_into_temp_buffer + index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
680+ // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
681+ handle_data .utf8_buffer .front_index +%= 1 ;
682+ }
683+ bytes_read_into_temp_buffer += @truncate (max_bytes_to_read );
684+ handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
685+ if (has_enough_space ) {
686+ // UTF-8 buffer is now empty, we can safely reset front_index to zero
687+ handle_data .utf8_buffer .front_index = 0 ;
688+ } else {
689+ break ;
690+ }
691+ // LF ends a console read immediately
692+ if (reached_end_of_line ) {
693+ break ;
694+ }
660695 }
661- // LF ends a console read immediately
662- if (reached_end_of_line ) {
696+ // Copy to user-provided buffer
697+ const bytes_copied : DWORD = switch (truncate_state ) {
698+ .do_not_truncate = > bytes_read_into_temp_buffer ,
699+ .truncate_after_SUB = > truncate_index ,
700+ .truncate_all = > 0 ,
701+ };
702+ @memcpy (remaining_buffer [0.. bytes_copied ], temp_buffer [0.. bytes_copied ]);
703+ bytes_read += bytes_copied ;
704+ // Early return conditions
705+ if (! has_enough_space_in_remaining_buffer or reached_end_of_line ) {
663706 break ;
664707 }
665708 }
666- return ReadConsoleProcessUtf8Buffer (buffer , temp_buffer , bytes_read , true );
667- }
668-
/// Copies converted UTF-8 bytes from `temp_buffer` into the caller-provided `buffer` and
/// returns the byte count to report to the caller.
///
/// When `truncate_after_SUB` is true, bytes are copied one at a time, up to `bytes_read`,
/// and copying stops right after the first 0x1A (SUB) byte; the returned value is the number
/// of bytes copied, SUB included.
/// When `truncate_after_SUB` is false, `temp_buffer` is copied with `std.mem.copy` and
/// `bytes_read` is returned unchanged.
669- fn ReadConsoleProcessUtf8Buffer (buffer : []u8 , temp_buffer : []u8 , bytes_read : DWORD , comptime truncate_after_SUB : bool ) DWORD {
670- if (truncate_after_SUB ) {
671- // Truncate everything after the SUB (Ctrl+Z) character; the SUB byte itself is still copied
672- var index : DWORD = 0 ;
673- var reached_end_of_file : bool = false ;
674- while (index < bytes_read and ! reached_end_of_file ) {
675- if (temp_buffer [index ] == 0x1A ) {
// Only set the flag here: the copy below still runs for this byte, and the loop condition ends the loop afterwards
676- reached_end_of_file = true ;
677- }
678- buffer [index ] = temp_buffer [index ];
679- index += 1 ;
680- }
// `index` is the number of bytes written to `buffer`
681- return index ;
682- } else {
// No truncation requested: the source slice passed to the copy is all of `temp_buffer`, not just its first `bytes_read` bytes
683- std .mem .copy (u8 , buffer , temp_buffer );
684- return bytes_read ;
685- }
709+ return bytes_read ;
686710}
687711
688712pub const WriteFileError = error {
@@ -809,7 +833,7 @@ fn WriteConsoleWithUtf8ToUtf16Conversion(handle: HANDLE, bytes: []const u8) Writ
809833 bytes_written += @truncate (bytes_available );
810834 return bytes_written ;
811835 } else {
812- utf16_code_units = std .unicode .utf8ToUtf16Le (& utf16_buffer , bytes [byte_index .. byte_index + utf8_byte_sequence_length ]) catch return error .InvalidUtf8 ;
836+ utf16_code_units = std .unicode .utf8ToUtf16Le (& utf16_buffer , bytes [byte_index .. byte_index + utf8_byte_sequence_length ]) catch return error .InvalidUtf8 ;
813837 byte_index += utf8_byte_sequence_length ;
814838 }
815839 } else {
0 commit comments