Skip to content

Commit ce40268

Browse files
committed
std.os.windows: UTF-8 to UTF-16 conversion for Windows console output
1 parent 89d660c commit ce40268

File tree

4 files changed

+186
-3
lines changed

4 files changed

+186
-3
lines changed

lib/std/os.zig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,10 @@ pub const WriteError = error{
10621062

10631063
/// Connection reset by peer.
10641064
ConnectionResetByPeer,
1065+
1066+
/// This error occurs when trying to write UTF-8 text to a Windows console,
1067+
/// and the UTF-8 to UTF-16 conversion fails. Windows-only.
1068+
InvalidUtf8,
10651069
} || UnexpectedError;
10661070

10671071
/// Write to a file descriptor.

lib/std/os/windows.zig

Lines changed: 179 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,9 @@ pub fn GetQueuedCompletionStatusEx(
445445

446446
/// Closes `hObject` and releases the per-handle console bookkeeping, if any was recorded for it.
pub fn CloseHandle(hObject: HANDLE) void {
    // Probe for a console handle while the handle is still open. Once NtClose has returned,
    // the handle value is no longer valid (and may even be reused), so the check cannot be
    // trusted afterwards and the bookkeeping slot would never be released.
    const is_console = IsConsoleHandle(hObject);
    assert(ntdll.NtClose(hObject) == .SUCCESS);
    if (is_console) {
        // Best effort: a console handle that was never written to has no recorded data, so a
        // failed lookup is expected and deliberately ignored here.
        if (RemoveConsoleHandleData(hObject)) |_| {} else |err| switch (err) {
            error.DataNotFound,
            error.OsUnsupported,
            error.ConsoleHandleLimitReached,
            => {},
        }
    }
}
449452

450453
pub fn FindClose(hFindFile: HANDLE) void {
@@ -547,6 +550,9 @@ pub const WriteFileError = error{
547550
/// a portion of the file.
548551
LockViolation,
549552
Unexpected,
553+
/// This error occurs when trying to write UTF-8 text to a Windows console,
554+
/// and the UTF-8 to UTF-16 conversion fails.
555+
InvalidUtf8,
550556
};
551557

552558
pub fn WriteFile(
@@ -617,21 +623,108 @@ pub fn WriteFile(
617623
break :blk &overlapped_data;
618624
} else null;
619625
const adjusted_len = math.cast(u32, bytes.len) orelse maxInt(u32);
620-
if (kernel32.WriteFile(handle, bytes.ptr, adjusted_len, &bytes_written, overlapped) == 0) {
626+
if (IsConsoleHandle(handle)) {
627+
assert(offset == null);
628+
bytes_written = WriteConsoleWithUtf8ToUtf16Conversion(handle, bytes) catch |err| return err;
629+
} else {
630+
if (kernel32.WriteFile(handle, bytes.ptr, adjusted_len, &bytes_written, overlapped) == 0) {
631+
switch (kernel32.GetLastError()) {
632+
.INVALID_USER_BUFFER => return error.SystemResources,
633+
.NOT_ENOUGH_MEMORY => return error.SystemResources,
634+
.OPERATION_ABORTED => return error.OperationAborted,
635+
.NOT_ENOUGH_QUOTA => return error.SystemResources,
636+
.IO_PENDING => unreachable,
637+
.BROKEN_PIPE => return error.BrokenPipe,
638+
.INVALID_HANDLE => return error.NotOpenForWriting,
639+
.LOCK_VIOLATION => return error.LockViolation,
640+
else => |err| return unexpectedError(err),
641+
}
642+
}
643+
}
644+
return bytes_written;
645+
}
646+
}
647+
648+
/// Writes UTF-8 `bytes` to the console `handle`, converting one code point at a time to UTF-16
/// and handing it to `WriteConsoleW`. A LF that does not directly follow a CR is written as CRLF.
///
/// A call may end in the middle of a multi-byte sequence. The trailing bytes are then stored in
/// the per-handle `utf8_buffer`, counted as written, and completed by the bytes of the next call.
///
/// Returns the number of bytes of `bytes` that were consumed (at most `maxInt(DWORD)`).
/// Returns `error.InvalidUtf8` when the input cannot be decoded. Panics when no bookkeeping
/// slot is available for `handle`.
fn WriteConsoleWithUtf8ToUtf16Conversion(handle: HANDLE, bytes: []const u8) WriteFileError!DWORD {
    const handle_data: *ConsoleHandleData = GetConsoleHandleData(handle) catch |err| switch (err) {
        error.ConsoleHandleLimitReached => @panic("Reached maximum number of 64 console handles."),
        else => return error.Unexpected,
    };
    const stash = &handle_data.utf8_buffer;

    // The result is a DWORD, so never consume more input than can be reported back.
    const input = bytes[0..@min(bytes.len, maxInt(DWORD))];

    var bytes_written: DWORD = 0;
    var byte_index: usize = 0;
    while (byte_index < input.len) {
        var utf16_buffer: [2]u16 = undefined;
        var utf16_code_units: usize = undefined;
        const remaining = input[byte_index..];

        if (stash.bytes_used == 0) {
            const sequence_length: usize = std.unicode.utf8ByteSequenceLength(remaining[0]) catch return error.InvalidUtf8;
            if (remaining.len < sequence_length) {
                // The input ends inside a code point: remember the fragment (taken from the
                // current position, not from the start of the input) and record its length so
                // the next call can complete it.
                @memcpy(stash.data[0..remaining.len], remaining);
                stash.bytes_used = @intCast(remaining.len);
                return @as(DWORD, @intCast(input.len));
            }
            utf16_code_units = std.unicode.utf8ToUtf16Le(&utf16_buffer, remaining[0..sequence_length]) catch return error.InvalidUtf8;
            byte_index += sequence_length;
        } else {
            const used: usize = stash.bytes_used;
            const sequence_length: usize = std.unicode.utf8ByteSequenceLength(stash.data[0]) catch {
                stash.bytes_used = 0;
                return error.InvalidUtf8;
            };
            assert(sequence_length > 1 and sequence_length > used);
            const bytes_needed = sequence_length - used;
            if (remaining.len < bytes_needed) {
                // Still not enough bytes to finish the code point: extend the fragment.
                @memcpy(stash.data[used..][0..remaining.len], remaining);
                stash.bytes_used = @intCast(used + remaining.len);
                return @as(DWORD, @intCast(input.len));
            }
            @memcpy(stash.data[used..sequence_length], remaining[0..bytes_needed]);
            utf16_code_units = std.unicode.utf8ToUtf16Le(&utf16_buffer, stash.data[0..sequence_length]) catch {
                // Drop the broken fragment so that later writes are not poisoned by it.
                stash.bytes_used = 0;
                return error.InvalidUtf8;
            };
            byte_index += bytes_needed;
        }

        // LF to CRLF translation; a LF that directly follows a CR is passed through unchanged.
        switch (utf16_buffer[0]) {
            0x000D => {
                handle_data.last_character_written_is_CR = true;
            },
            0x000A => {
                if (handle_data.last_character_written_is_CR) {
                    handle_data.last_character_written_is_CR = false;
                } else {
                    utf16_buffer = .{ 0x000D, 0x000A };
                    utf16_code_units = 2;
                }
            },
            else => {
                handle_data.last_character_written_is_CR = false;
            },
        }

        var utf16_code_units_written: DWORD = undefined;
        if (kernel32.WriteConsoleW(handle, &utf16_buffer, @intCast(utf16_code_units), &utf16_code_units_written, null) == FALSE) {
            switch (kernel32.GetLastError()) {
                .INVALID_USER_BUFFER => return error.SystemResources,
                .NOT_ENOUGH_MEMORY => return error.SystemResources,
                .OPERATION_ABORTED => return error.OperationAborted,
                .NOT_ENOUGH_QUOTA => return error.SystemResources,
                .IO_PENDING => unreachable,
                // Reported as an error instead of `unreachable`: an unexpected OS error code
                // must not become undefined behavior in release builds.
                .BROKEN_PIPE => return error.BrokenPipe,
                .INVALID_HANDLE => return error.NotOpenForWriting,
                .LOCK_VIOLATION => return error.LockViolation,
                else => |err| return unexpectedError(err),
            }
        }
        if (utf16_code_units_written < utf16_code_units) {
            // Short console write: report only the bytes of fully written code points.
            return bytes_written;
        }
        // The code point is on the console; any stored fragment has now been consumed.
        stash.bytes_used = 0;
        bytes_written = @intCast(byte_index);
    }
    return bytes_written;
}
636729

637730
pub const SetCurrentDirectoryError = error{
@@ -5073,3 +5166,86 @@ pub fn ProcessBaseAddress(handle: HANDLE) ProcessBaseAddressError!HMODULE {
50735166
const ppeb: *const PEB = @ptrCast(@alignCast(peb_out.ptr));
50745167
return ppeb.ImageBaseAddress;
50755168
}
5169+
5170+
/// Reports whether `handle` is a console handle, i.e. whether `GetConsoleMode` succeeds on it.
pub fn IsConsoleHandle(handle: HANDLE) bool {
    // Only the success of the call matters; the retrieved mode itself is discarded.
    var console_mode: DWORD = undefined;
    const succeeded = kernel32.GetConsoleMode(handle, &console_mode);
    return succeeded != FALSE;
}
5174+
5175+
/// State kept for one console handle between writes.
const ConsoleHandleData = struct {
    /// Holds the leading bytes of a UTF-8 sequence that has not been completed yet.
    pub const Utf8Buffer = struct {
        data: [4]u8 = [_]u8{0x00} ** 4,
        bytes_used: u3 = 0,
    };

    /// True while this entry is claimed for `handle`.
    is_assigned: bool = false,

    /// The console handle this entry belongs to.
    handle: ?HANDLE = null,

    /// The native Windows NT console expects UTF-16, so UTF-8 text has to be converted before
    /// it is written. Because write() may be given a fragment of a string, or even a single
    /// byte, leftover UTF-8 byte(s) are kept here instead of failing. The UTF-16 code unit(s)
    /// are produced once enough bytes have arrived to form a complete code point.
    utf8_buffer: Utf8Buffer = .{},

    /// State for the LF to CRLF conversion done before writing to the native console.
    last_character_written_is_CR: bool = false,
};
5194+
5195+
/// Number of console handles that can be tracked at the same time.
const console_handle_data_limit = 64;

/// Storage type of the console bookkeeping table; empty when not targeting Windows.
const ConsoleHandleDataArray = if (builtin.os.tag == .windows)
    [console_handle_data_limit]ConsoleHandleData
else
    void;

/// Table of per-console-handle state, with every entry initially unassigned.
var console_handle_data_array: ConsoleHandleDataArray = if (builtin.os.tag == .windows)
    [_]ConsoleHandleData{.{}} ** console_handle_data_limit
else {};
5204+
5205+
/// Failures reported by the console handle bookkeeping helpers.
const ConsoleHandleDataError = error{
    /// Every entry of the bookkeeping table is already assigned to another handle.
    ConsoleHandleLimitReached,
    /// No entry is assigned to the requested handle.
    DataNotFound,
    /// The target operating system is not Windows.
    OsUnsupported,
};
5210+
5211+
/// Returns the bookkeeping entry for `handle`, claiming the first unassigned entry (with its
/// UTF-8 fragment length and CR state reset) when the handle has none yet.
///
/// Fails with `error.ConsoleHandleLimitReached` when every entry is taken by another handle and
/// with `error.OsUnsupported` when not targeting Windows.
fn GetConsoleHandleData(handle: HANDLE) ConsoleHandleDataError!*ConsoleHandleData {
    if (builtin.os.tag != .windows) {
        return error.OsUnsupported;
    } else {
        // Remember the first free entry while scanning the whole table for an existing match.
        var free_entry: ?*ConsoleHandleData = null;
        for (&console_handle_data_array) |*entry| {
            if (!entry.is_assigned) {
                if (free_entry == null) free_entry = entry;
                continue;
            }
            if (entry.handle == handle) return entry;
        }

        const claimed = free_entry orelse return error.ConsoleHandleLimitReached;
        claimed.is_assigned = true;
        claimed.handle = handle;
        claimed.utf8_buffer.bytes_used = 0;
        claimed.last_character_written_is_CR = false;
        return claimed;
    }
}
5238+
5239+
/// Marks the bookkeeping entry assigned to `handle` as unassigned and returns its index.
///
/// Fails with `error.DataNotFound` when no entry is assigned to `handle` and with
/// `error.OsUnsupported` when not targeting Windows.
fn RemoveConsoleHandleData(handle: HANDLE) ConsoleHandleDataError!usize {
    if (builtin.os.tag != .windows) {
        return error.OsUnsupported;
    } else {
        for (&console_handle_data_array, 0..) |*entry, slot| {
            if (!entry.is_assigned) continue;
            if (entry.handle != handle) continue;
            entry.is_assigned = false;
            return slot;
        }
        return error.DataNotFound;
    }
}

lib/std/os/windows/kernel32.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ pub extern "kernel32" fn FillConsoleOutputCharacterW(hConsoleOutput: HANDLE, cCh
174174
pub extern "kernel32" fn FillConsoleOutputAttribute(hConsoleOutput: HANDLE, wAttribute: WORD, nLength: DWORD, dwWriteCoord: COORD, lpNumberOfAttrsWritten: *DWORD) callconv(WINAPI) BOOL;
175175
pub extern "kernel32" fn SetConsoleCursorPosition(hConsoleOutput: HANDLE, dwCursorPosition: COORD) callconv(WINAPI) BOOL;
176176

177+
/// Extern declaration of `WriteConsoleW` from kernel32.
/// `lpBuffer` is declared as an opaque pointer, so callers pass a pointer to their own buffer;
/// `lpNumberOfCharsWritten` and `lpReserved` are optional and may be null.
pub extern "kernel32" fn WriteConsoleW(hConsoleOutput: HANDLE, lpBuffer: *const anyopaque, nNumberOfCharsToWrite: DWORD, lpNumberOfCharsWritten: ?*DWORD, lpReserved: ?LPVOID) callconv(WINAPI) BOOL;
178+
177179
pub extern "kernel32" fn GetCurrentDirectoryW(nBufferLength: DWORD, lpBuffer: ?[*]WCHAR) callconv(WINAPI) DWORD;
178180

179181
pub extern "kernel32" fn GetCurrentThread() callconv(WINAPI) HANDLE;

src/link.zig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,7 @@ pub const File = struct {
499499
DeviceBusy,
500500
InvalidArgument,
501501
HotSwapUnavailableOnHostOperatingSystem,
502+
InvalidUtf8,
502503
};
503504

504505
/// Called from within the CodeGen to lower a local variable instantiation as an unnamed

0 commit comments

Comments
 (0)