From 8f09b608eacad7155a9fb4c2f35fe53b450fcff4 Mon Sep 17 00:00:00 2001 From: data-man Date: Sun, 22 Dec 2019 15:38:27 +0500 Subject: [PATCH 1/2] Add 'u' specifier to std.format --- lib/std/fmt.zig | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index 2eef92a20137..f2f54f63655d 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -76,6 +76,7 @@ fn peekIsAlign(comptime fmt: []const u8) bool { /// - `d`: output numeric value in decimal notation /// - `b`: output integer value in binary notation /// - `c`: output integer as an ASCII character. Integer type must have 8 bits at max. +/// - `u`: output integer as an UTF-8 sequence. Integer type must have 32 bits at max. /// - `*`: output the address of the value instead of the value itself. /// /// If a formatted user type contains a function of the type @@ -520,6 +521,12 @@ pub fn formatIntValue( } else { @compileError("Cannot print integer that is larger than 8 bits as a ascii"); } + } else if (comptime std.mem.eql(u8, fmt, "u")) { + if (@TypeOf(int_value).bit_count <= 32) { + return formatUtf8Codepoint(@as(u32, int_value), options, context, Errors, output); + } else { + @compileError("Cannot print integer that is larger than 32 bits as an UTF-8 sequence"); + } } else if (comptime std.mem.eql(u8, fmt, "b")) { radix = 2; uppercase = false; @@ -585,6 +592,22 @@ pub fn formatAsciiChar( return output(context, @as(*const [1]u8, &c)[0..]); } +pub fn formatUtf8Codepoint( + c: u32, + options: FormatOptions, + context: var, + comptime Errors: type, + output: fn (@TypeOf(context), []const u8) Errors!void, +) Errors!void { + var buf: [4]u8 = undefined; + // Surrogate halves (U+D800..U+DFFF) and values above U+10FFFF have no UTF-8 encoding. + // Emit U+FFFD REPLACEMENT CHARACTER for them instead of reaching `unreachable`. + const len = std.unicode.utf8Encode(c, buf[0..]) catch { + return output(context, "\xEF\xBF\xBD"); + }; + return output(context, buf[0..len]); +} + pub fn formatBuf( buf: []const u8, options: FormatOptions, @@ -1205,6 +1228,14 @@ test "int.specifier" { const value: u8 = 'a'; try testFmt("u8: a\n", "u8: {c}\n", .{value}); } + { +
const value: u8 = 'a'; + try testFmt("UTF-8: a\n", "UTF-8: {u}\n", .{value}); + } + { + const value: u32 = 0x1F310; + try testFmt("UTF-8: 🌐\n", "UTF-8: {u}\n", .{value}); + } { const value: u8 = 0b1100; try testFmt("u8: 0b1100\n", "u8: 0b{b}\n", .{value}); From a98590780cd7e7e7efe63a276698073eac2f2c87 Mon Sep 17 00:00:00 2001 From: data-man Date: Sun, 29 Dec 2019 07:43:11 +0500 Subject: [PATCH 2/2] use 21 bits --- lib/std/fmt.zig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index cd2f225ddbf4..93e3bf32afaa 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -76,7 +76,7 @@ fn peekIsAlign(comptime fmt: []const u8) bool { /// - `d`: output numeric value in decimal notation /// - `b`: output integer value in binary notation /// - `c`: output integer as an ASCII character. Integer type must have 8 bits at max. -/// - `u`: output integer as an UTF-8 sequence. Integer type must have 32 bits at max. +/// - `u`: output integer as a UTF-8 sequence. Integer type must have 21 bits at max. /// - `*`: output the address of the value instead of the value itself.
/// /// If a formatted user type contains a function of the type @@ -522,10 +522,10 @@ pub fn formatIntValue( @compileError("Cannot print integer that is larger than 8 bits as a ascii"); } } else if (comptime std.mem.eql(u8, fmt, "u")) { - if (@TypeOf(int_value).bit_count <= 32) { - return formatUtf8Codepoint(@as(u32, int_value), options, context, Errors, output); + if (@TypeOf(int_value).bit_count <= 21) { + return formatUtf8Codepoint(@as(u21, int_value), options, context, Errors, output); } else { - @compileError("Cannot print integer that is larger than 32 bits as an UTF-8 sequence"); + @compileError("Cannot print integer that is larger than 21 bits as an UTF-8 sequence"); } } else if (comptime std.mem.eql(u8, fmt, "b")) { radix = 2; @@ -595,7 +595,7 @@ pub fn formatAsciiChar( } pub fn formatUtf8Codepoint( - c: u32, + c: u21, options: FormatOptions, context: var, comptime Errors: type, @@ -1231,7 +1231,7 @@ test "int.specifier" { try testFmt("UTF-8: a\n", "UTF-8: {u}\n", .{value}); } { - const value: u32 = 0x1F310; + const value: u21 = 0x1F310; try testFmt("UTF-8: 🌐\n", "UTF-8: {u}\n", .{value}); } {