Skip to content

Commit 9a80731

Browse files
committed
refactor: make string formatting more readable
To make the final output code easier to see:

* Get rid of the unnecessary line-noise of `.unwrap()`ing calls to `write!()` by moving the `.unwrap()` into a macro.
* Join consecutive `write!()` calls using a single multiline format string.
* Replace `.push()` and `.push_str(format!())` with `write!()`.
* If after doing all of the above, there is only a single `write!()` call in the function, just construct the string directly with `format!()`.
1 parent 6d75cd2 commit 9a80731

File tree

7 files changed

+1121
-1074
lines changed

7 files changed

+1121
-1074
lines changed

library/core/src/unicode/unicode_data.rs

Lines changed: 883 additions & 793 deletions
Large diffs are not rendered by default.

src/tools/unicode-table-generator/src/cascading_map.rs

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use std::collections::HashMap;
2-
use std::fmt::Write as _;
32
use std::ops::Range;
43

5-
use crate::fmt_list;
64
use crate::raw_emitter::RawEmitter;
5+
use crate::writeln;
76

87
impl RawEmitter {
98
pub fn emit_cascading_map(&mut self, ranges: &[Range<u32>]) -> bool {
@@ -24,8 +23,6 @@ impl RawEmitter {
2423
.flat_map(|r| (r.start..r.end).collect::<Vec<u32>>())
2524
.collect::<Vec<u32>>();
2625

27-
println!("there are {} points", points.len());
28-
2926
// how many distinct ranges need to be counted?
3027
let mut codepoints_by_high_bytes = HashMap::<usize, Vec<u32>>::new();
3128
for point in points {
@@ -37,41 +34,41 @@ impl RawEmitter {
3734
}
3835

3936
let mut bit_for_high_byte = 1u8;
40-
let mut arms = Vec::<String>::new();
37+
let mut arms = String::new();
4138

4239
let mut high_bytes: Vec<usize> = codepoints_by_high_bytes.keys().copied().collect();
4340
high_bytes.sort();
4441
for high_byte in high_bytes {
4542
let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap();
4643
if codepoints.len() == 1 {
4744
let ch = codepoints.pop().unwrap();
48-
arms.push(format!("{high_byte} => c as u32 == {ch:#04x}"));
45+
writeln!(arms, "{high_byte:#04x} => c as u32 == {ch:#04x},");
4946
continue;
5047
}
5148
// more than 1 codepoint in this arm
5249
for codepoint in codepoints {
5350
map[(*codepoint & 0xff) as usize] |= bit_for_high_byte;
5451
}
55-
arms.push(format!(
56-
"{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0"
57-
));
52+
writeln!(
53+
arms,
54+
"{high_byte:#04x} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0,"
55+
);
5856
bit_for_high_byte <<= 1;
5957
}
6058

61-
writeln!(&mut self.file, "static WHITESPACE_MAP: [u8; 256] = [{}];", fmt_list(map.iter()))
62-
.unwrap();
6359
self.bytes_used += 256;
60+
self.file = format!(
61+
"static WHITESPACE_MAP: [u8; 256] = {map:?};
6462
65-
writeln!(&mut self.file, "#[inline]").unwrap();
66-
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
67-
writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap();
68-
writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap();
69-
for arm in arms {
70-
writeln!(&mut self.file, " {arm},").unwrap();
71-
}
72-
writeln!(&mut self.file, " _ => false,").unwrap();
73-
writeln!(&mut self.file, " }}").unwrap();
74-
writeln!(&mut self.file, "}}").unwrap();
63+
#[inline]
64+
pub const fn lookup(c: char) -> bool {{
65+
debug_assert!(!c.is_ascii());
66+
match c as u32 >> 8 {{
67+
{arms}\
68+
_ => false,
69+
}}
70+
}}"
71+
);
7572

7673
true
7774
}
Lines changed: 18 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
use std::char;
22
use std::collections::BTreeMap;
3-
use std::fmt::{self, Write};
43

5-
use crate::{UnicodeData, fmt_list};
4+
use crate::fmt_helpers::Hex;
5+
use crate::{CharEscape, UnicodeData, fmt_list};
66

77
const INDEX_MASK: u32 = 1 << 22;
88

99
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2]) {
10-
let mut file = String::new();
11-
1210
let (lower_tables, lower_size) = generate_tables("LOWER", &data.to_lower);
13-
file.push_str(&lower_tables);
14-
file.push_str("\n\n");
1511
let (upper_tables, upper_size) = generate_tables("UPPER", &data.to_upper);
16-
file.push_str(&upper_tables);
12+
let file = format!(
13+
"{lower_tables}
14+
{upper_tables}"
15+
);
1716
(file, [lower_size, upper_size])
1817
}
1918

@@ -43,14 +42,18 @@ fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, usize
4342
INDEX_MASK | (u32::try_from(multis.len()).unwrap() - 1)
4443
};
4544

46-
mappings.push((CharEscape(key), value));
45+
mappings.push((CharEscape(key), Hex(value)));
4746
}
4847

49-
let mut size = 0;
50-
let mut tables = String::new();
51-
writeln!(
52-
tables,
53-
"\
48+
let size = size_of_val(mappings.as_slice()) + size_of_val(multis.as_slice());
49+
let file = format!(
50+
"
51+
#[rustfmt::skip]
52+
static {case}CASE_TABLE: &[(char, u32); {mappings_len}] = &[{mappings}];
53+
54+
#[rustfmt::skip]
55+
static {case}CASE_TABLE_MULTI: &[[char; 3]; {multis_len}] = &[{multis}];
56+
5457
#[inline]
5558
pub fn to_{case_lower}(c: char) -> [char; 3] {{
5659
const {{
@@ -81,40 +84,7 @@ pub fn to_{case_lower}(c: char) -> [char; 3] {{
8184
mappings_len = mappings.len(),
8285
multis = fmt_list(&multis),
8386
multis_len = multis.len(),
84-
)
85-
.unwrap();
86-
87-
size += size_of_val(mappings.as_slice());
88-
writeln!(tables, "#[rustfmt::skip]").unwrap();
89-
write!(
90-
tables,
91-
"static {}CASE_TABLE: &[(char, u32); {}] = &[{}];",
92-
case,
93-
mappings.len(),
94-
fmt_list(mappings),
95-
)
96-
.unwrap();
97-
98-
tables.push_str("\n\n");
87+
);
9988

100-
size += size_of_val(multis.as_slice());
101-
writeln!(tables, "#[rustfmt::skip]").unwrap();
102-
write!(
103-
tables,
104-
"static {}CASE_TABLE_MULTI: &[[char; 3]; {}] = &[{}];",
105-
case,
106-
multis.len(),
107-
fmt_list(multis),
108-
)
109-
.unwrap();
110-
111-
(tables, size)
112-
}
113-
114-
struct CharEscape(char);
115-
116-
impl fmt::Debug for CharEscape {
117-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118-
write!(f, "'{}'", self.0.escape_default())
119-
}
89+
(file, size)
12090
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
use std::fmt;
2+
3+
// Convenience macros for writing and unwrapping.
4+
#[macro_export]
5+
macro_rules! writeln {
6+
($($args:tt)*) => {{
7+
use std::fmt::Write as _;
8+
std::writeln!($($args)*).unwrap();
9+
}};
10+
}
11+
#[macro_export]
12+
macro_rules! write {
13+
($($args:tt)*) => {{
14+
use std::fmt::Write as _;
15+
std::write!($($args)*).unwrap();
16+
}};
17+
}
18+
19+
pub fn fmt_list<V: fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
20+
let pieces = values.into_iter().map(|b| format!("{b:?}, "));
21+
let mut out = String::new();
22+
let mut line = String::from("\n ");
23+
for piece in pieces {
24+
if line.len() + piece.len() < 98 {
25+
line.push_str(&piece);
26+
} else {
27+
writeln!(out, "{}", line.trim_end());
28+
line = format!(" {piece}");
29+
}
30+
}
31+
writeln!(out, "{}", line.trim_end());
32+
out
33+
}
34+
35+
/// Wrapper type for formatting a `T` using its `Binary` implementation.
36+
#[derive(Copy, Clone)]
37+
pub struct Bin<T>(pub T);
38+
39+
impl<T: fmt::Binary> fmt::Debug for Bin<T> {
40+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41+
let bits = size_of::<T>() * 8;
42+
std::write!(f, "0b{:0bits$b}", self.0)
43+
}
44+
}
45+
46+
impl<T: fmt::Binary> fmt::Display for Bin<T> {
47+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48+
fmt::Debug::fmt(self, f)
49+
}
50+
}
51+
52+
/// Wrapper type for formatting a `T` using its `LowerHex` implementation.
53+
#[derive(Copy, Clone)]
54+
pub struct Hex<T>(pub T);
55+
56+
impl<T: fmt::LowerHex> fmt::Debug for Hex<T> {
57+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58+
std::write!(f, "{:#x}", self.0)
59+
}
60+
}
61+
62+
impl<T: fmt::LowerHex> fmt::Display for Hex<T> {
63+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64+
fmt::Debug::fmt(self, f)
65+
}
66+
}
67+
68+
/// Wrapper type for formatting a `char` using `escape_unicode`.
69+
#[derive(Copy, Clone)]
70+
pub struct CharEscape(pub char);
71+
72+
impl fmt::Debug for CharEscape {
73+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74+
std::write!(f, "'{}'", self.0.escape_unicode())
75+
}
76+
}
77+
78+
impl fmt::Display for CharEscape {
79+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80+
fmt::Debug::fmt(self, f)
81+
}
82+
}

0 commit comments

Comments
 (0)