Skip to content

Commit 0e6131c

Browse files
committed
refactor: make unicode_data tests normal tests
Instead of generating a standalone executable to test `unicode_data`, generate normal tests in `coretests`. This ensures tests are always generated, and will be run as part of the normal testsuite. Also change the generated tests to loop over lookup tables, rather than generating a separate `assert_eq!()` statement for every codepoint. The old approach produced a massive (20,000 lines plus) file which took minutes to compile!
1 parent 9a80731 commit 0e6131c

File tree

6 files changed

+3090
-75
lines changed

6 files changed

+3090
-75
lines changed

library/core/src/unicode/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pub(crate) mod printable;
2020

2121
mod rt;
2222
#[allow(unreachable_pub)]
23-
mod unicode_data;
23+
pub mod unicode_data;
2424

2525
/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
2626
/// `char` and `str` methods are based on.

library/coretests/tests/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
#![feature(try_find)]
112112
#![feature(try_trait_v2)]
113113
#![feature(uint_bit_width)]
114+
#![feature(unicode_internals)]
114115
#![feature(unsize)]
115116
#![feature(unwrap_infallible)]
116117
// tidy-alphabetical-end

library/coretests/tests/unicode.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,101 @@
1+
use core::unicode::unicode_data;
2+
use std::ops::RangeInclusive;
3+
4+
mod test_data;
5+
16
/// Sanity check: the major component of `core::char::UNICODE_VERSION` is at least 10.
#[test]
pub fn version() {
    // Only the major component matters here; minor and update are ignored.
    let major = core::char::UNICODE_VERSION.0;
    assert!(major >= 10);
}
11+
12+
/// Checks a boolean property `lookup` against a table of inclusive `ranges`.
///
/// Walking `ranges` in order, every `char` in the gap before a range (the first gap
/// begins at U+0080) must make `lookup` return `false`, and every `char` inside the
/// range must make it return `true`. Everything after the final range, up to and
/// including `char::MAX`, must again be `false`.
///
/// # Panics
///
/// Panics on the first mismatch, with the offending `char` as the message.
#[track_caller]
fn test_boolean_property(ranges: &[RangeInclusive<char>], lookup: fn(char) -> bool) {
    // First `char` not yet checked; `None` once a range has reached `char::MAX`.
    let mut next = Some('\u{80}');
    for range in ranges {
        if let Some(start) = next {
            for c in start..*range.start() {
                assert!(!lookup(c), "{c:?}");
            }
        }
        for c in range.clone() {
            assert!(lookup(c), "{c:?}");
        }
        // Take the successor from a `char` range rather than `from_u32(end + 1)`:
        // the range iterator skips the surrogate gap after U+D7FF and yields `None`
        // after `char::MAX`, where the raw-integer version would panic.
        next = (*range.end()..=char::MAX).nth(1);
    }
    if let Some(start) = next {
        for c in start..=char::MAX {
            assert!(!lookup(c), "{c:?}");
        }
    }
}
28+
29+
/// Checks a case-mapping `lookup` against a table of `(key, expected)` pairs.
///
/// Walking `ranges` in order, every `char` in the gap before a key (the first gap
/// begins at U+0080) must map to itself padded with NULs (`[c, '\0', '\0']`), and each
/// key must map to its listed value. Everything after the final key, up to and
/// including `char::MAX`, must again map to itself.
///
/// # Panics
///
/// Panics on the first mismatch, with the offending `char` as the message.
#[track_caller]
fn test_case_mapping(ranges: &[(char, [char; 3])], lookup: fn(char) -> [char; 3]) {
    // First `char` not yet checked; `None` once a key has reached `char::MAX`.
    let mut next = Some('\u{80}');
    for &(key, val) in ranges {
        if let Some(start) = next {
            for c in start..key {
                assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}");
            }
        }
        assert_eq!(lookup(key), val, "{key:?}");
        // Take the successor from a `char` range rather than `from_u32(key + 1)`:
        // the range iterator skips the surrogate gap after U+D7FF and yields `None`
        // after `char::MAX`, where the raw-integer version would panic.
        next = (key..=char::MAX).nth(1);
    }
    if let Some(start) = next {
        for c in start..=char::MAX {
            assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}");
        }
    }
}
43+
44+
/// Checks `unicode_data::alphabetic::lookup` against the ranges in `test_data::ALPHABETIC`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn alphabetic() {
    let expected: &[RangeInclusive<char>] = test_data::ALPHABETIC;
    let lookup: fn(char) -> bool = unicode_data::alphabetic::lookup;
    test_boolean_property(expected, lookup);
}
49+
50+
/// Checks `unicode_data::case_ignorable::lookup` against the ranges in
/// `test_data::CASE_IGNORABLE`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn case_ignorable() {
    let expected: &[RangeInclusive<char>] = test_data::CASE_IGNORABLE;
    let lookup: fn(char) -> bool = unicode_data::case_ignorable::lookup;
    test_boolean_property(expected, lookup);
}
55+
56+
/// Checks `unicode_data::cased::lookup` against the ranges in `test_data::CASED`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn cased() {
    let expected: &[RangeInclusive<char>] = test_data::CASED;
    let lookup: fn(char) -> bool = unicode_data::cased::lookup;
    test_boolean_property(expected, lookup);
}
61+
62+
/// Checks `unicode_data::grapheme_extend::lookup` against the ranges in
/// `test_data::GRAPHEME_EXTEND`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn grapheme_extend() {
    let expected: &[RangeInclusive<char>] = test_data::GRAPHEME_EXTEND;
    let lookup: fn(char) -> bool = unicode_data::grapheme_extend::lookup;
    test_boolean_property(expected, lookup);
}
67+
68+
/// Checks `unicode_data::lowercase::lookup` against the ranges in `test_data::LOWERCASE`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn lowercase() {
    let expected: &[RangeInclusive<char>] = test_data::LOWERCASE;
    let lookup: fn(char) -> bool = unicode_data::lowercase::lookup;
    test_boolean_property(expected, lookup);
}
73+
74+
/// Checks `unicode_data::n::lookup` against the ranges in `test_data::N`.
#[test]
// Every other test in this file that sweeps a whole lookup table is skipped under Miri;
// this one runs the same sweep through `test_boolean_property`, so skip it too.
#[cfg_attr(miri, ignore)]
fn n() {
    test_boolean_property(test_data::N, unicode_data::n::lookup);
}
78+
79+
/// Checks `unicode_data::uppercase::lookup` against the ranges in `test_data::UPPERCASE`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn uppercase() {
    let expected: &[RangeInclusive<char>] = test_data::UPPERCASE;
    let lookup: fn(char) -> bool = unicode_data::uppercase::lookup;
    test_boolean_property(expected, lookup);
}
84+
85+
/// Checks `unicode_data::white_space::lookup` against the ranges in
/// `test_data::WHITE_SPACE`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn white_space() {
    let expected: &[RangeInclusive<char>] = test_data::WHITE_SPACE;
    let lookup: fn(char) -> bool = unicode_data::white_space::lookup;
    test_boolean_property(expected, lookup);
}
90+
91+
/// Checks `unicode_data::conversions::to_lower` against the mappings in
/// `test_data::TO_LOWER`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn to_lowercase() {
    let expected: &[(char, [char; 3])] = test_data::TO_LOWER;
    let lookup: fn(char) -> [char; 3] = unicode_data::conversions::to_lower;
    test_case_mapping(expected, lookup);
}
96+
97+
/// Checks `unicode_data::conversions::to_upper` against the mappings in
/// `test_data::TO_UPPER`.
#[test]
#[cfg_attr(miri, ignore)] // not run under Miri
fn to_uppercase() {
    let expected: &[(char, [char; 3])] = test_data::TO_UPPER;
    let lookup: fn(char) -> [char; 3] = unicode_data::conversions::to_upper;
    test_case_mapping(expected, lookup);
}

0 commit comments

Comments
 (0)