Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -985,7 +985,11 @@ impl char {
#[doc(hidden)]
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
// Reports whether this `char` counts as "cased".
// ASCII input is answered directly with `is_ascii_alphabetic()`; only non-ASCII input
// reaches the Unicode lookup tables.
// NOTE(review): this span comes from a diff view whose +/- markers were lost. The file
// header reports "5 additions & 1 deletion", so the one-line `if` directly below appears
// to be the removed body and the multi-line `if` after it the replacement.
pub fn is_cased(self) -> bool {
// Old body: non-ASCII characters consult the dedicated `unicode::Cased` table.
if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) }
// New body: same ASCII fast path.
if self.is_ascii() {
self.is_ascii_alphabetic()
} else {
// Non-ASCII: true when any of the three lookups matches. `||` short-circuits, so
// `Uppercase` and `Lt` are only consulted when the earlier lookups return false.
unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self)
}
}

/// Returns `true` if this `char` has the `Case_Ignorable` property.
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
// for use in alloc, not re-exported in std.
#[rustfmt::skip]
pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
pub use unicode_data::cased::lookup as Cased;
pub use unicode_data::conversions;

#[rustfmt::skip]
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
pub(crate) use unicode_data::lt::lookup as Lt;
pub(crate) use unicode_data::n::lookup as N;
pub(crate) use unicode_data::uppercase::lookup as Uppercase;
pub(crate) use unicode_data::white_space::lookup as White_Space;
Expand Down
90 changes: 35 additions & 55 deletions library/core/src/unicode/unicode_data.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
// Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist
// Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist
// Cased : 403 bytes, 4526 codepoints in 157 ranges (U+0000AA - U+01F18A) using skiplist
// Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
// Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset
// Lt : 33 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using skiplist
// N : 455 bytes, 1901 codepoints in 143 ranges (U+0000B2 - U+01FBFA) using skiplist
// Uppercase : 797 bytes, 1952 codepoints in 655 ranges (U+0000C0 - U+01F18A) using bitset
// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading
// to_lower : 11484 bytes
// to_upper : 13432 bytes
// Total : 31413 bytes
// Total : 31043 bytes

#[inline(always)]
const fn bitset_search<
Expand Down Expand Up @@ -337,59 +337,6 @@ pub mod case_ignorable {
}
}

#[rustfmt::skip]
// Generated lookup table for the `Cased` property.
// The summary header of this generated file lists it as 4526 code points in 157 ranges
// (U+0000AA - U+01F18A), stored as a skiplist.
// NOTE(review): in this diff the hunk header (-337,59 +337,6) indicates the whole module
// is being removed; `is_cased` switches to `Lowercase || Uppercase || Lt` instead.
pub mod cased {
use super::ShortOffsetRunHeader;

// Run headers handed to `super::skip_search` together with `OFFSETS`.
// The encoding of both tables is defined by `skip_search` and `ShortOffsetRunHeader`,
// which are not visible in this view. Do not edit by hand; regenerate instead.
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [
ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024),
ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958),
ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264),
ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824),
ShortOffsetRunHeader::new(183, 64256), ShortOffsetRunHeader::new(189, 65313),
ShortOffsetRunHeader::new(193, 66560), ShortOffsetRunHeader::new(197, 67456),
ShortOffsetRunHeader::new(219, 68736), ShortOffsetRunHeader::new(227, 71840),
ShortOffsetRunHeader::new(235, 93760), ShortOffsetRunHeader::new(237, 119808),
ShortOffsetRunHeader::new(239, 120486), ShortOffsetRunHeader::new(276, 122624),
ShortOffsetRunHeader::new(299, 122928), ShortOffsetRunHeader::new(305, 125184),
ShortOffsetRunHeader::new(307, 127280), ShortOffsetRunHeader::new(309, 1241482),
];
// Byte table indexed via the start indices stored in `SHORT_OFFSET_RUNS`; the const
// block in `lookup_slow` checks that every start index is below this array's length.
static OFFSETS: [u8; 315] = [
170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5, 96, 1, 42, 4,
2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1,
5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8,
1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116,
1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4,
5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132,
102, 3, 4, 1, 62, 2, 2, 1, 1, 1, 8, 21, 5, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6,
26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3,
1, 42, 1, 9, 0, 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2,
4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25,
1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0,
62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
];
// Returns `true` if `c` is in the `Cased` table.
// Callers must pass a non-ASCII `c`; this is only checked in debug builds.
#[inline]
pub fn lookup(c: char) -> bool {
debug_assert!(!c.is_ascii());
// 0xaa is the lowest code point the file header lists for this table, so anything
// below it is rejected here without calling the out-of-line search.
(c as u32) >= 0xaa && lookup_slow(c)
}

// Out-of-line table search; `#[inline(never)]` keeps it out of the inlined fast path above.
#[inline(never)]
fn lookup_slow(c: char) -> bool {
// Compile-time validation of the table invariants that the `unsafe` call below relies on.
const {
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
let mut i = 0;
while i < SHORT_OFFSET_RUNS.len() {
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
i += 1;
}
}
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
}
}

#[rustfmt::skip]
pub mod grapheme_extend {
use super::ShortOffsetRunHeader;
Expand Down Expand Up @@ -572,6 +519,39 @@ pub mod lowercase {
}
}

#[rustfmt::skip]
// Generated lookup table for `Lt`.
// The summary header of this generated file lists it as 31 code points in 10 ranges
// (U+0001C5 - U+001FFD), stored as a skiplist in 33 bytes.
// NOTE(review): `Lt` is presumably the Unicode general category Titlecase_Letter; confirm
// against the table generator. The hunk header (-572,6 +519,39) indicates this module is
// newly added; `is_cased` uses it alongside `Lowercase` and `Uppercase`.
pub mod lt {
use super::ShortOffsetRunHeader;

// Run headers handed to `super::skip_search` together with `OFFSETS`.
// The encoding of both tables is defined by `skip_search` and `ShortOffsetRunHeader`,
// which are not visible in this view. Do not edit by hand; regenerate instead.
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 3] = [
ShortOffsetRunHeader::new(0, 453), ShortOffsetRunHeader::new(1, 8072),
ShortOffsetRunHeader::new(9, 1122301),
];
// Byte table indexed via the start indices stored in `SHORT_OFFSET_RUNS`; the const
// block in `lookup_slow` checks that every start index is below this array's length.
static OFFSETS: [u8; 21] = [
0, 1, 2, 1, 2, 1, 38, 1, 0, 8, 8, 8, 8, 8, 12, 1, 15, 1, 47, 1, 0,
];
// Returns `true` if `c` is in the `Lt` table.
// Callers must pass a non-ASCII `c`; this is only checked in debug builds.
#[inline]
pub fn lookup(c: char) -> bool {
debug_assert!(!c.is_ascii());
// 0x1c5 is the lowest code point the file header lists for this table, so anything
// below it is rejected here without calling the out-of-line search.
(c as u32) >= 0x1c5 && lookup_slow(c)
}

// Out-of-line table search; `#[inline(never)]` keeps it out of the inlined fast path above.
#[inline(never)]
fn lookup_slow(c: char) -> bool {
// Compile-time validation of the table invariants that the `unsafe` call below relies on.
const {
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
let mut i = 0;
while i < SHORT_OFFSET_RUNS.len() {
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
i += 1;
}
}
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
}
}

#[rustfmt::skip]
pub mod n {
use super::ShortOffsetRunHeader;
Expand Down
2 changes: 1 addition & 1 deletion src/tools/unicode-table-generator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static PROPERTIES: &[&str] = &[
"Alphabetic",
"Lowercase",
"Uppercase",
"Cased",
"Lt",
"Case_Ignorable",
"Grapheme_Extend",
"White_Space",
Expand Down
Loading