Skip to content

Commit 1ac413d

Browse files
committed
Auto merge of rust-lang#146173 - Kmeakin:km/unicode-data/no-ascii, r=jhpratt
Don't include ASCII characters in Unicode tables. Split off from rust-lang#145219.
2 parents c1e76db + fd1cda6 commit 1ac413d

File tree

3 files changed

+315
-247
lines changed

3 files changed

+315
-247
lines changed

alloc/src/str.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -418,9 +418,8 @@ impl str {
418418
}
419419

420420
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
421-
use core::unicode::{Case_Ignorable, Cased};
422-
match iter.skip_while(|&c| Case_Ignorable(c)).next() {
423-
Some(c) => Cased(c),
421+
match iter.skip_while(|&c| c.is_case_ignorable()).next() {
422+
Some(c) => c.is_cased(),
424423
None => false,
425424
}
426425
}

core/src/char/methods.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,43 @@ impl char {
969969
#[must_use]
970970
#[inline]
971971
pub(crate) fn is_grapheme_extended(self) -> bool {
972-
unicode::Grapheme_Extend(self)
972+
!self.is_ascii() && unicode::Grapheme_Extend(self)
973+
}
974+
975+
/// Returns `true` if this `char` has the `Cased` property.
976+
///
977+
/// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
978+
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
979+
///
980+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
981+
/// [ucd]: https://www.unicode.org/reports/tr44/
982+
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
983+
#[must_use]
984+
#[inline]
985+
#[doc(hidden)]
986+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
987+
pub fn is_cased(self) -> bool {
988+
if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) }
989+
}
990+
991+
/// Returns `true` if this `char` has the `Case_Ignorable` property.
992+
///
993+
/// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
994+
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
995+
///
996+
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
997+
/// [ucd]: https://www.unicode.org/reports/tr44/
998+
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
999+
#[must_use]
1000+
#[inline]
1001+
#[doc(hidden)]
1002+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1003+
pub fn is_case_ignorable(self) -> bool {
1004+
if self.is_ascii() {
1005+
matches!(self, '\'' | '.' | ':' | '^' | '`')
1006+
} else {
1007+
unicode::Case_Ignorable(self)
1008+
}
9731009
}
9741010

9751011
/// Returns `true` if this `char` has one of the general categories for numbers.

0 commit comments

Comments
 (0)