-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Add complex case mapping and title case mapping. #26039
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
66af127
addaa5b
d316487
f901086
7ac6b58
c57a412
c6a8d5e
6369dcb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1851,11 +1851,35 @@ impl str { | |
/// let s = "HELLO"; | ||
/// assert_eq!(s.to_lowercase(), "hello"); | ||
/// ``` | ||
#[unstable(feature = "collections")] | ||
#[stable(feature = "unicode_case_mapping", since = "1.2.0")] | ||
pub fn to_lowercase(&self) -> String { | ||
let mut s = String::with_capacity(self.len()); | ||
s.extend(self[..].chars().flat_map(|c| c.to_lowercase())); | ||
for (i, c) in self[..].char_indices() { | ||
if c == 'Σ' { | ||
map_uppercase_sigma(self, i, &mut s) | ||
} else { | ||
s.extend(c.to_lowercase()); | ||
} | ||
} | ||
return s; | ||
|
||
#[cold] | ||
#[inline(never)] | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Were you able to get a measurable improvement with these attributes? If not, I'd personally recommend leaving them off for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did not take any measurement, I just imitated |
||
/// Appends the lowercase form of the 'Σ' found at byte offset `i` of `from` to `to`. | ||
/// | ||
/// 'Σ' is special-cased because its lowercase form depends on context: "ς" is | ||
/// pushed when the sigma is word-final, "σ" otherwise. Here "word-final" means | ||
/// that, skipping case-ignorable characters, the text before the sigma ends in | ||
/// a cased character and the text after it does not start with one. | ||
fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { | ||
// 'Σ' occupies two bytes in UTF-8, so `i + 2` below is the first byte after it. | ||
debug_assert!('Σ'.len_utf8() == 2); | ||
let is_word_final = | ||
// Scan backwards from the sigma for a preceding cased character... | ||
case_ignoreable_then_cased(from[..i].chars().rev()) && | ||
// ...and forwards to make sure no cased character follows it. | ||
!case_ignoreable_then_cased(from[i + 2..].chars()); | ||
to.push_str(if is_word_final { "ς" } else { "σ" }); | ||
} | ||
|
||
/// Skips the leading characters of `iter` for which `Case_Ignorable` holds, then | ||
/// returns whether the first remaining character is `Cased`. | ||
/// | ||
/// Returns `false` when the iterator is exhausted before any such character. | ||
fn case_ignoreable_then_cased<I: Iterator<Item=char>>(iter: I) -> bool { | ||
use rustc_unicode::derived_property::{Cased, Case_Ignorable}; | ||
match iter.skip_while(|&c| Case_Ignorable(c)).next() { | ||
Some(c) => Cased(c), | ||
// Nothing left after the case-ignorable run: no cased neighbour. | ||
None => false, | ||
} | ||
} | ||
} | ||
|
||
/// Returns the uppercase equivalent of this string. | ||
|
@@ -1868,7 +1892,7 @@ impl str { | |
/// let s = "hello"; | ||
/// assert_eq!(s.to_uppercase(), "HELLO"); | ||
/// ``` | ||
#[unstable(feature = "collections")] | ||
#[stable(feature = "unicode_case_mapping", since = "1.2.0")] | ||
pub fn to_uppercase(&self) -> String { | ||
let mut s = String::with_capacity(self.len()); | ||
s.extend(self[..].chars().flat_map(|c| c.to_uppercase())); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT | ||
// file at the top-level directory of this distribution and at | ||
// http://rust-lang.org/COPYRIGHT. | ||
// | ||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | ||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | ||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | ||
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
use collections::vec::Vec; | ||
|
||
// Checks `char::to_lowercase` on inputs that each map to a single char. | ||
#[test] | ||
fn char_to_lowercase() { | ||
assert_iter_eq('A'.to_lowercase(), &['a']); | ||
assert_iter_eq('É'.to_lowercase(), &['é']); | ||
assert_iter_eq('Dž'.to_lowercase(), &['dž']); | ||
} | ||
|
||
// Checks `char::to_uppercase`, including inputs whose uppercase form | ||
// is more than one char ('ß', 'fi', 'ᾀ'). | ||
#[test] | ||
fn char_to_uppercase() { | ||
assert_iter_eq('a'.to_uppercase(), &['A']); | ||
assert_iter_eq('é'.to_uppercase(), &['É']); | ||
assert_iter_eq('Dž'.to_uppercase(), &['DŽ']); | ||
assert_iter_eq('ß'.to_uppercase(), &['S', 'S']); | ||
assert_iter_eq('fi'.to_uppercase(), &['F', 'I']); | ||
assert_iter_eq('ᾀ'.to_uppercase(), &['Ἀ', 'Ι']); | ||
} | ||
|
||
// Checks `char::to_titlecase`; for 'ß' and 'fi' only the first char of the | ||
// expected result is uppercase, unlike the `to_uppercase` expectations. | ||
#[test] | ||
fn char_to_titlecase() { | ||
assert_iter_eq('a'.to_titlecase(), &['A']); | ||
assert_iter_eq('é'.to_titlecase(), &['É']); | ||
assert_iter_eq('DŽ'.to_titlecase(), &['Dž']); | ||
assert_iter_eq('ß'.to_titlecase(), &['S', 's']); | ||
assert_iter_eq('fi'.to_titlecase(), &['F', 'i']); | ||
assert_iter_eq('ᾀ'.to_titlecase(), &['ᾈ']); | ||
} | ||
|
||
// Test helper: collects `iter` into a `Vec` of chars and asserts that it | ||
// equals `expected`. | ||
fn assert_iter_eq<I: Iterator<Item=char>>(iter: I, expected: &[char]) { | ||
assert_eq!(iter.collect::<Vec<_>>(), expected); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,7 @@ extern crate rustc_unicode; | |
mod binary_heap; | ||
mod bit; | ||
mod btree; | ||
mod char; // char isn't really a collection, but didn't find a better place for this. | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mumble mumble collection of code points mumble mumble
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A |
||
mod enum_set; | ||
mod fmt; | ||
mod linked_list; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1687,6 +1687,19 @@ fn trim_ws() { | |
""); | ||
} | ||
|
||
#[test] | ||
fn to_lowercase() { | ||
assert_eq!("".to_lowercase(), ""); | ||
// https://github.com/rust-lang/rust/issues/26035 | ||
assert_eq!("'Σ AÉΣ'Σ'' Σ DžΣ".to_lowercase(), "'σ aéσ'ς'' σ džς"); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a few other conditions in the check for the sigma at the end; could this add some test cases for those?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What cases are you thinking of? End of a word, or of the string?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, specifically there's a boolean
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All |
||
} | ||
|
||
// Checks `str::to_uppercase` on the empty string and on a string whose | ||
// uppercase form is longer, in chars, than the input. | ||
#[test] | ||
fn to_uppercase() { | ||
assert_eq!("".to_uppercase(), ""); | ||
assert_eq!("aéDžßfiᾀ".to_uppercase(), "AÉDŽSSFIἈΙ"); | ||
} | ||
|
||
mod pattern { | ||
use std::str::pattern::Pattern; | ||
use std::str::pattern::{Searcher, ReverseSearcher}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment for why this is being specially cased here?