From 41fb8f77ee10221db319096ee45a56a29f013564 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:03:04 -0700 Subject: [PATCH 01/19] core: Add from_u32 to the Char trait This is the only free function not part of the trait. --- src/libcore/char.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index e4dc9ce5bd46f..db58f802643b1 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -270,6 +270,9 @@ pub trait Char { /// Panics if given a radix > 36. fn from_digit(num: uint, radix: uint) -> Option; + /// Converts from `u32` to a `char` + fn from_u32(i: u32) -> Option; + /// Returns the hexadecimal Unicode escape of a character. /// /// The rules are as follows: @@ -319,6 +322,9 @@ impl Char for char { fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } + #[inline] + fn from_u32(i: u32) -> Option { from_u32(i) } + fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } fn escape_default(&self, f: |char|) { escape_default(*self, f) } From 070e691379a1d7c6bec6ec077db41c1ac40d90fa Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:05:12 -0700 Subject: [PATCH 02/19] core: Mark Char trait experimental --- src/libcore/char.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index db58f802643b1..e88c7695d1b41 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -229,6 +229,7 @@ pub fn len_utf8_bytes(c: char) -> uint { } /// Basic `char` manipulations. +#[experimental = "trait organization may change"] pub trait Char { /// Checks if a `char` parses as a numeric digit in the given radix. 
/// @@ -315,6 +316,7 @@ pub trait Char { fn encode_utf16(&self, dst: &mut [u16]) -> Option; } +#[experimental = "trait is experimental"] impl Char for char { fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } From ac2f379abb13b249aa1e630e14fa42f9415160f8 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:06:19 -0700 Subject: [PATCH 03/19] char: Mark the MAX constant stable --- src/libcore/char.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index e88c7695d1b41..1c5de09dd0cbc 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -63,6 +63,7 @@ static MAX_THREE_B: u32 = 0x10000u32; */ /// The highest valid code point +#[stable] pub const MAX: char = '\U0010ffff'; /// Converts from `u32` to a `char` From c2aff692fa88235d356725f98184a5ea5b52eb88 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:15:27 -0700 Subject: [PATCH 04/19] unicode: Rename UnicodeChar::is_digit to is_numeric 'Numeric' is the proper name of the unicode character class, and this frees up the word 'digit' for ascii use in libcore. Since I'm going to rename `Char::is_digit_radix` to `is_digit`, I am not leaving a deprecated method in place, because that would just cause name clashes, as both `Char` and `UnicodeChar` are in the prelude. 
[breaking-change] --- src/compiletest/runtest.rs | 2 +- src/libcollections/str.rs | 6 +++--- src/libcore/str.rs | 14 +++++++------- src/libcoretest/char.rs | 12 ++++++------ src/librustc/lint/builtin.rs | 2 +- src/libstd/rt/backtrace.rs | 4 ++-- src/libunicode/u_char.rs | 4 ++-- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index 75dc45d16eb36..9bf45de0a17d4 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -1566,7 +1566,7 @@ fn _arm_exec_compiled_test(config: &Config, let mut exitcode: int = 0; for c in exitcode_out.as_slice().chars() { - if !c.is_digit() { break; } + if !c.is_numeric() { break; } exitcode = exitcode * 10 + match c { '0' ... '9' => c as int - ('0' as int), _ => 101, diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 9c93669b5acbd..d28cdcc3f4b34 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -1189,7 +1189,7 @@ mod tests { assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12"); - assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123"); + assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123"); } #[test] @@ -1204,7 +1204,7 @@ mod tests { assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar"); - assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar"); + assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar"); } #[test] @@ -1219,7 +1219,7 @@ mod tests { assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar"); - assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar"); + 
assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar"); } #[test] diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 68e490ecb19c4..8d26a970eb8ba 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1315,7 +1315,7 @@ pub trait StrPrelude for Sized? { /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]); /// - /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["abc", "def", "ghi"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); @@ -1336,7 +1336,7 @@ pub trait StrPrelude for Sized? { /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect(); /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]); /// - /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["abc", "def2ghi"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect(); @@ -1368,7 +1368,7 @@ pub trait StrPrelude for Sized? { /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect(); /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]); /// - /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).rev().collect(); + /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).rev().collect(); /// assert_eq!(v, vec!["ghi", "def", "abc"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect(); @@ -1386,7 +1386,7 @@ pub trait StrPrelude for Sized? 
{ /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect(); /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]); /// - /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["ghi", "abc1def"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect(); @@ -1596,7 +1596,7 @@ pub trait StrPrelude for Sized? { /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_chars(x), "foo1bar") - /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar") + /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar") /// ``` fn trim_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str; @@ -1612,7 +1612,7 @@ pub trait StrPrelude for Sized? { /// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_left_chars(x), "foo1bar12") - /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123") + /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123") /// ``` fn trim_left_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str; @@ -1628,7 +1628,7 @@ pub trait StrPrelude for Sized? 
{ /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_right_chars(x), "12foo1bar") - /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar") + /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar") /// ``` fn trim_right_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str; diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 8ec3c59da4e00..2d5ca983fec70 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -105,12 +105,12 @@ fn test_is_control() { #[test] fn test_is_digit() { - assert!('2'.is_digit()); - assert!('7'.is_digit()); - assert!(!'c'.is_digit()); - assert!(!'i'.is_digit()); - assert!(!'z'.is_digit()); - assert!(!'Q'.is_digit()); + assert!('2'.is_numeric()); + assert!('7'.is_numeric()); + assert!(!'c'.is_numeric()); + assert!(!'i'.is_numeric()); + assert!(!'z'.is_numeric()); + assert!(!'Q'.is_numeric()); } #[test] diff --git a/src/librustc/lint/builtin.rs b/src/librustc/lint/builtin.rs index c763ac889c25d..00c68f42c3249 100644 --- a/src/librustc/lint/builtin.rs +++ b/src/librustc/lint/builtin.rs @@ -920,7 +920,7 @@ impl NonSnakeCase { let mut allow_underscore = true; ident.chars().all(|c| { allow_underscore = match c { - c if c.is_lowercase() || c.is_digit() => true, + c if c.is_lowercase() || c.is_numeric() => true, '_' if allow_underscore => false, _ => return false, }; diff --git a/src/libstd/rt/backtrace.rs b/src/libstd/rt/backtrace.rs index 107518ef27c9d..8102299438749 100644 --- a/src/libstd/rt/backtrace.rs +++ b/src/libstd/rt/backtrace.rs @@ -71,7 +71,7 @@ fn demangle(writer: &mut Writer, s: &str) -> IoResult<()> { while valid { let mut i = 0; for c in chars { - if c.is_digit() { + if c.is_numeric() { i = i * 10 + c as uint - '0' as uint; } else { break @@ -101,7 +101,7 @@ fn demangle(writer: &mut Writer, s: &str) -> IoResult<()> { first = false; } let mut rest = s; - while 
rest.char_at(0).is_digit() { + while rest.char_at(0).is_numeric() { rest = rest.slice_from(1); } let i: uint = from_str(s.slice_to(s.len() - rest.len())).unwrap(); diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index bac8b21ea68bb..4bedc6f21f47c 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -217,7 +217,7 @@ pub trait UnicodeChar { fn is_control(&self) -> bool; /// Indicates whether the character is numeric (Nd, Nl, or No). - fn is_digit(&self) -> bool; + fn is_numeric(&self) -> bool; /// Converts a character to its lowercase equivalent. /// @@ -281,7 +281,7 @@ impl UnicodeChar for char { fn is_control(&self) -> bool { is_control(*self) } - fn is_digit(&self) -> bool { is_digit(*self) } + fn is_numeric(&self) -> bool { is_digit(*self) } fn to_lowercase(&self) -> char { to_lowercase(*self) } From acb5fefd6d9933b345873355c2c0100184c74727 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:24:15 -0700 Subject: [PATCH 05/19] core: Rename Char::is_digit_radix to is_digit This fits the naming of `to_digit` and `from_digit`. Leave the old name deprecated. --- src/libcore/char.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 1c5de09dd0cbc..428a5879b8bc9 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -245,8 +245,24 @@ pub trait Char { /// # Panics /// /// Panics if given a radix > 36. + #[deprecated = "use is_digit"] fn is_digit_radix(&self, radix: uint) -> bool; + /// Checks if a `char` parses as a numeric digit in the given radix. + /// + /// Compared to `is_digit()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `c` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Failure + /// + /// Fails if given a radix > 36. + fn is_digit(&self, radix: uint) -> bool; + /// Converts a character to the corresponding digit. 
/// /// # Return value @@ -319,8 +335,11 @@ pub trait Char { #[experimental = "trait is experimental"] impl Char for char { + #[deprecated = "use is_digit"] fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn is_digit(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } From 0150fa4b1b3e30b1f763905bd1af2d2ccd73c47e Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:33:02 -0700 Subject: [PATCH 06/19] core: Rename Char::len_utf8_bytes to Char::len_utf8 "bytes" is redundant. Deprecate the old. --- src/libcore/char.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 428a5879b8bc9..82dc2becf28cf 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -316,8 +316,13 @@ pub trait Char { /// Returns the amount of bytes this character would need if encoded in /// UTF-8. + #[deprecated = "use len_utf8"] fn len_utf8_bytes(&self) -> uint; + /// Returns the amount of bytes this character would need if encoded in + /// UTF-8. + fn len_utf8(&self) -> uint; + /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. /// @@ -352,8 +357,12 @@ impl Char for char { fn escape_default(&self, f: |char|) { escape_default(*self, f) } #[inline] + #[deprecated = "use len_utf8"] fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + #[inline] + fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } + #[inline] fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away From f6607a20c4abbd03a806c1320d059e0911dd0cdb Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 8 Oct 2014 17:40:31 -0700 Subject: [PATCH 07/19] core: Add Char::len_utf16 Missing method to pair with len_utf8. 
--- src/libcore/char.rs | 10 ++++++++++ src/libcoretest/char.rs | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 82dc2becf28cf..93fa614e597fd 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -323,6 +323,10 @@ pub trait Char { /// UTF-8. fn len_utf8(&self) -> uint; + /// Returns the amount of bytes this character would need if encoded in + /// UTF-16. + fn len_utf16(&self) -> uint; + /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. /// @@ -363,6 +367,12 @@ impl Char for char { #[inline] fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } + #[inline] + fn len_utf16(&self) -> uint { + let ch = *self as u32; + if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } + } + #[inline] fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 2d5ca983fec70..507ddf65e55b3 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -197,6 +197,14 @@ fn test_encode_utf16() { check('\U0001f4a9', &[0xd83d, 0xdca9]); } +#[test] +fn test_len_utf16() { + assert!('x'.len_utf16() == 1); + assert!('\u00e9'.len_utf16() == 1); + assert!('\ua66e'.len_utf16() == 1); + assert!('\U0001f4a9'.len_utf16() == 2); +} + #[test] fn test_width() { assert_eq!('\x00'.width(false),Some(0)); From 4dd17245769082d07c3f98100e5a7cf922813ec9 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Fri, 10 Oct 2014 14:55:11 -0700 Subject: [PATCH 08/19] core: Add stability attributes to char::from_digit and from_u32 For now we are preferring free functions for primitive ctors, so they are marked 'unstable' pending final decision. The methods on `Char` are 'deprecated'. 
--- src/libcore/char.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 93fa614e597fd..90b5506d65d4a 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -68,6 +68,7 @@ pub const MAX: char = '\U0010ffff'; /// Converts from `u32` to a `char` #[inline] +#[unstable = "pending decisions about costructors for primitives"] pub fn from_u32(i: u32) -> Option { // catch out-of-bounds and surrogates if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { @@ -146,6 +147,7 @@ pub fn to_digit(c: char, radix: uint) -> Option { /// Panics if given an `radix` > 36. /// #[inline] +#[unstable = "pending decisions about costructors for primitives"] pub fn from_digit(num: uint, radix: uint) -> Option { if radix > 36 { panic!("from_digit: radix is to high (maximum 36)"); @@ -286,9 +288,11 @@ pub trait Char { /// # Panics /// /// Panics if given a radix > 36. + #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option; /// Converts from `u32` to a `char` + #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option; /// Returns the hexadecimal Unicode escape of a character. @@ -351,9 +355,11 @@ impl Char for char { fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } + #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } #[inline] + #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option { from_u32(i) } fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } From 95c3f618c08fad57d0b01ae8692f1d9a00c5bec6 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 10:54:18 -0700 Subject: [PATCH 09/19] core: Deprecated remaining free functions in `char` Prefer the methods. 
--- src/libcore/char.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 90b5506d65d4a..ad836fca57efc 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -98,6 +98,7 @@ pub fn from_u32(i: u32) -> Option { /// This just wraps `to_digit()`. /// #[inline] +#[deprecated = "use the Char::is_digit method"] pub fn is_digit_radix(c: char, radix: uint) -> bool { match to_digit(c, radix) { Some(_) => true, @@ -120,6 +121,7 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool { /// Panics if given a `radix` outside the range `[0..36]`. /// #[inline] +#[deprecated = "use the Char::to_digit method"] pub fn to_digit(c: char, radix: uint) -> Option { if radix > 36 { panic!("to_digit: radix is too high (maximum 36)"); @@ -174,6 +176,7 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// - chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN` /// - chars above 0x10000 get 8-digit escapes: `\\UNNNNNNNN` /// +#[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { // avoid calling str::to_str_radix because we don't really need to allocate // here. @@ -206,6 +209,7 @@ pub fn escape_unicode(c: char, f: |char|) { /// - Any other chars in the range [0x20,0x7e] are not escaped. /// - Any other chars are given hex Unicode escapes; see `escape_unicode`. 
/// +#[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { match c { '\t' => { f('\\'); f('t'); } @@ -221,6 +225,7 @@ pub fn escape_default(c: char, f: |char|) { /// Returns the amount of bytes this `char` would need if encoded in UTF-8 #[inline] +#[deprecated = "use the Char::len_utf8 method"] pub fn len_utf8_bytes(c: char) -> uint { let code = c as u32; match () { From b577e4c8d8abfccb82269855701e1e8f10dff9ff Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 10:57:49 -0700 Subject: [PATCH 10/19] core: Mark remaining Char methods unstable The `Char` trait itself may go away in favor of primitive inherent methods. Still some questions about whether the preconditions are following the final error handling conventions. --- src/libcore/char.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index ad836fca57efc..e2b420a4d3997 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -268,6 +268,7 @@ pub trait Char { /// # Failure /// /// Fails if given a radix > 36. + #[unstable = "pending error conventions"] fn is_digit(&self, radix: uint) -> bool; /// Converts a character to the corresponding digit. @@ -281,6 +282,7 @@ pub trait Char { /// # Panics /// /// Panics if given a radix outside the range [0..36]. + #[unstable = "pending error conventions, trait organization"] fn to_digit(&self, radix: uint) -> Option; /// Converts a number to the character representing it. @@ -307,6 +309,7 @@ pub trait Char { /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN` /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. + #[unstable = "pending error conventions, trait organization"] fn escape_unicode(&self, f: |char|); /// Returns a 'default' ASCII and C++11-like literal escape of a @@ -321,6 +324,7 @@ pub trait Char { /// escaped. 
/// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. + #[unstable = "pending error conventions, trait organization"] fn escape_default(&self, f: |char|); /// Returns the amount of bytes this character would need if encoded in @@ -330,10 +334,12 @@ pub trait Char { /// Returns the amount of bytes this character would need if encoded in /// UTF-8. + #[unstable = "pending trait organization"] fn len_utf8(&self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-16. + #[unstable = "pending trait organization"] fn len_utf16(&self) -> uint; /// Encodes this character as UTF-8 into the provided byte buffer, @@ -341,6 +347,7 @@ pub trait Char { /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. + #[unstable = "pending trait organization"] fn encode_utf8(&self, dst: &mut [u8]) -> Option; /// Encodes this character as UTF-16 into the provided `u16` buffer, @@ -348,6 +355,7 @@ pub trait Char { /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. 
+ #[unstable = "pending trait organization"] fn encode_utf16(&self, dst: &mut [u16]) -> Option; } @@ -356,8 +364,10 @@ impl Char for char { #[deprecated = "use is_digit"] fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + #[unstable = "pending trait organization"] fn is_digit(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + #[unstable = "pending trait organization"] fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } #[deprecated = "use the char::from_digit free function"] @@ -367,8 +377,10 @@ impl Char for char { #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option { from_u32(i) } + #[unstable = "pending error conventions, trait organization"] fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } + #[unstable = "pending error conventions, trait organization"] fn escape_default(&self, f: |char|) { escape_default(*self, f) } #[inline] @@ -376,15 +388,18 @@ impl Char for char { fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } #[inline] + #[unstable = "pending trait organization"] fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } #[inline] + #[unstable = "pending trait organization"] fn len_utf16(&self) -> uint { let ch = *self as u32; if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } } #[inline] + #[unstable = "pending error conventions, trait organization"] fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { // Marked #[inline] to allow llvm optimizing it away let code = *self as u32; @@ -412,6 +427,7 @@ impl Char for char { } #[inline] + #[unstable = "pending error conventions, trait organization"] fn encode_utf16(&self, dst: &mut [u16]) -> Option { // Marked #[inline] to allow llvm optimizing it away let mut ch = *self as u32; From 5928f6c8b672b7569bc9349dda94cdde0a8a3117 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 13 Oct 2014 13:03:42 -0700 Subject: [PATCH 11/19] Fix various deprecation warnings from char changes --- src/libcore/char.rs 
| 122 +++++++++++++++++-------------- src/libcore/fmt/float.rs | 3 +- src/libfmt_macros/lib.rs | 2 +- src/libstd/num/strconv.rs | 3 +- src/libsyntax/parse/lexer/mod.rs | 6 +- src/libterm/terminfo/parm.rs | 4 +- 6 files changed, 75 insertions(+), 65 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index e2b420a4d3997..36cd394ed15e4 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -100,10 +100,7 @@ pub fn from_u32(i: u32) -> Option { #[inline] #[deprecated = "use the Char::is_digit method"] pub fn is_digit_radix(c: char, radix: uint) -> bool { - match to_digit(c, radix) { - Some(_) => true, - None => false, - } + c.is_digit(radix) } /// @@ -123,17 +120,7 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool { #[inline] #[deprecated = "use the Char::to_digit method"] pub fn to_digit(c: char, radix: uint) -> Option { - if radix > 36 { - panic!("to_digit: radix is too high (maximum 36)"); - } - let val = match c { - '0' ... '9' => c as uint - ('0' as uint), - 'a' ... 'z' => c as uint + 10u - ('a' as uint), - 'A' ... 'Z' => c as uint + 10u - ('A' as uint), - _ => return None, - }; - if val < radix { Some(val) } - else { None } + c.to_digit(radix) } /// @@ -178,23 +165,7 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// #[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { - // avoid calling str::to_str_radix because we don't really need to allocate - // here. - f('\\'); - let pad = match () { - _ if c <= '\x7f' => { f('x'); 2 } - _ if c <= '\uffff' => { f('u'); 4 } - _ => { f('U'); 8 } - }; - for offset in range_step::(4 * (pad - 1), -1, -4) { - let offset = offset as uint; - unsafe { - match ((c as i32) >> offset) & 0xf { - i @ 0 ... 
9 => { f(transmute('0' as i32 + i)); } - i => { f(transmute('a' as i32 + (i - 10))); } - } - } - } + c.escape_unicode(f) } /// @@ -211,29 +182,14 @@ pub fn escape_unicode(c: char, f: |char|) { /// #[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { - match c { - '\t' => { f('\\'); f('t'); } - '\r' => { f('\\'); f('r'); } - '\n' => { f('\\'); f('n'); } - '\\' => { f('\\'); f('\\'); } - '\'' => { f('\\'); f('\''); } - '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(c); } - _ => c.escape_unicode(f), - } + c.escape_default(f) } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 #[inline] #[deprecated = "use the Char::len_utf8 method"] pub fn len_utf8_bytes(c: char) -> uint { - let code = c as u32; - match () { - _ if code < MAX_ONE_B => 1u, - _ if code < MAX_TWO_B => 2u, - _ if code < MAX_THREE_B => 3u, - _ => 4u, - } + c.len_utf8() } /// Basic `char` manipulations. @@ -362,13 +318,30 @@ pub trait Char { #[experimental = "trait is experimental"] impl Char for char { #[deprecated = "use is_digit"] - fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn is_digit_radix(&self, radix: uint) -> bool { self.is_digit(radix) } #[unstable = "pending trait organization"] - fn is_digit(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + fn is_digit(&self, radix: uint) -> bool { + match self.to_digit(radix) { + Some(_) => true, + None => false, + } + } #[unstable = "pending trait organization"] - fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } + fn to_digit(&self, radix: uint) -> Option { + if radix > 36 { + panic!("to_digit: radix is too high (maximum 36)"); + } + let val = match *self { + '0' ... '9' => *self as uint - ('0' as uint), + 'a' ... 'z' => *self as uint + 10u - ('a' as uint), + 'A' ... 
'Z' => *self as uint + 10u - ('A' as uint), + _ => return None, + }; + if val < radix { Some(val) } + else { None } + } #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } @@ -378,18 +351,55 @@ impl Char for char { fn from_u32(i: u32) -> Option { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } + fn escape_unicode(&self, f: |char|) { + // avoid calling str::to_str_radix because we don't really need to allocate + // here. + f('\\'); + let pad = match () { + _ if *self <= '\xff' => { f('x'); 2 } + _ if *self <= '\uffff' => { f('u'); 4 } + _ => { f('U'); 8 } + }; + for offset in range_step::(4 * (pad - 1), -1, -4) { + let offset = offset as uint; + unsafe { + match ((*self as i32) >> offset) & 0xf { + i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } + i => { f(transmute('a' as i32 + (i - 10))); } + } + } + } + } #[unstable = "pending error conventions, trait organization"] - fn escape_default(&self, f: |char|) { escape_default(*self, f) } + fn escape_default(&self, f: |char|) { + match *self { + '\t' => { f('\\'); f('t'); } + '\r' => { f('\\'); f('r'); } + '\n' => { f('\\'); f('n'); } + '\\' => { f('\\'); f('\\'); } + '\'' => { f('\\'); f('\''); } + '"' => { f('\\'); f('"'); } + '\x20' ... 
'\x7e' => { f(*self); } + _ => self.escape_unicode(f), + } + } #[inline] #[deprecated = "use len_utf8"] - fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + fn len_utf8_bytes(&self) -> uint { self.len_utf8() } #[inline] #[unstable = "pending trait organization"] - fn len_utf8(&self) -> uint { len_utf8_bytes(*self) } + fn len_utf8(&self) -> uint { + let code = *self as u32; + match () { + _ if code < MAX_ONE_B => 1u, + _ if code < MAX_TWO_B => 2u, + _ if code < MAX_THREE_B => 3u, + _ => 4u, + } + } #[inline] #[unstable = "pending trait organization"] diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs index 5fd4e2e326df0..1760c4d8e6616 100644 --- a/src/libcore/fmt/float.rs +++ b/src/libcore/fmt/float.rs @@ -15,6 +15,7 @@ pub use self::SignificantDigits::*; pub use self::SignFormat::*; use char; +use char::Char; use fmt; use iter::{range, DoubleEndedIterator}; use num::{Float, FPNaN, FPInfinite, ToPrimitive}; @@ -222,7 +223,7 @@ pub fn float_to_str_bytes_common( // round the remaining ones. 
if limit_digits && dig == digit_count { let ascii2value = |chr: u8| { - char::to_digit(chr as char, radix).unwrap() + (chr as char).to_digit(radix).unwrap() }; let value2ascii = |val: uint| { char::from_digit(val, radix).unwrap() as u8 diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index 71a3f24babb82..134819ad02757 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -411,7 +411,7 @@ impl<'a> Parser<'a> { loop { match self.cur.clone().next() { Some((_, c)) => { - match char::to_digit(c, 10) { + match c.to_digit(10) { Some(i) => { cur = cur * 10 + i; found = true; diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index f8ba9b720118a..649298d9c0818 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -17,6 +17,7 @@ pub use self::SignificantDigits::*; pub use self::SignFormat::*; use char; +use char::Char; use num; use num::{Int, Float, FPNaN, FPInfinite, ToPrimitive}; use slice::{SlicePrelude, CloneSliceAllocPrelude}; @@ -320,7 +321,7 @@ pub fn float_to_str_bytes_common( // round the remaining ones. 
if limit_digits && dig == digit_count { let ascii2value = |chr: u8| { - char::to_digit(chr as char, radix).unwrap() + (chr as char).to_digit(radix).unwrap() }; let value2ascii = |val: uint| { char::from_digit(val, radix).unwrap() as u8 diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index fbca4868255ff..e19e38e297701 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -645,7 +645,7 @@ impl<'a> StringReader<'a> { loop { let c = self.curr; if c == Some('_') { debug!("skipping a _"); self.bump(); continue; } - match c.and_then(|cc| char::to_digit(cc, radix)) { + match c.and_then(|cc| cc.to_digit(radix)) { Some(_) => { debug!("{} in scan_digits", c); len += 1; @@ -677,7 +677,7 @@ impl<'a> StringReader<'a> { return token::Integer(self.name_from(start_bpos)); } } - } else if c.is_digit_radix(10) { + } else if c.is_digit(10) { num_digits = self.scan_digits(10) + 1; } else { num_digits = 0; @@ -696,7 +696,7 @@ impl<'a> StringReader<'a> { // might have stuff after the ., and if it does, it needs to start // with a number self.bump(); - if self.curr.unwrap_or('\0').is_digit_radix(10) { + if self.curr.unwrap_or('\0').is_digit(10) { self.scan_digits(10); self.scan_float_exponent(); } diff --git a/src/libterm/terminfo/parm.rs b/src/libterm/terminfo/parm.rs index f910bfc5bd446..cfab649490076 100644 --- a/src/libterm/terminfo/parm.rs +++ b/src/libterm/terminfo/parm.rs @@ -14,8 +14,6 @@ pub use self::Param::*; use self::States::*; use self::FormatState::*; use self::FormatOp::*; - -use std::char; use std::mem::replace; #[deriving(PartialEq)] @@ -298,7 +296,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) }, PushParam => { // params are 1-indexed - stack.push(mparams[match char::to_digit(cur, 10) { + stack.push(mparams[match cur.to_digit(10) { Some(d) => d - 1, None => return Err("bad param number".to_string()) }].clone()); From ca1820b1fce5aa803ccc757e79dd659f599d1516 Mon Sep 17 00:00:00 
2001 From: Brian Anderson Date: Tue, 14 Oct 2014 13:08:54 -0700 Subject: [PATCH 12/19] core: Convert Char methods to by-val self Methods on primitive Copy types generally should take `self`. [breaking-change] --- src/libcore/char.rs | 54 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 36cd394ed15e4..55d2424eba6e3 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -209,7 +209,7 @@ pub trait Char { /// /// Panics if given a radix > 36. #[deprecated = "use is_digit"] - fn is_digit_radix(&self, radix: uint) -> bool; + fn is_digit_radix(self, radix: uint) -> bool; /// Checks if a `char` parses as a numeric digit in the given radix. /// @@ -225,7 +225,7 @@ pub trait Char { /// /// Fails if given a radix > 36. #[unstable = "pending error conventions"] - fn is_digit(&self, radix: uint) -> bool; + fn is_digit(self, radix: uint) -> bool; /// Converts a character to the corresponding digit. /// @@ -239,7 +239,7 @@ pub trait Char { /// /// Panics if given a radix outside the range [0..36]. #[unstable = "pending error conventions, trait organization"] - fn to_digit(&self, radix: uint) -> Option; + fn to_digit(self, radix: uint) -> Option; /// Converts a number to the character representing it. /// @@ -266,7 +266,7 @@ pub trait Char { /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(&self, f: |char|); + fn escape_unicode(self, f: |char|); /// Returns a 'default' ASCII and C++11-like literal escape of a /// character. @@ -281,7 +281,7 @@ pub trait Char { /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. 
#[unstable = "pending error conventions, trait organization"] - fn escape_default(&self, f: |char|); + fn escape_default(self, f: |char|); /// Returns the amount of bytes this character would need if encoded in /// UTF-8. #[deprecated = "use len_utf8"] - fn len_utf8_bytes(&self) -> uint; + fn len_utf8_bytes(self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. #[unstable = "pending trait organization"] - fn len_utf8(&self) -> uint; + fn len_utf8(self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-16. #[unstable = "pending trait organization"] - fn len_utf16(&self) -> uint; + fn len_utf16(self) -> uint; /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. @@ -318,10 +318,10 @@ pub trait Char { #[experimental = "trait is experimental"] impl Char for char { #[deprecated = "use is_digit"] - fn is_digit_radix(&self, radix: uint) -> bool { self.is_digit(radix) } + fn is_digit_radix(self, radix: uint) -> bool { self.is_digit(radix) } #[unstable = "pending trait organization"] - fn is_digit(&self, radix: uint) -> bool { + fn is_digit(self, radix: uint) -> bool { match self.to_digit(radix) { Some(_) => true, None => false, @@ -329,14 +329,14 @@ impl Char for char { } #[unstable = "pending trait organization"] - fn to_digit(&self, radix: uint) -> Option { + fn to_digit(self, radix: uint) -> Option { if radix > 36 { panic!("to_digit: radix is too high (maximum 36)"); } - let val = match *self { - '0' ... '9' => *self as uint - ('0' as uint), - 'a' ... 'z' => *self as uint + 10u - ('a' as uint), - 'A' ... 'Z' => *self as uint + 10u - ('A' as uint), + let val = match self { + '0' ... '9' => self as uint - ('0' as uint), + 'a' ... 'z' => self as uint + 10u - ('a' as uint), + 'A' ... 
'Z' => self as uint + 10u - ('A' as uint), _ => return None, }; if val < radix { Some(val) } @@ -351,19 +351,19 @@ impl Char for char { fn from_u32(i: u32) -> Option { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(&self, f: |char|) { + fn escape_unicode(self, f: |char|) { // avoid calling str::to_str_radix because we don't really need to allocate // here. f('\\'); let pad = match () { - _ if *self <= '\xff' => { f('x'); 2 } - _ if *self <= '\uffff' => { f('u'); 4 } + _ if self <= '\xff' => { f('x'); 2 } + _ if self <= '\uffff' => { f('u'); 4 } _ => { f('U'); 8 } }; for offset in range_step::(4 * (pad - 1), -1, -4) { let offset = offset as uint; unsafe { - match ((*self as i32) >> offset) & 0xf { + match ((self as i32) >> offset) & 0xf { i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } i => { f(transmute('a' as i32 + (i - 10))); } } @@ -372,27 +372,27 @@ impl Char for char { } #[unstable = "pending error conventions, trait organization"] - fn escape_default(&self, f: |char|) { - match *self { + fn escape_default(self, f: |char|) { + match self { '\t' => { f('\\'); f('t'); } '\r' => { f('\\'); f('r'); } '\n' => { f('\\'); f('n'); } '\\' => { f('\\'); f('\\'); } '\'' => { f('\\'); f('\''); } '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(*self); } + '\x20' ... 
'\x7e' => { f(self); } _ => self.escape_unicode(f), } } #[inline] #[deprecated = "use len_utf8"] - fn len_utf8_bytes(&self) -> uint { self.len_utf8() } + fn len_utf8_bytes(self) -> uint { self.len_utf8() } #[inline] #[unstable = "pending trait organization"] - fn len_utf8(&self) -> uint { - let code = *self as u32; + fn len_utf8(self) -> uint { + let code = self as u32; match () { _ if code < MAX_ONE_B => 1u, _ if code < MAX_TWO_B => 2u, @@ -403,8 +403,8 @@ impl Char for char { #[inline] #[unstable = "pending trait organization"] - fn len_utf16(&self) -> uint { - let ch = *self as u32; + fn len_utf16(self) -> uint { + let ch = self as u32; if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } } From aad246160451aacc2f7a707c028bdf44e77ad38d Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Fri, 31 Oct 2014 16:20:41 -0700 Subject: [PATCH 13/19] core: Convert Char::escape_default, escape_unicode to iterators [breaking-change] --- src/libcollections/str.rs | 8 +- src/libcore/char.rs | 129 +++++++++++++++++++++++-------- src/libgraphviz/lib.rs | 2 +- src/librustc_trans/back/link.rs | 2 +- src/librustdoc/clean/mod.rs | 4 +- src/libsyntax/parse/lexer/mod.rs | 4 +- src/libsyntax/print/pprust.rs | 4 +- 7 files changed, 110 insertions(+), 43 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index d28cdcc3f4b34..0fe40081a4623 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -630,7 +630,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_default(|c| out.push(c)); + for c in c.escape_default() { + out.push(c); + } } out } @@ -640,7 +642,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_unicode(|c| out.push(c)); + for c in c.escape_unicode() { + out.push(c); + } } out } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 
55d2424eba6e3..1210465098a11 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -17,7 +17,7 @@ use mem::transmute; use option::{None, Option, Some}; -use iter::range_step; +use iter::{range_step, Iterator, RangeStep}; use slice::SlicePrelude; // UTF-8 ranges and tags for encoding characters @@ -165,7 +165,9 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// #[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { - c.escape_unicode(f) + for char in c.escape_unicode() { + f(char); + } } /// @@ -182,7 +184,9 @@ pub fn escape_unicode(c: char, f: |char|) { /// #[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { - c.escape_default(f) + for c in c.escape_default() { + f(c); + } } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 @@ -266,7 +270,7 @@ pub trait Char { /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(self, f: |char|); + fn escape_unicode(self) -> UnicodeEscapedChars; /// Returns a 'default' ASCII and C++11-like literal escape of a /// character. @@ -281,7 +285,7 @@ pub trait Char { /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. #[unstable = "pending error conventions, trait organization"] - fn escape_default(self, f: |char|); + fn escape_default(self) -> DefaultEscapedChars; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. @@ -351,38 +355,23 @@ impl Char for char { fn from_u32(i: u32) -> Option { from_u32(i) } #[unstable = "pending error conventions, trait organization"] - fn escape_unicode(self, f: |char|) { - // avoid calling str::to_str_radix because we don't really need to allocate - // here. 
- f('\\'); - let pad = match () { - _ if self <= '\xff' => { f('x'); 2 } - _ if self <= '\uffff' => { f('u'); 4 } - _ => { f('U'); 8 } - }; - for offset in range_step::(4 * (pad - 1), -1, -4) { - let offset = offset as uint; - unsafe { - match ((self as i32) >> offset) & 0xf { - i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } - i => { f(transmute('a' as i32 + (i - 10))); } - } - } - } + fn escape_unicode(self) -> UnicodeEscapedChars { + UnicodeEscapedChars { c: self, state: UnicodeEscapedCharsState::Backslash } } #[unstable = "pending error conventions, trait organization"] - fn escape_default(self, f: |char|) { - match self { - '\t' => { f('\\'); f('t'); } - '\r' => { f('\\'); f('r'); } - '\n' => { f('\\'); f('n'); } - '\\' => { f('\\'); f('\\'); } - '\'' => { f('\\'); f('\''); } - '"' => { f('\\'); f('"'); } - '\x20' ... '\x7e' => { f(self); } - _ => self.escape_unicode(f), - } + fn escape_default(self) -> DefaultEscapedChars { + let init_state = match self { + '\t' => DefaultEscapedCharsState::Backslash('t'), + '\r' => DefaultEscapedCharsState::Backslash('r'), + '\n' => DefaultEscapedCharsState::Backslash('n'), + '\\' => DefaultEscapedCharsState::Backslash('\\'), + '\'' => DefaultEscapedCharsState::Backslash('\''), + '"' => DefaultEscapedCharsState::Backslash('"'), + '\x20' ... '\x7e' => DefaultEscapedCharsState::Char(self), + _ => DefaultEscapedCharsState::Unicode(self.escape_unicode()) + }; + DefaultEscapedChars { state: init_state } } #[inline] @@ -456,3 +445,75 @@ impl Char for char { } } } + +/// An iterator over the characters that represent a `char`, as escaped by +/// Rust's unicode escaping rules. 
+pub struct UnicodeEscapedChars { + c: char, + state: UnicodeEscapedCharsState +} + +enum UnicodeEscapedCharsState { + Backslash, + Type, + Value(RangeStep), +} + +impl Iterator for UnicodeEscapedChars { + fn next(&mut self) -> Option { + match self.state { + UnicodeEscapedCharsState::Backslash => { + self.state = UnicodeEscapedCharsState::Type; + Some('\\') + } + UnicodeEscapedCharsState::Type => { + let (typechar, pad) = if self.c <= '\x7f' { ('x', 2) } + else if self.c <= '\uffff' { ('u', 4) } + else { ('U', 8) }; + self.state = UnicodeEscapedCharsState::Value(range_step(4 * (pad - 1), -1, -4i32)); + Some(typechar) + } + UnicodeEscapedCharsState::Value(ref mut range_step) => match range_step.next() { + Some(offset) => { + let offset = offset as uint; + let v = match ((self.c as i32) >> offset) & 0xf { + i @ 0 ... 9 => '0' as i32 + i, + i => 'a' as i32 + (i - 10) + }; + Some(unsafe { transmute(v) }) + } + None => None + } + } + } +} + +/// An iterator over the characters that represent a `char`, escaped +/// for maximum portability. +pub struct DefaultEscapedChars { + state: DefaultEscapedCharsState +} + +enum DefaultEscapedCharsState { + Backslash(char), + Char(char), + Done, + Unicode(UnicodeEscapedChars), +} + +impl Iterator for DefaultEscapedChars { + fn next(&mut self) -> Option { + match self.state { + DefaultEscapedCharsState::Backslash(c) => { + self.state = DefaultEscapedCharsState::Char(c); + Some('\\') + } + DefaultEscapedCharsState::Char(c) => { + self.state = DefaultEscapedCharsState::Done; + Some(c) + } + DefaultEscapedCharsState::Done => None, + DefaultEscapedCharsState::Unicode(ref mut iter) => iter.next() + } + } +} diff --git a/src/libgraphviz/lib.rs b/src/libgraphviz/lib.rs index df8cdabbcaa43..3ad546edf8de2 100644 --- a/src/libgraphviz/lib.rs +++ b/src/libgraphviz/lib.rs @@ -431,7 +431,7 @@ impl<'a> LabelText<'a> { // not escaping \\, since Graphviz escString needs to // interpret backslashes; see EscStr above. 
'\\' => f(c), - _ => c.escape_default(f) + _ => for c in c.escape_default() { f(c) } } } fn escape_str(s: &str) -> String { diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index d27a338b308c7..6a8074b99585c 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -262,7 +262,7 @@ pub fn sanitize(s: &str) -> String { _ => { let mut tstr = String::new(); - char::escape_unicode(c, |c| tstr.push(c)); + for c in c.escape_unicode() { tstr.push(c) } result.push('$'); result.push_str(tstr.as_slice().slice_from(1)); } diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index 209d8c7ca0f29..52aab752c5761 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -2033,9 +2033,9 @@ fn lit_to_string(lit: &ast::Lit) -> String { ast::LitBinary(ref data) => format!("{}", data), ast::LitByte(b) => { let mut res = String::from_str("b'"); - (b as char).escape_default(|c| { + for c in (b as char).escape_default() { res.push(c); - }); + } res.push('\''); res }, diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index e19e38e297701..4c759cfc4fd0a 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -193,7 +193,7 @@ impl<'a> StringReader<'a> { fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! 
{ let mut m = m.to_string(); m.push_str(": "); - char::escape_default(c, |c| m.push(c)); + for c in c.escape_default() { m.push(c) } self.fatal_span_(from_pos, to_pos, m.as_slice()); } @@ -202,7 +202,7 @@ impl<'a> StringReader<'a> { fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { let mut m = m.to_string(); m.push_str(": "); - char::escape_default(c, |c| m.push(c)); + for c in c.escape_default() { m.push(c) } self.err_span_(from_pos, to_pos, m.as_slice()); } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 5652a9a9d3a63..4ce0d74bd37f4 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -2756,7 +2756,9 @@ impl<'a> State<'a> { } ast::LitChar(ch) => { let mut res = String::from_str("'"); - ch.escape_default(|c| res.push(c)); + for c in ch.escape_default() { + res.push(c); + } res.push('\''); word(&mut self.s, res.as_slice()) } From d6ee804b632ee03679d6de682841fc7785ef4fbb Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 3 Nov 2014 10:26:22 -0800 Subject: [PATCH 14/19] unicode: Convert UnicodeChar methods to by-value Extension traits for primitive types should be by-value. [breaking-change] --- src/libunicode/u_char.rs | 50 +++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 4bedc6f21f47c..1e81916a2c6e0 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -166,7 +166,7 @@ pub fn width(c: char, is_cjk: bool) -> Option { pub trait UnicodeChar { /// Returns whether the specified character is considered a Unicode /// alphabetic code point. - fn is_alphabetic(&self) -> bool; + fn is_alphabetic(self) -> bool; /// Returns whether the specified character satisfies the 'XID_Start' /// Unicode property. 
@@ -175,7 +175,7 @@ pub trait UnicodeChar { /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to ID_Start but modified for closure under NFKx. #[allow(non_snake_case)] - fn is_XID_start(&self) -> bool; + fn is_XID_start(self) -> bool; /// Returns whether the specified `char` satisfies the 'XID_Continue' /// Unicode property. @@ -184,40 +184,40 @@ pub trait UnicodeChar { /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to 'ID_Continue' but modified for closure under NFKx. #[allow(non_snake_case)] - fn is_XID_continue(&self) -> bool; + fn is_XID_continue(self) -> bool; /// Indicates whether a character is in lowercase. /// /// This is defined according to the terms of the Unicode Derived Core /// Property `Lowercase`. - fn is_lowercase(&self) -> bool; + fn is_lowercase(self) -> bool; /// Indicates whether a character is in uppercase. /// /// This is defined according to the terms of the Unicode Derived Core /// Property `Uppercase`. - fn is_uppercase(&self) -> bool; + fn is_uppercase(self) -> bool; /// Indicates whether a character is whitespace. /// /// Whitespace is defined in terms of the Unicode Property `White_Space`. - fn is_whitespace(&self) -> bool; + fn is_whitespace(self) -> bool; /// Indicates whether a character is alphanumeric. /// /// Alphanumericness is defined in terms of the Unicode General Categories /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'. - fn is_alphanumeric(&self) -> bool; + fn is_alphanumeric(self) -> bool; /// Indicates whether a character is a control code point. /// /// Control code points are defined in terms of the Unicode General /// Category `Cc`. - fn is_control(&self) -> bool; + fn is_control(self) -> bool; /// Indicates whether the character is numeric (Nd, Nl, or No). - fn is_numeric(&self) -> bool; + fn is_numeric(self) -> bool; /// Converts a character to its lowercase equivalent. 
/// @@ -228,7 +228,7 @@ pub trait UnicodeChar { /// /// Returns the lowercase equivalent of the character, or the character /// itself if no conversion is possible. - fn to_lowercase(&self) -> char; + fn to_lowercase(self) -> char; /// Converts a character to its uppercase equivalent. /// @@ -250,7 +250,7 @@ pub trait UnicodeChar { /// [`SpecialCasing`.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt /// /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992 - fn to_uppercase(&self) -> char; + fn to_uppercase(self) -> char; /// Returns this character's displayed width in columns, or `None` if it is a /// control character other than `'\x00'`. @@ -261,31 +261,33 @@ pub trait UnicodeChar { /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) /// recommends that these characters be treated as 1 column (i.e., /// `is_cjk` = `false`) if the context cannot be reliably determined. - fn width(&self, is_cjk: bool) -> Option; + #[experimental = "needs expert opinion. 
is_cjk flag stands out as ugly"] + fn width(self, is_cjk: bool) -> Option; } impl UnicodeChar for char { - fn is_alphabetic(&self) -> bool { is_alphabetic(*self) } + fn is_alphabetic(self) -> bool { is_alphabetic(self) } - fn is_XID_start(&self) -> bool { is_XID_start(*self) } + fn is_XID_start(self) -> bool { is_XID_start(self) } - fn is_XID_continue(&self) -> bool { is_XID_continue(*self) } + fn is_XID_continue(self) -> bool { is_XID_continue(self) } - fn is_lowercase(&self) -> bool { is_lowercase(*self) } + fn is_lowercase(self) -> bool { is_lowercase(self) } - fn is_uppercase(&self) -> bool { is_uppercase(*self) } + fn is_uppercase(self) -> bool { is_uppercase(self) } - fn is_whitespace(&self) -> bool { is_whitespace(*self) } + fn is_whitespace(self) -> bool { is_whitespace(self) } - fn is_alphanumeric(&self) -> bool { is_alphanumeric(*self) } + fn is_alphanumeric(self) -> bool { is_alphanumeric(self) } - fn is_control(&self) -> bool { is_control(*self) } + fn is_control(self) -> bool { is_control(self) } - fn is_numeric(&self) -> bool { is_digit(*self) } + fn is_numeric(self) -> bool { is_digit(self) } - fn to_lowercase(&self) -> char { to_lowercase(*self) } + fn to_lowercase(self) -> char { to_lowercase(self) } - fn to_uppercase(&self) -> char { to_uppercase(*self) } + fn to_uppercase(self) -> char { to_uppercase(self) } - fn width(&self, is_cjk: bool) -> Option { width(*self, is_cjk) } + #[experimental = "needs expert opinion. is_cjk flag stands out as ugly"] + fn width(self, is_cjk: bool) -> Option { width(self, is_cjk) } } From 76ddd2b1547dd461d0487233a0a19674292c976e Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 3 Nov 2014 10:22:34 -0800 Subject: [PATCH 15/19] unicode: Add stability attributes to u_char Free functions deprecated. UnicodeChar experimental pending final decisions about prelude. 
--- src/libfmt_macros/lib.rs | 9 ++- src/libgetopts/lib.rs | 2 +- src/librustc_trans/back/link.rs | 3 +- src/librustdoc/test.rs | 5 +- src/libsyntax/parse/lexer/mod.rs | 4 +- src/libunicode/u_char.rs | 103 +++++++++++++++++++------------ src/libunicode/u_str.rs | 15 ++--- 7 files changed, 82 insertions(+), 59 deletions(-) diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index 134819ad02757..ed86ad52bb5d4 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -26,7 +26,6 @@ pub use self::Alignment::*; pub use self::Flag::*; pub use self::Count::*; -use std::char; use std::str; use std::string; @@ -221,7 +220,7 @@ impl<'a> Parser<'a> { fn ws(&mut self) { loop { match self.cur.clone().next() { - Some((_, c)) if char::is_whitespace(c) => { self.cur.next(); } + Some((_, c)) if c.is_whitespace() => { self.cur.next(); } Some(..) | None => { return } } } @@ -261,7 +260,7 @@ impl<'a> Parser<'a> { Some(i) => { ArgumentIs(i) } None => { match self.cur.clone().next() { - Some((_, c)) if char::is_alphabetic(c) => { + Some((_, c)) if c.is_alphabetic() => { ArgumentNamed(self.word()) } _ => ArgumentNext @@ -384,7 +383,7 @@ impl<'a> Parser<'a> { /// characters. 
fn word(&mut self) -> &'a str { let start = match self.cur.clone().next() { - Some((pos, c)) if char::is_XID_start(c) => { + Some((pos, c)) if c.is_XID_start() => { self.cur.next(); pos } @@ -393,7 +392,7 @@ impl<'a> Parser<'a> { let mut end; loop { match self.cur.clone().next() { - Some((_, c)) if char::is_XID_continue(c) => { + Some((_, c)) if c.is_XID_continue() => { self.cur.next(); } Some((pos, _)) => { end = pos; break } diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs index c4d712cb67362..a182f582b5f34 100644 --- a/src/libgetopts/lib.rs +++ b/src/libgetopts/lib.rs @@ -886,7 +886,7 @@ fn each_split_within<'a>(ss: &'a str, lim: uint, it: |&'a str| -> bool) } let machine: |&mut bool, (uint, char)| -> bool = |cont, (i, c)| { - let whitespace = if ::std::char::is_whitespace(c) { Ws } else { Cr }; + let whitespace = if c.is_whitespace() { Ws } else { Cr }; let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim }; state = match (state, whitespace, limit) { diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index 6a8074b99585c..db9ebac163c73 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -27,7 +27,6 @@ use util::common::time; use util::ppaux; use util::sha2::{Digest, Sha256}; -use std::char; use std::io::fs::PathExtensions; use std::io::{fs, TempDir, Command}; use std::io; @@ -272,7 +271,7 @@ pub fn sanitize(s: &str) -> String { // Underscore-qualify anything that didn't start as an ident. if result.len() > 0u && result.as_bytes()[0] != '_' as u8 && - ! char::is_XID_start(result.as_bytes()[0] as char) { + ! (result.as_bytes()[0] as char).is_XID_start() { return format!("_{}", result.as_slice()); } diff --git a/src/librustdoc/test.rs b/src/librustdoc/test.rs index 2dc1bcf776eb8..63007cf15c651 100644 --- a/src/librustdoc/test.rs +++ b/src/librustdoc/test.rs @@ -9,7 +9,6 @@ // except according to those terms. 
use std::cell::RefCell; -use std::char; use std::dynamic_lib::DynamicLibrary; use std::io::{Command, TempDir}; use std::io; @@ -300,8 +299,8 @@ impl Collector { // we use these headings as test names, so it's good if // they're valid identifiers. let name = name.chars().enumerate().map(|(i, c)| { - if (i == 0 && char::is_XID_start(c)) || - (i != 0 && char::is_XID_continue(c)) { + if (i == 0 && c.is_XID_start()) || + (i != 0 && c.is_XID_continue()) { c } else { '_' diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 4c759cfc4fd0a..9b3e25c5851c9 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1385,7 +1385,7 @@ fn ident_start(c: Option) -> bool { (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' - || (c > '\x7f' && char::is_XID_start(c)) + || (c > '\x7f' && c.is_XID_start()) } fn ident_continue(c: Option) -> bool { @@ -1395,7 +1395,7 @@ fn ident_continue(c: Option) -> bool { || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' - || (c > '\x7f' && char::is_XID_continue(c)) + || (c > '\x7f' && c.is_XID_continue()) } #[cfg(test)] diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 1e81916a2c6e0..f347ab6a21e22 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -20,12 +20,9 @@ use tables::{derived_property, property, general_category, conversions, charwidt /// Returns whether the specified `char` is considered a Unicode alphabetic /// code point +#[deprecated = "use UnicodeChar::is_alphabetic"] pub fn is_alphabetic(c: char) -> bool { - match c { - 'a' ... 'z' | 'A' ... 
'Z' => true, - c if c > '\x7f' => derived_property::Alphabetic(c), - _ => false - } + c.is_alphabetic() } /// Returns whether the specified `char` satisfies the 'XID_Start' Unicode property @@ -34,6 +31,7 @@ pub fn is_alphabetic(c: char) -> bool { /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to ID_Start but modified for closure under NFKx. #[allow(non_snake_case)] +#[deprecated = "use UnicodeChar::is_XID_start"] pub fn is_XID_start(c: char) -> bool { derived_property::XID_Start(c) } /// Returns whether the specified `char` satisfies the 'XID_Continue' Unicode property @@ -42,6 +40,7 @@ pub fn is_XID_start(c: char) -> bool { derived_property::XID_Start(c) } /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to 'ID_Continue' but modified for closure under NFKx. #[allow(non_snake_case)] +#[deprecated = "use UnicodeChar::is_XID_continue"] pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) } /// @@ -50,12 +49,9 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) } /// This is defined according to the terms of the Unicode Derived Core Property 'Lowercase'. /// #[inline] +#[deprecated = "use UnicodeChar::is_lowercase"] pub fn is_lowercase(c: char) -> bool { - match c { - 'a' ... 'z' => true, - c if c > '\x7f' => derived_property::Lowercase(c), - _ => false - } + c.is_lowercase() } /// @@ -64,12 +60,9 @@ pub fn is_lowercase(c: char) -> bool { /// This is defined according to the terms of the Unicode Derived Core Property 'Uppercase'. /// #[inline] +#[deprecated = "use UnicodeChar::is_uppercase"] pub fn is_uppercase(c: char) -> bool { - match c { - 'A' ... 'Z' => true, - c if c > '\x7f' => derived_property::Uppercase(c), - _ => false - } + c.is_uppercase() } /// @@ -78,12 +71,9 @@ pub fn is_uppercase(c: char) -> bool { /// Whitespace is defined in terms of the Unicode Property 'White_Space'. 
/// #[inline] +#[deprecated = "use UnicodeChar::is_whitespace"] pub fn is_whitespace(c: char) -> bool { - match c { - ' ' | '\x09' ... '\x0d' => true, - c if c > '\x7f' => property::White_Space(c), - _ => false - } + c.is_whitespace() } /// @@ -93,9 +83,9 @@ pub fn is_whitespace(c: char) -> bool { /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'. /// #[inline] +#[deprecated = "use UnicodeChar::is_alphanumeric"] pub fn is_alphanumeric(c: char) -> bool { - is_alphabetic(c) - || is_digit(c) + c.is_alphanumeric() } /// @@ -105,16 +95,14 @@ pub fn is_alphanumeric(c: char) -> bool { /// 'Cc'. /// #[inline] +#[deprecated = "use UnicodeChar::is_control"] pub fn is_control(c: char) -> bool { general_category::Cc(c) } /// Indicates whether the `char` is numeric (Nd, Nl, or No) #[inline] +#[deprecated = "use UnicodeChar::is_numeric"] pub fn is_digit(c: char) -> bool { - match c { - '0' ... '9' => true, - c if c > '\x7f' => general_category::N(c), - _ => false - } + c.is_numeric() } /// Convert a char to its uppercase equivalent @@ -132,6 +120,7 @@ pub fn is_digit(c: char) -> bool { /// /// Returns the char itself if no conversion was made #[inline] +#[deprecated = "use UnicodeChar::to_uppercase"] pub fn to_uppercase(c: char) -> char { conversions::to_upper(c) } @@ -145,6 +134,7 @@ pub fn to_uppercase(c: char) -> char { /// /// Returns the char itself if no conversion if possible #[inline] +#[deprecated = "use UnicodeChar::to_lowercase"] pub fn to_lowercase(c: char) -> char { conversions::to_lower(c) } @@ -158,11 +148,13 @@ pub fn to_lowercase(c: char) -> char { /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) /// recommends that these characters be treated as 1 column (i.e., /// `is_cjk` = `false`) if the context cannot be reliably determined. +#[deprecated = "use UnicodeChar::width"] pub fn width(c: char, is_cjk: bool) -> Option { charwidth::width(c, is_cjk) } /// Useful functions for Unicode characters. 
+#[experimental = "pending prelude organization"] pub trait UnicodeChar { /// Returns whether the specified character is considered a Unicode /// alphabetic code point. @@ -265,29 +257,62 @@ pub trait UnicodeChar { fn width(self, is_cjk: bool) -> Option; } +#[experimental = "pending prelude organization"] impl UnicodeChar for char { - fn is_alphabetic(self) -> bool { is_alphabetic(self) } + fn is_alphabetic(self) -> bool { + match self { + 'a' ... 'z' | 'A' ... 'Z' => true, + c if c > '\x7f' => derived_property::Alphabetic(c), + _ => false + } + } - fn is_XID_start(self) -> bool { is_XID_start(self) } + fn is_XID_start(self) -> bool { derived_property::XID_Start(self) } - fn is_XID_continue(self) -> bool { is_XID_continue(self) } + fn is_XID_continue(self) -> bool { derived_property::XID_Continue(self) } - fn is_lowercase(self) -> bool { is_lowercase(self) } + fn is_lowercase(self) -> bool { + match self { + 'a' ... 'z' => true, + c if c > '\x7f' => derived_property::Lowercase(c), + _ => false + } + } - fn is_uppercase(self) -> bool { is_uppercase(self) } + fn is_uppercase(self) -> bool { + match self { + 'A' ... 'Z' => true, + c if c > '\x7f' => derived_property::Uppercase(c), + _ => false + } + } - fn is_whitespace(self) -> bool { is_whitespace(self) } + fn is_whitespace(self) -> bool { + match self { + ' ' | '\x09' ... '\x0d' => true, + c if c > '\x7f' => property::White_Space(c), + _ => false + } + } - fn is_alphanumeric(self) -> bool { is_alphanumeric(self) } + fn is_alphanumeric(self) -> bool { + self.is_alphabetic() || self.is_numeric() + } - fn is_control(self) -> bool { is_control(self) } + fn is_control(self) -> bool { general_category::Cc(self) } - fn is_numeric(self) -> bool { is_digit(self) } + fn is_numeric(self) -> bool { + match self { + '0' ... 
'9' => true, + c if c > '\x7f' => general_category::N(c), + _ => false + } + } - fn to_lowercase(self) -> char { to_lowercase(self) } + fn to_lowercase(self) -> char { conversions::to_lower(self) } - fn to_uppercase(self) -> char { to_uppercase(self) } + fn to_uppercase(self) -> char { conversions::to_upper(self) } #[experimental = "needs expert opinion. is_cjk flag stands out as ugly"] - fn width(self, is_cjk: bool) -> Option { width(self, is_cjk) } + fn width(self, is_cjk: bool) -> Option { charwidth::width(self, is_cjk) } } diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 99c1ce503cc4d..56b1f0907d5a4 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -24,13 +24,13 @@ use core::iter::{Filter, AdditiveIterator, Iterator, DoubleEndedIterator}; use core::kinds::Sized; use core::option::{Option, None, Some}; use core::str::{CharSplits, StrPrelude}; -use u_char; use u_char::UnicodeChar; use tables::grapheme::GraphemeCat; /// An iterator over the words of a string, separated by a sequence of whitespace +/// FIXME: This should be opaque pub type Words<'a> = - Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>; + Filter<'a, &'a str, CharSplits<'a, |char|:'a -> bool>>; /// Methods for Unicode string slices pub trait UnicodeStrPrelude for Sized? 
{ @@ -143,14 +143,15 @@ impl UnicodeStrPrelude for str { #[inline] fn words(&self) -> Words { - self.split(u_char::is_whitespace).filter(|s| !s.is_empty()) + let f = |c: char| c.is_whitespace(); + self.split(f).filter(|s| !s.is_empty()) } #[inline] - fn is_whitespace(&self) -> bool { self.chars().all(u_char::is_whitespace) } + fn is_whitespace(&self) -> bool { self.chars().all(|c| c.is_whitespace()) } #[inline] - fn is_alphanumeric(&self) -> bool { self.chars().all(u_char::is_alphanumeric) } + fn is_alphanumeric(&self) -> bool { self.chars().all(|c| c.is_alphanumeric()) } #[inline] fn width(&self, is_cjk: bool) -> uint { @@ -164,12 +165,12 @@ impl UnicodeStrPrelude for str { #[inline] fn trim_left(&self) -> &str { - self.trim_left_chars(u_char::is_whitespace) + self.trim_left_chars(|c: char| c.is_whitespace()) } #[inline] fn trim_right(&self) -> &str { - self.trim_right_chars(u_char::is_whitespace) + self.trim_right_chars(|c: char| c.is_whitespace()) } } From f39c29d0bc0e58d76e2289dc52038770797a8f38 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Mon, 3 Nov 2014 15:18:45 -0800 Subject: [PATCH 16/19] unicode: Rename is_XID_start to is_xid_start, is_XID_continue to is_xid_continue --- src/libfmt_macros/lib.rs | 4 ++-- src/librustc_trans/back/link.rs | 2 +- src/librustdoc/test.rs | 4 ++-- src/libsyntax/parse/lexer/mod.rs | 6 +++--- src/libunicode/u_char.rs | 25 +++++++++++++++++++++++++ 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index ed86ad52bb5d4..d3bee557220bf 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -383,7 +383,7 @@ impl<'a> Parser<'a> { /// characters. 
fn word(&mut self) -> &'a str { let start = match self.cur.clone().next() { - Some((pos, c)) if c.is_XID_start() => { + Some((pos, c)) if c.is_xid_start() => { self.cur.next(); pos } @@ -392,7 +392,7 @@ impl<'a> Parser<'a> { let mut end; loop { match self.cur.clone().next() { - Some((_, c)) if c.is_XID_continue() => { + Some((_, c)) if c.is_xid_continue() => { self.cur.next(); } Some((pos, _)) => { end = pos; break } diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index db9ebac163c73..3715256e3ec2b 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -271,7 +271,7 @@ pub fn sanitize(s: &str) -> String { // Underscore-qualify anything that didn't start as an ident. if result.len() > 0u && result.as_bytes()[0] != '_' as u8 && - ! (result.as_bytes()[0] as char).is_XID_start() { + ! (result.as_bytes()[0] as char).is_xid_start() { return format!("_{}", result.as_slice()); } diff --git a/src/librustdoc/test.rs b/src/librustdoc/test.rs index 63007cf15c651..2a5972bb3d90b 100644 --- a/src/librustdoc/test.rs +++ b/src/librustdoc/test.rs @@ -299,8 +299,8 @@ impl Collector { // we use these headings as test names, so it's good if // they're valid identifiers. 
let name = name.chars().enumerate().map(|(i, c)| { - if (i == 0 && c.is_XID_start()) || - (i != 0 && c.is_XID_continue()) { + if (i == 0 && c.is_xid_start()) || + (i != 0 && c.is_xid_continue()) { c } else { '_' diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 9b3e25c5851c9..a88029e087b15 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -692,7 +692,7 @@ impl<'a> StringReader<'a> { // integer literal followed by field/method access or a range pattern // (`0..2` and `12.foo()`) if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0') - .is_XID_start() { + .is_xid_start() { // might have stuff after the ., and if it does, it needs to start // with a number self.bump(); @@ -1385,7 +1385,7 @@ fn ident_start(c: Option<char>) -> bool { (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' - || (c > '\x7f' && c.is_XID_start()) + || (c > '\x7f' && c.is_xid_start()) } fn ident_continue(c: Option<char>) -> bool { @@ -1395,7 +1395,7 @@ fn ident_continue(c: Option<char>) -> bool { || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' - || (c > '\x7f' && c.is_XID_continue()) + || (c > '\x7f' && c.is_xid_continue()) } #[cfg(test)] diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index f347ab6a21e22..1c4c4d4c4be4d 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -167,8 +167,18 @@ pub trait UnicodeChar { /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to ID_Start but modified for closure under NFKx. #[allow(non_snake_case)] + #[deprecated = "use is_xid_start"] fn is_XID_start(self) -> bool; + /// Returns whether the specified character satisfies the 'XID_Start' + /// Unicode property. + /// + /// 'XID_Start' is a Unicode Derived Property specified in + /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), + /// mostly similar to ID_Start but modified for closure under NFKx. 
+ #[allow(non_snake_case)] + fn is_xid_start(self) -> bool; + /// Returns whether the specified `char` satisfies the 'XID_Continue' /// Unicode property. /// @@ -176,8 +186,17 @@ pub trait UnicodeChar { /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to 'ID_Continue' but modified for closure under NFKx. #[allow(non_snake_case)] + #[deprecated = "use is_xid_continue"] fn is_XID_continue(self) -> bool; + /// Returns whether the specified `char` satisfies the 'XID_Continue' + /// Unicode property. + /// + /// 'XID_Continue' is a Unicode Derived Property specified in + /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), + /// mostly similar to 'ID_Continue' but modified for closure under NFKx. + #[allow(non_snake_case)] + fn is_xid_continue(self) -> bool; /// Indicates whether a character is in lowercase. /// @@ -267,10 +286,16 @@ impl UnicodeChar for char { } } + #[deprecated = "use is_xid_start"] fn is_XID_start(self) -> bool { derived_property::XID_Start(self) } + #[deprecated = "use is_xid_continue"] fn is_XID_continue(self) -> bool { derived_property::XID_Continue(self) } + fn is_xid_start(self) -> bool { derived_property::XID_Start(self) } + + fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } + fn is_lowercase(self) -> bool { match self { 'a' ... 'z' => true, From 73622f8fdf905f273cf7509dcbcf9f7fb06f022a Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 5 Nov 2014 17:00:49 -0800 Subject: [PATCH 17/19] unicode: Remove unused `non_snake_case` allows. 
--- src/libunicode/u_char.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 1c4c4d4c4be4d..369336639a7fe 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -176,7 +176,6 @@ pub trait UnicodeChar { /// 'XID_Start' is a Unicode Derived Property specified in /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to ID_Start but modified for closure under NFKx. - #[allow(non_snake_case)] fn is_xid_start(self) -> bool; /// Returns whether the specified `char` satisfies the 'XID_Continue' @@ -195,7 +194,6 @@ pub trait UnicodeChar { /// 'XID_Continue' is a Unicode Derived Property specified in /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), /// mostly similar to 'ID_Continue' but modified for closure under NFKx. - #[allow(non_snake_case)] fn is_xid_continue(self) -> bool; /// Indicates whether a character is in lowercase. From 879af89baf65b9f94d676924e249958c86eb21b3 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 5 Nov 2014 17:50:09 -0800 Subject: [PATCH 18/19] core: Update docs for escape_unicode, escape_default --- src/libcore/char.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 1210465098a11..1fe840650dc96 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -262,7 +262,8 @@ pub trait Char { #[deprecated = "use the char::from_u32 free function"] fn from_u32(i: u32) -> Option<char>; - /// Returns the hexadecimal Unicode escape of a character. + /// Returns an iterator that yields the hexadecimal Unicode escape + /// of a character, as `char`s. /// /// The rules are as follows: /// @@ -272,8 +273,8 @@ pub trait Char { #[unstable = "pending error conventions, trait organization"] fn escape_unicode(self) -> UnicodeEscapedChars; - /// Returns a 'default' ASCII and C++11-like literal escape of a - /// character. 
+ /// Returns an iterator that yields the 'default' ASCII and + /// C++11-like literal escape of a character, as `char`s. /// /// The default is chosen with a bias toward producing literals that are /// legal in a variety of languages, including C++11 and similar C-family From 75ffadf8b65495ababae49d8162f85c58cd2c2a9 Mon Sep 17 00:00:00 2001 From: Brian Anderson Date: Wed, 5 Nov 2014 18:17:27 -0800 Subject: [PATCH 19/19] core: Convert a 'failure' to 'panic' in docs --- src/libcore/char.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 1fe840650dc96..272b36847991f 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -225,9 +225,9 @@ pub trait Char { /// Returns `true` if `c` is a valid digit under `radix`, and `false` /// otherwise. /// - /// # Failure + /// # Panics /// - /// Fails if given a radix > 36. + /// Panics if given a radix > 36. #[unstable = "pending error conventions"] fn is_digit(self, radix: uint) -> bool;