diff --git a/.travis.yml b/.travis.yml
index a736bf7..f1132c9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,9 +7,11 @@ sudo: false
 script:
   - cargo build --verbose
   - cargo test --verbose
+  - cargo test --verbose --no-default-features
   - cargo package
   - cd target/package/unicode-normalization-*
   - cargo test --verbose
+  - cargo test --verbose --no-default-features
 notifications:
   email:
     on_success: never
diff --git a/Cargo.toml b/Cargo.toml
index ab5cb0c..cca619b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "unicode-normalization"
-version = "0.1.12"
+version = "0.1.13"
 authors = ["kwantam <kwantam@gmail.com>"]
 
 homepage = "https://github.com/unicode-rs/unicode-normalization"
@@ -18,8 +18,15 @@ Decomposition and Recomposition, as described in
 Unicode Standard Annex #15.
 """
 
+edition = "2018"
+
 exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt", "tests/*" ]
 
 [dependencies.tinyvec]
-version = "0.3.2"
+version = "0.3.3"
 features = ["alloc"]
+
+
+[features]
+default = ["std"]
+std = []
diff --git a/README.md b/README.md
index 4dbeb3d..0c63c3a 100644
--- a/README.md
+++ b/README.md
@@ -31,5 +31,9 @@ to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-unicode-normalization = "0.1.8"
+unicode-normalization = "0.1.13"
 ```
+
+## `no_std` + `alloc` support
+
+This crate is completely `no_std` + `alloc` compatible. This can be enabled by disabling the `std` feature, i.e. by specifying `default-features = false` for this crate in your `Cargo.toml`.
diff --git a/benches/bench.rs b/benches/bench.rs
index b3ea836..a977156 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -1,7 +1,7 @@
 #![feature(test)]
-#![feature(iterator_step_by)]
-extern crate unicode_normalization;
+
 extern crate test;
+extern crate unicode_normalization;
 
 use std::fs;
 use test::Bencher;
diff --git a/scripts/unicode.py b/scripts/unicode.py
index d67fa6e..a5bba96 100644
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -477,8 +477,8 @@ def minimal_perfect_hash(d):
     data = UnicodeData()
     with open("tables.rs", "w", newline = "\n") as out:
         out.write(PREAMBLE)
-        out.write("use quick_check::IsNormalized;\n")
-        out.write("use quick_check::IsNormalized::*;\n")
+        out.write("use crate::quick_check::IsNormalized;\n")
+        out.write("use crate::quick_check::IsNormalized::*;\n")
         out.write("\n")
 
         version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
diff --git a/src/__test_api.rs b/src/__test_api.rs
index 9deff6b..f1a3f92 100644
--- a/src/__test_api.rs
+++ b/src/__test_api.rs
@@ -4,10 +4,15 @@
 //
 // If you're caught using this outside this crates tests/, you get to clean up the mess.
 
+#[cfg(not(feature = "std"))]
+use crate::no_std_prelude::*;
+
 use crate::stream_safe::StreamSafe;
+
 pub fn stream_safe(s: &str) -> String {
-  StreamSafe::new(s.chars()).collect()
+    StreamSafe::new(s.chars()).collect()
 }
+
 pub mod quick_check {
     pub use crate::quick_check::*;
 }
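To illustrate the feature split introduced above: a downstream crate can opt out of `std` and rely on `alloc` instead. A minimal sketch — the dependency stanza in the comment and the `to_nfc` helper are illustrative, not part of this patch:

```rust
// Downstream Cargo.toml (illustrative):
//
//     [dependencies.unicode-normalization]
//     version = "0.1.13"
//     default-features = false
//
#![no_std]
extern crate alloc;

use alloc::string::String;
use unicode_normalization::UnicodeNormalization;

/// Collect the NFC form of `s` without linking `std`.
pub fn to_nfc(s: &str) -> String {
    s.nfc().collect()
}
```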
diff --git a/src/decompose.rs b/src/decompose.rs
index 6533c0c..f228023 100644
--- a/src/decompose.rs
+++ b/src/decompose.rs
@@ -7,10 +7,10 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+use core::fmt::{self, Write};
+use core::iter::Fuse;
+use core::ops::Range;
 use tinyvec::TinyVec;
-use std::fmt::{self, Write};
-use std::iter::Fuse;
-use std::ops::Range;
 
 #[derive(Clone)]
 enum DecompositionType {
@@ -37,7 +37,7 @@ pub struct Decompositions<I> {
 }
 
 #[inline]
-pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
+pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
     Decompositions {
         kind: self::DecompositionType::Canonical,
         iter: iter.fuse(),
@@ -47,7 +47,7 @@ pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
 }
 
 #[inline]
-pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
+pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
     Decompositions {
         kind: self::DecompositionType::Compatible,
         iter: iter.fuse(),
@@ -99,7 +99,7 @@ impl<I> Decompositions<I> {
     }
 }
 
-impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
+impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
     type Item = char;
 
     #[inline]
@@ -149,7 +149,7 @@ impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
     }
 }
 
-impl<I: Iterator<Item=char> + Clone> fmt::Display for Decompositions<I> {
+impl<I: Iterator<Item = char> + Clone> fmt::Display for Decompositions<I> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         for c in self.clone() {
             f.write_char(c)?;
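For reference, the decomposition iterators this file implements are driven through the `UnicodeNormalization` trait. A small usage sketch; the expected strings match this crate's own tests:

```rust
use unicode_normalization::UnicodeNormalization;

fn main() {
    // NFD: U+1E0B LATIN SMALL LETTER D WITH DOT ABOVE → 'd' + U+0307.
    assert_eq!("\u{1e0b}".nfd().collect::<String>(), "d\u{307}");

    // NFKD additionally applies compatibility mappings,
    // e.g. U+FB01 LATIN SMALL LIGATURE FI → "fi".
    assert_eq!("\u{fb01}".nfkd().collect::<String>(), "fi");
}
```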
diff --git a/src/lib.rs b/src/lib.rs
index 56142a2..6749adc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -38,81 +38,78 @@
 //! ```
 
 #![deny(missing_docs, unsafe_code)]
-#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
-       html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
+#![doc(
+    html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
+    html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
+)]
+#![cfg_attr(not(feature = "std"), no_std)]
+
+#[cfg(not(feature = "std"))]
+extern crate alloc;
+
+#[cfg(feature = "std")]
+extern crate core;
 
 extern crate tinyvec;
 
-pub use tables::UNICODE_VERSION;
-pub use decompose::Decompositions;
-pub use quick_check::{
+pub use crate::decompose::Decompositions;
+pub use crate::quick_check::{
+    is_nfc, is_nfc_quick, is_nfc_stream_safe, is_nfc_stream_safe_quick, is_nfd, is_nfd_quick,
+    is_nfd_stream_safe, is_nfd_stream_safe_quick, is_nfkc, is_nfkc_quick, is_nfkd, is_nfkd_quick,
     IsNormalized,
-    is_nfc,
-    is_nfc_quick,
-    is_nfkc,
-    is_nfkc_quick,
-    is_nfc_stream_safe,
-    is_nfc_stream_safe_quick,
-    is_nfd,
-    is_nfd_quick,
-    is_nfkd,
-    is_nfkd_quick,
-    is_nfd_stream_safe,
-    is_nfd_stream_safe_quick,
 };
-pub use recompose::Recompositions;
-pub use stream_safe::StreamSafe;
-use std::str::Chars;
+pub use crate::recompose::Recompositions;
+pub use crate::stream_safe::StreamSafe;
+pub use crate::tables::UNICODE_VERSION;
+use core::str::Chars;
+
+mod no_std_prelude;
 
 mod decompose;
 mod lookups;
 mod normalize;
 mod perfect_hash;
-mod recompose;
 mod quick_check;
+mod recompose;
 mod stream_safe;
+
+#[rustfmt::skip]
 mod tables;
 
-#[cfg(test)]
-mod test;
 #[doc(hidden)]
 pub mod __test_api;
+#[cfg(test)]
+mod test;
 
 /// Methods for composing and decomposing characters.
 pub mod char {
-    pub use normalize::{decompose_canonical, decompose_compatible, compose};
+    pub use crate::normalize::{compose, decompose_canonical, decompose_compatible};
 
-    pub use lookups::{canonical_combining_class, is_combining_mark};
+    pub use crate::lookups::{canonical_combining_class, is_combining_mark};
 }
 
-
 /// Methods for iterating over strings while applying Unicode normalizations
 /// as described in
 /// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
-pub trait UnicodeNormalization<I: Iterator<Item=char>> {
+pub trait UnicodeNormalization<I: Iterator<Item = char>> {
     /// Returns an iterator over the string in Unicode Normalization Form D
     /// (canonical decomposition).
-    #[inline]
     fn nfd(self) -> Decompositions<I>;
 
     /// Returns an iterator over the string in Unicode Normalization Form KD
     /// (compatibility decomposition).
-    #[inline]
     fn nfkd(self) -> Decompositions<I>;
 
     /// An Iterator over the string in Unicode Normalization Form C
     /// (canonical decomposition followed by canonical composition).
-    #[inline]
     fn nfc(self) -> Recompositions<I>;
 
     /// An Iterator over the string in Unicode Normalization Form KC
     /// (compatibility decomposition followed by canonical composition).
-    #[inline]
     fn nfkc(self) -> Recompositions<I>;
 
     /// An Iterator over the string with Conjoining Grapheme Joiner characters
     /// inserted according to the Stream-Safe Text Process (UAX15-D4)
-    #[inline]
     fn stream_safe(self) -> StreamSafe<I>;
 }
 
@@ -143,7 +140,7 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
     }
 }
 
-impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
+impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
     #[inline]
     fn nfd(self) -> Decompositions<I> {
         decompose::new_canonical(self)
diff --git a/src/lookups.rs b/src/lookups.rs
index edaa0a0..5bf5090 100644
--- a/src/lookups.rs
+++ b/src/lookups.rs
@@ -10,46 +10,81 @@
 
 //! Lookups of unicode properties using minimal perfect hashing.
 
-use perfect_hash::mph_lookup;
-use tables::*;
+use crate::perfect_hash::mph_lookup;
+use crate::tables::*;
 
 /// Look up the canonical combining class for a codepoint.
-/// 
+///
 /// The value returned is as defined in the Unicode Character Database.
 pub fn canonical_combining_class(c: char) -> u8 {
-    mph_lookup(c.into(), CANONICAL_COMBINING_CLASS_SALT, CANONICAL_COMBINING_CLASS_KV,
-               u8_lookup_fk, u8_lookup_fv, 0)
+    mph_lookup(
+        c.into(),
+        CANONICAL_COMBINING_CLASS_SALT,
+        CANONICAL_COMBINING_CLASS_KV,
+        u8_lookup_fk,
+        u8_lookup_fv,
+        0,
+    )
 }
 
 pub(crate) fn composition_table(c1: char, c2: char) -> Option<char> {
     if c1 < '\u{10000}' && c2 < '\u{10000}' {
-        mph_lookup((c1 as u32) << 16 | (c2 as u32),
-                   COMPOSITION_TABLE_SALT, COMPOSITION_TABLE_KV,
-                   pair_lookup_fk, pair_lookup_fv_opt, None)
+        mph_lookup(
+            (c1 as u32) << 16 | (c2 as u32),
+            COMPOSITION_TABLE_SALT,
+            COMPOSITION_TABLE_KV,
+            pair_lookup_fk,
+            pair_lookup_fv_opt,
+            None,
+        )
     } else {
        composition_table_astral(c1, c2)
    }
 }
 
 pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> {
-    mph_lookup(c.into(), CANONICAL_DECOMPOSED_SALT, CANONICAL_DECOMPOSED_KV,
-               pair_lookup_fk, pair_lookup_fv_opt, None)
+    mph_lookup(
+        c.into(),
+        CANONICAL_DECOMPOSED_SALT,
+        CANONICAL_DECOMPOSED_KV,
+        pair_lookup_fk,
+        pair_lookup_fv_opt,
+        None,
+    )
 }
 
 pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> {
-    mph_lookup(c.into(), COMPATIBILITY_DECOMPOSED_SALT, COMPATIBILITY_DECOMPOSED_KV,
-               pair_lookup_fk, pair_lookup_fv_opt, None)
+    mph_lookup(
+        c.into(),
+        COMPATIBILITY_DECOMPOSED_SALT,
+        COMPATIBILITY_DECOMPOSED_KV,
+        pair_lookup_fk,
+        pair_lookup_fv_opt,
+        None,
+    )
 }
 
 /// Return whether the given character is a combining mark (`General_Category=Mark`)
 pub fn is_combining_mark(c: char) -> bool {
-    mph_lookup(c.into(), COMBINING_MARK_SALT, COMBINING_MARK_KV,
-               bool_lookup_fk, bool_lookup_fv, false)
+    mph_lookup(
+        c.into(),
+        COMBINING_MARK_SALT,
+        COMBINING_MARK_KV,
+        bool_lookup_fk,
+        bool_lookup_fv,
+        false,
+    )
 }
 
 pub fn stream_safe_trailing_nonstarters(c: char) -> usize {
-    mph_lookup(c.into(), TRAILING_NONSTARTERS_SALT, TRAILING_NONSTARTERS_KV,
-               u8_lookup_fk, u8_lookup_fv, 0) as usize
+    mph_lookup(
+        c.into(),
+        TRAILING_NONSTARTERS_SALT,
+        TRAILING_NONSTARTERS_KV,
+        u8_lookup_fk,
+        u8_lookup_fv,
+        0,
+    ) as usize
 }
 
 /// Extract the key in a 24 bit key and 8 bit value packed in a u32.
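The lookups reformatted above back the public helpers re-exported from the `char` module. A quick sketch of what they return (the combining class of U+0301 is 230 per the Unicode Character Database):

```rust
use unicode_normalization::char::{canonical_combining_class, is_combining_mark};

fn main() {
    // U+0301 COMBINING ACUTE ACCENT: General_Category=Mn, ccc = 230.
    assert!(is_combining_mark('\u{301}'));
    assert_eq!(canonical_combining_class('\u{301}'), 230);

    // Starters such as ASCII letters have combining class 0.
    assert_eq!(canonical_combining_class('a'), 0);
    assert!(!is_combining_mark('a'));
}
```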
diff --git a/src/no_std_prelude.rs b/src/no_std_prelude.rs
new file mode 100755
index 0000000..838d122
--- /dev/null
+++ b/src/no_std_prelude.rs
@@ -0,0 +1,6 @@
+#[cfg(not(feature = "std"))]
+pub use alloc::{
+    str::Chars,
+    string::{String, ToString},
+    vec::Vec,
+};
diff --git a/src/normalize.rs b/src/normalize.rs
index 87456df..1097c42 100644
--- a/src/normalize.rs
+++ b/src/normalize.rs
@@ -9,15 +9,20 @@
 // except according to those terms.
 
 //! Functions for computing canonical and compatible decompositions for Unicode characters.
 
-use std::char;
-use std::ops::FnMut;
-use lookups::{canonical_fully_decomposed, composition_table, compatibility_fully_decomposed};
+use crate::lookups::{
+    canonical_fully_decomposed, compatibility_fully_decomposed, composition_table,
+};
+
+use core::{char, ops::FnMut};
 
 /// Compute canonical Unicode decomposition for character.
 /// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
 /// for more information.
 #[inline]
-pub fn decompose_canonical<F>(c: char, emit_char: F) where F: FnMut(char) {
+pub fn decompose_canonical<F>(c: char, emit_char: F)
+where
+    F: FnMut(char),
+{
     decompose(c, canonical_fully_decomposed, emit_char)
 }
 
@@ -26,14 +31,16 @@ pub fn decompose_canonical<F>(c: char, emit_char: F) where F: FnMut(char) {
 /// for more information.
 #[inline]
 pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) {
-    let decompose_char = |c| compatibility_fully_decomposed(c)
-        .or_else(|| canonical_fully_decomposed(c));
+    let decompose_char =
+        |c| compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c));
     decompose(c, decompose_char, emit_char)
 }
 
 #[inline]
 fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
-    where D: Fn(char) -> Option<&'static [char]>, F: FnMut(char)
+where
+    D: Fn(char) -> Option<&'static [char]>,
+    F: FnMut(char),
 {
     // 7-bit ASCII never decomposes
     if c <= '\x7f' {
@@ -74,8 +81,8 @@ const T_BASE: u32 = 0x11A7;
 const L_COUNT: u32 = 19;
 const V_COUNT: u32 = 21;
 const T_COUNT: u32 = 28;
-const N_COUNT: u32 = (V_COUNT * T_COUNT);
-const S_COUNT: u32 = (L_COUNT * N_COUNT);
+const N_COUNT: u32 = V_COUNT * T_COUNT;
+const S_COUNT: u32 = L_COUNT * N_COUNT;
 
 const S_LAST: u32 = S_BASE + S_COUNT - 1;
 const L_LAST: u32 = L_BASE + L_COUNT - 1;
@@ -93,7 +100,10 @@ pub(crate) fn is_hangul_syllable(c: char) -> bool {
 // Decompose a precomposed Hangul syllable
 #[allow(unsafe_code)]
 #[inline(always)]
-fn decompose_hangul<F>(s: char, mut emit_char: F) where F: FnMut(char) {
+fn decompose_hangul<F>(s: char, mut emit_char: F)
+where
+    F: FnMut(char),
+{
     let s_index = s as u32 - S_BASE;
     let l_index = s_index / N_COUNT;
     unsafe {
@@ -113,7 +123,11 @@ fn decompose_hangul<F>(s: char, mut emit_char: F) where F: FnMut(char) {
 pub(crate) fn hangul_decomposition_length(s: char) -> usize {
     let si = s as u32 - S_BASE;
     let ti = si % T_COUNT;
-    if ti > 0 { 3 } else { 2 }
+    if ti > 0 {
+        3
+    } else {
+        2
+    }
 }
 
 // Compose a pair of Hangul Jamo
@@ -124,17 +138,17 @@ fn compose_hangul(a: char, b: char) -> Option<char> {
     let (a, b) = (a as u32, b as u32);
     match (a, b) {
         // Compose a leading consonant and a vowel together into an LV_Syllable
-        (L_BASE ... L_LAST, V_BASE ... V_LAST) => {
+        (L_BASE...L_LAST, V_BASE...V_LAST) => {
             let l_index = a - L_BASE;
             let v_index = b - V_BASE;
             let lv_index = l_index * N_COUNT + v_index * T_COUNT;
             let s = S_BASE + lv_index;
-            Some(unsafe {char::from_u32_unchecked(s)})
-        },
+            Some(unsafe { char::from_u32_unchecked(s) })
+        }
         // Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
-        (S_BASE ... S_LAST, T_FIRST ... T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
-            Some(unsafe {char::from_u32_unchecked(a + (b - T_BASE))})
-        },
+        (S_BASE...S_LAST, T_FIRST...T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
+            Some(unsafe { char::from_u32_unchecked(a + (b - T_BASE)) })
+        }
         _ => None,
     }
 }
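The Hangul constants above encode the algorithmic mapping from UAX #15: an LV syllable is `S_BASE + l_index * N_COUNT + v_index * T_COUNT`. A sketch of the public entry points exercising that arithmetic:

```rust
use unicode_normalization::char::{compose, decompose_canonical};

fn main() {
    // U+1100 (choseong kiyeok) + U+1161 (jungseong a) compose to
    // U+AC00 (가): l_index = 0, v_index = 0, so S = S_BASE = 0xAC00.
    assert_eq!(compose('\u{1100}', '\u{1161}'), Some('\u{ac00}'));

    // decompose_canonical emits the decomposition through a callback;
    // an LV syllable (trailing index 0) decomposes to two jamo.
    let mut jamo = Vec::new();
    decompose_canonical('\u{ac00}', |c| jamo.push(c));
    assert_eq!(jamo, ['\u{1100}', '\u{1161}']);
}
```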
diff --git a/src/perfect_hash.rs b/src/perfect_hash.rs
index 0a81714..3dbc166 100644
--- a/src/perfect_hash.rs
+++ b/src/perfect_hash.rs
@@ -20,16 +20,25 @@ fn my_hash(key: u32, salt: u32, n: usize) -> usize {
 }
 
 /// Do a lookup using minimal perfect hashing.
-/// 
+///
 /// The table is stored as a sequence of "salt" values, then a sequence of
 /// values that contain packed key/value pairs. The strategy is to hash twice.
 /// The first hash retrieves a salt value that makes the second hash unique.
 /// The hash function doesn't have to be very good, just good enough that the
 /// resulting map is unique.
 #[inline]
-pub(crate) fn mph_lookup<KV, V, FK, FV>(x: u32, salt: &[u16], kv: &[KV], fk: FK, fv: FV,
-                                        default: V) -> V
-    where KV: Copy, FK: Fn(KV) -> u32, FV: Fn(KV) -> V
+pub(crate) fn mph_lookup<KV, V, FK, FV>(
+    x: u32,
+    salt: &[u16],
+    kv: &[KV],
+    fk: FK,
+    fv: FV,
+    default: V,
+) -> V
+where
+    KV: Copy,
+    FK: Fn(KV) -> u32,
+    FV: Fn(KV) -> V,
 {
     let s = salt[my_hash(x, 0, salt.len())] as u32;
     let key_val = kv[my_hash(x, s, salt.len())];
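A toy rendering of the two-level scheme the doc comment describes — hash once with salt 0 to pick a per-bucket salt, hash again with that salt to pick the slot, then verify the stored key. The hash and tables here are stand-ins, not the crate's generated ones:

```rust
fn toy_hash(key: u32, salt: u32, n: usize) -> usize {
    (key.wrapping_mul(2_654_435_769).wrapping_add(salt) as usize) % n
}

fn toy_mph_lookup(x: u32, salt: &[u16], kv: &[(u32, u8)], default: u8) -> u8 {
    // First hash selects a salt; the salted second hash selects the slot.
    let s = salt[toy_hash(x, 0, salt.len())] as u32;
    let (key, val) = kv[toy_hash(x, s, kv.len())];
    // Comparing the stored key rejects inputs that were never in the table.
    if key == x {
        val
    } else {
        default
    }
}
```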
diff --git a/src/quick_check.rs b/src/quick_check.rs
index 49b1460..4507b2a 100644
--- a/src/quick_check.rs
+++ b/src/quick_check.rs
@@ -1,7 +1,7 @@
-use UnicodeNormalization;
-use lookups::canonical_combining_class;
-use stream_safe;
-use tables;
+use crate::lookups::canonical_combining_class;
+use crate::stream_safe;
+use crate::tables;
+use crate::UnicodeNormalization;
 
 /// The QuickCheck algorithm can quickly determine if a text is or isn't
 /// normalized without any allocations in many cases, but it has to be able to
@@ -19,7 +19,9 @@ pub enum IsNormalized {
 // https://unicode.org/reports/tr15/#Detecting_Normalization_Forms
 #[inline]
 fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
-    where I: Iterator<Item=char>, F: Fn(char) -> IsNormalized
+where
+    I: Iterator<Item = char>,
+    F: Fn(char) -> IsNormalized,
 {
     let mut last_cc = 0u8;
     let mut nonstarter_count = 0;
@@ -42,7 +44,7 @@ fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
             IsNormalized::No => return IsNormalized::No,
             IsNormalized::Maybe => {
                 result = IsNormalized::Maybe;
-            },
+            }
         }
         if stream_safe {
             let decomp = stream_safe::classify_nonstarters(ch);
@@ -67,38 +69,37 @@ fn quick_check<F, I>(s: I, is_allowed: F, stream_safe: bool) -> IsNormalized
 /// `IsNormalized::Maybe` if further checks are necessary. In this case a check
 /// like `s.chars().nfc().eq(s.chars())` should suffice.
 #[inline]
-pub fn is_nfc_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
+pub fn is_nfc_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
     quick_check(s, tables::qc_nfc, false)
 }
 
-
 /// Quickly check if a string is in NFKC.
 #[inline]
-pub fn is_nfkc_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
+pub fn is_nfkc_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
     quick_check(s, tables::qc_nfkc, false)
 }
 
 /// Quickly check if a string is in NFD.
 #[inline]
-pub fn is_nfd_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
+pub fn is_nfd_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
     quick_check(s, tables::qc_nfd, false)
 }
 
 /// Quickly check if a string is in NFKD.
 #[inline]
-pub fn is_nfkd_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
+pub fn is_nfkd_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
     quick_check(s, tables::qc_nfkd, false)
 }
 
 /// Quickly check if a string is Stream-Safe NFC.
 #[inline]
-pub fn is_nfc_stream_safe_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
+pub fn is_nfc_stream_safe_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
     quick_check(s, tables::qc_nfc, true)
 }
 
 /// Quickly check if a string is Stream-Safe NFD.
 #[inline]
-pub fn is_nfd_stream_safe_quick<I: Iterator<Item=char>>(s: I) -> IsNormalized {
+pub fn is_nfd_stream_safe_quick<I: Iterator<Item = char>>(s: I) -> IsNormalized {
     quick_check(s, tables::qc_nfd, true)
 }
 
@@ -164,11 +165,7 @@ pub fn is_nfd_stream_safe(s: &str) -> bool {
 
 #[cfg(test)]
 mod tests {
-    use super::{
-        IsNormalized,
-        is_nfc_stream_safe_quick,
-        is_nfd_stream_safe_quick,
-    };
+    use super::{is_nfc_stream_safe_quick, is_nfd_stream_safe_quick, IsNormalized};
 
     #[test]
     fn test_stream_safe_nfd() {
diff --git a/src/recompose.rs b/src/recompose.rs
index 40b20dc..2a1960a 100644
--- a/src/recompose.rs
+++ b/src/recompose.rs
@@ -8,9 +8,9 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use decompose::Decompositions;
+use crate::decompose::Decompositions;
+use core::fmt::{self, Write};
 use tinyvec::TinyVec;
-use std::fmt::{self, Write};
 
 #[derive(Clone)]
 enum RecompositionState {
@@ -30,7 +30,7 @@ pub struct Recompositions<I> {
 }
 
 #[inline]
-pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
+pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
     Recompositions {
         iter: super::decompose::new_canonical(iter),
         state: self::RecompositionState::Composing,
@@ -41,7 +41,7 @@ pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
 }
 
 #[inline]
-pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
+pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
     Recompositions {
         iter: super::decompose::new_compatible(iter),
         state: self::RecompositionState::Composing,
@@ -51,7 +51,7 @@ pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
     }
 }
 
-impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
+impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
     type Item = char;
 
     #[inline]
@@ -70,26 +70,24 @@ impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
                     }
                     self.composee = Some(ch);
                     continue;
-                },
+                }
                 Some(k) => k,
             };
             match self.last_ccc {
-                None => {
-                    match super::char::compose(k, ch) {
-                        Some(r) => {
-                            self.composee = Some(r);
-                            continue;
-                        }
-                        None => {
-                            if ch_class == 0 {
-                                self.composee = Some(ch);
-                                return Some(k);
-                            }
-                            self.buffer.push(ch);
-                            self.last_ccc = Some(ch_class);
+                None => match super::char::compose(k, ch) {
+                    Some(r) => {
+                        self.composee = Some(r);
+                        continue;
+                    }
+                    None => {
+                        if ch_class == 0 {
+                            self.composee = Some(ch);
+                            return Some(k);
                         }
+                        self.buffer.push(ch);
+                        self.last_ccc = Some(ch_class);
                     }
-                }
+                },
                 Some(l_class) => {
                     if l_class >= ch_class {
                         // `ch` is blocked from `composee`
@@ -121,36 +119,32 @@ impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
                     return self.composee.take();
                 }
             }
-            Purging(next) => {
-                match self.buffer.get(next).cloned() {
-                    None => {
-                        self.buffer.clear();
-                        self.state = Composing;
-                    }
-                    s => {
-                        self.state = Purging(next + 1);
-                        return s
-                    }
+            Purging(next) => match self.buffer.get(next).cloned() {
+                None => {
+                    self.buffer.clear();
+                    self.state = Composing;
                 }
-            }
-            Finished(next) => {
-                match self.buffer.get(next).cloned() {
-                    None => {
-                        self.buffer.clear();
-                        return self.composee.take()
-                    }
-                    s => {
-                        self.state = Finished(next + 1);
-                        return s
-                    }
+                s => {
+                    self.state = Purging(next + 1);
+                    return s;
                 }
-            }
+            },
+            Finished(next) => match self.buffer.get(next).cloned() {
+                None => {
+                    self.buffer.clear();
+                    return self.composee.take();
+                }
+                s => {
+                    self.state = Finished(next + 1);
+                    return s;
+                }
+            },
         }
     }
 }
 
-impl<I: Iterator<Item=char> + Clone> fmt::Display for Recompositions<I> {
+impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         for c in self.clone() {
             f.write_char(c)?;
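Tying the two files together: the quick check classifies without allocating, and the recomposition iterator settles the `Maybe` cases. A usage sketch (U+0301 carries `NFC_Quick_Check=Maybe`, so the full comparison has to decide):

```rust
use unicode_normalization::{is_nfc, is_nfc_quick, IsNormalized, UnicodeNormalization};

fn main() {
    // 'e' + U+0301 recomposes to U+00E9 (é) under NFC.
    assert_eq!("e\u{301}".nfc().collect::<String>(), "\u{e9}");

    // The quick check alone cannot rule composition in or out here...
    assert!(matches!(is_nfc_quick("e\u{301}".chars()), IsNormalized::Maybe));
    // ...so the full check runs the comparison and answers definitively.
    assert!(!is_nfc("e\u{301}"));
    assert!(is_nfc("\u{e9}"));
}
```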
diff --git a/src/stream_safe.rs b/src/stream_safe.rs
index 2cfcc36..1ba7d76 100644
--- a/src/stream_safe.rs
+++ b/src/stream_safe.rs
@@ -1,12 +1,9 @@
-use normalize::{
-    hangul_decomposition_length,
-    is_hangul_syllable,
-};
-use lookups::{
+use crate::lookups::{
     canonical_combining_class,
     canonical_fully_decomposed,
     compatibility_fully_decomposed,
     stream_safe_trailing_nonstarters,
 };
-use tables::stream_safe_leading_nonstarters;
+use crate::normalize::{hangul_decomposition_length, is_hangul_syllable};
+use crate::tables::stream_safe_leading_nonstarters;
 
 pub(crate) const MAX_NONSTARTERS: usize = 30;
 const COMBINING_GRAPHEME_JOINER: char = '\u{034F}';
@@ -22,11 +19,15 @@ pub struct StreamSafe<I> {
 
 impl<I> StreamSafe<I> {
     pub(crate) fn new(iter: I) -> Self {
-        Self { iter, nonstarter_count: 0, buffer: None }
+        Self {
+            iter,
+            nonstarter_count: 0,
+            buffer: None,
+        }
     }
 }
 
-impl<I: Iterator<Item=char>> Iterator for StreamSafe<I> {
+impl<I: Iterator<Item = char>> Iterator for StreamSafe<I> {
     type Item = char;
 
     #[inline]
@@ -72,7 +73,7 @@ pub(crate) fn classify_nonstarters(c: char) -> Decomposition {
             leading_nonstarters: 0,
             trailing_nonstarters: 0,
             decomposition_len: 1,
-        }
+        };
     }
     // Next, special case Hangul, since it's not handled by our tables.
     if is_hangul_syllable(c) {
@@ -82,15 +83,12 @@ pub(crate) fn classify_nonstarters(c: char) -> Decomposition {
             decomposition_len: hangul_decomposition_length(c),
         };
     }
-    let decomp = compatibility_fully_decomposed(c)
-        .or_else(|| canonical_fully_decomposed(c));
+    let decomp = compatibility_fully_decomposed(c).or_else(|| canonical_fully_decomposed(c));
     match decomp {
-        Some(decomp) => {
-            Decomposition {
-                leading_nonstarters: stream_safe_leading_nonstarters(c),
-                trailing_nonstarters: stream_safe_trailing_nonstarters(c),
-                decomposition_len: decomp.len(),
-            }
+        Some(decomp) => Decomposition {
+            leading_nonstarters: stream_safe_leading_nonstarters(c),
+            trailing_nonstarters: stream_safe_trailing_nonstarters(c),
+            decomposition_len: decomp.len(),
         },
         None => {
             let is_nonstarter = canonical_combining_class(c) != 0;
@@ -106,13 +104,14 @@ pub(crate) fn classify_nonstarters(c: char) -> Decomposition {
 
 #[cfg(test)]
 mod tests {
-    use super::{
-        StreamSafe,
-        classify_nonstarters,
-    };
-    use std::char;
-    use normalize::decompose_compatible;
-    use lookups::canonical_combining_class;
+    use super::{classify_nonstarters, StreamSafe};
+    use crate::lookups::canonical_combining_class;
+    use crate::normalize::decompose_compatible;
+
+    #[cfg(not(feature = "std"))]
+    use crate::no_std_prelude::*;
+
+    use core::char;
 
     fn stream_safe(s: &str) -> String {
         StreamSafe::new(s.chars()).collect()
@@ -136,7 +135,7 @@ mod tests {
             None => continue,
         };
         let c = classify_nonstarters(ch);
-        let mut s = vec![];
+        let mut s = Vec::new();
         decompose_compatible(ch, |c| s.push(c));
 
         assert_eq!(s.len(), c.decomposition_len);
diff --git a/src/tables.rs b/src/tables.rs
index f92898d..368e6bb 100644
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -11,8 +11,8 @@
 // NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
 
 #![allow(missing_docs)]
 
-use quick_check::IsNormalized;
-use quick_check::IsNormalized::*;
+use crate::quick_check::IsNormalized;
+use crate::quick_check::IsNormalized::*;
 
 #[allow(unused)]
 pub const UNICODE_VERSION: (u64, u64, u64) = (9, 0, 0);
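The `MAX_NONSTARTERS` budget above is what the Stream-Safe Text Process enforces: once a run of 30 nonstarters would be exceeded, a U+034F COMBINING GRAPHEME JOINER is inserted to break the run. A sketch:

```rust
use unicode_normalization::UnicodeNormalization;

fn main() {
    // 40 combining grave accents exceed the 30-nonstarter budget.
    let long_run: String = core::iter::once('a')
        .chain(core::iter::repeat('\u{300}').take(40))
        .collect();

    let safe: String = long_run.chars().stream_safe().collect();

    // Exactly one CGJ is inserted to split the run (30 + CGJ + 10).
    assert!(safe.contains('\u{34f}'));
    assert_eq!(safe.chars().count(), long_run.chars().count() + 1);
}
```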
diff --git a/src/test.rs b/src/test.rs
index 8aaadba..2e87a87 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -8,11 +8,12 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-
-use std::char;
-use super::UnicodeNormalization;
 use super::char::is_combining_mark;
+use super::UnicodeNormalization;
+use core::char;
+
+#[cfg(not(feature = "std"))]
+use crate::no_std_prelude::*;
 
 #[test]
 fn test_nfd() {
@@ -21,8 +22,11 @@ fn test_nfd() {
             assert_eq!($input.nfd().to_string(), $expected);
             // A dummy iterator that is not std::str::Chars directly;
             // note that `id_func` is used to ensure `Clone` implementation
-            assert_eq!($input.chars().map(|c| c).nfd().collect::<String>(), $expected);
-        }
+            assert_eq!(
+                $input.chars().map(|c| c).nfd().collect::<String>(),
+                $expected
+            );
+        };
     }
     t!("abc", "abc");
     t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
@@ -41,7 +45,7 @@ fn test_nfkd() {
     macro_rules! t {
         ($input: expr, $expected: expr) => {
             assert_eq!($input.nfkd().to_string(), $expected);
-        }
+        };
     }
     t!("abc", "abc");
     t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
@@ -60,7 +64,7 @@ fn test_nfc() {
     macro_rules! t {
         ($input: expr, $expected: expr) => {
             assert_eq!($input.nfc().to_string(), $expected);
-        }
+        };
     }
     t!("abc", "abc");
     t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
@@ -72,7 +76,10 @@ fn test_nfc() {
     t!("\u{301}a", "\u{301}a");
     t!("\u{d4db}", "\u{d4db}");
     t!("\u{ac1c}", "\u{ac1c}");
-    t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
+    t!(
+        "a\u{300}\u{305}\u{315}\u{5ae}b",
+        "\u{e0}\u{5ae}\u{305}\u{315}b"
+    );
 }
 
 #[test]
@@ -80,7 +87,7 @@ fn test_nfkc() {
     macro_rules! t {
         ($input: expr, $expected: expr) => {
             assert_eq!($input.nfkc().to_string(), $expected);
-        }
+        };
     }
     t!("abc", "abc");
     t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
@@ -92,7 +99,10 @@ fn test_nfkc() {
     t!("\u{301}a", "\u{301}a");
     t!("\u{d4db}", "\u{d4db}");
     t!("\u{ac1c}", "\u{ac1c}");
-    t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
+    t!(
+        "a\u{300}\u{305}\u{315}\u{5ae}b",
+        "\u{e0}\u{5ae}\u{305}\u{315}b"
+    );
 }
 
 #[test]
diff --git a/tests/tests.rs b/tests/tests.rs
index 03531b0..9aefd97 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -1,13 +1,11 @@
 extern crate unicode_normalization;
 
 use unicode_normalization::UnicodeNormalization;
-use unicode_normalization::__test_api::{
-    stream_safe,
-};
+use unicode_normalization::__test_api::stream_safe;
 
 mod data {
     pub mod normalization_tests;
 }
-use data::normalization_tests::NORMALIZATION_TESTS;
+use crate::data::normalization_tests::NORMALIZATION_TESTS;
 
 #[test]
 fn test_normalization_tests_unaffected() {
@@ -21,7 +19,9 @@ fn test_normalization_tests_unaffected() {
 #[test]
 fn test_official() {
     macro_rules! normString {
-        ($method: ident, $input: expr) => { $input.$method().collect::<String>() }
+        ($method: ident, $input: expr) => {
+            $input.$method().collect::<String>()
+        };
     }
 
     for test in NORMALIZATION_TESTS {