From 4a1451936596da270dc903f36b92faf23897c816 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 1 Jul 2024 15:01:34 +0300 Subject: [PATCH 01/13] Adjust Punycode overflow checks * The change made in 1.0.0 incorrectly assumed that the input length limit removed the need to do overflow check when decoding. Now the internal-caller length limit is taken as a permission to skip overflow checks only when encoding. * The RFC gives overflow checking pre-flight math for languages that don't have checked math. Since Rust does, the code now uses checked_add and checked_mul instead of pre-flight when overflow checks are performed. --- idna/Cargo.toml | 2 +- idna/src/punycode.rs | 74 +++++++++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 94202c93..2d15e814 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "idna" -version = "1.0.1" +version = "1.0.2" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." categories = ["no_std"] diff --git a/idna/src/punycode.rs b/idna/src/punycode.rs index 6a330889..e25ce859 100644 --- a/idna/src/punycode.rs +++ b/idna/src/punycode.rs @@ -71,15 +71,16 @@ pub fn decode(input: &str) -> Option> { /// Marker for internal vs. external caller to retain old API behavior /// while tweaking behavior for internal callers. /// -/// External callers retain the old behavior of the pre-existing -/// public entry points to this module by 1) limiting input length -/// to the 32-bit accumulator overflowing and 2) by not performing -/// ASCII case folding. 
+/// External callers need overflow checks when encoding, but internal +/// callers don't, because `PUNYCODE_ENCODE_MAX_INPUT_LENGTH` is set +/// to 1000, and per RFC 3492 section 6.4, the integer variable does +/// not need to be able to represent values larger than +/// (char::MAX - INITIAL_N) * (PUNYCODE_ENCODE_MAX_INPUT_LENGTH + 1), +/// which is less than u32::MAX. /// -/// Internal callers omit overflow checks due to the input length -/// being constrained before calling into this module. Additionally, -/// when the code unit is `u8`, upper-case ASCII is replaced with -/// lower-case ASCII. +/// External callers need to handle upper-case ASCII when decoding, +/// but internal callers don't, because the internal code calls the +/// decoder only with lower-case inputs. pub(crate) trait PunycodeCaller { const EXTERNAL_CALLER: bool; } @@ -162,8 +163,6 @@ pub(crate) struct Decoder { impl Decoder { /// Split the input iterator and return a Vec with insertions of encoded characters - /// - /// XXX: Add a policy parameter to skip overflow checks pub(crate) fn decode<'a, T: PunycodeCodeUnit + Copy, C: PunycodeCaller>( &'a mut self, input: &'a [T], @@ -192,7 +191,7 @@ impl Decoder { let mut length = base_len as u32; let mut code_point = INITIAL_N; let mut bias = INITIAL_BIAS; - let mut i = 0; + let mut i = 0u32; let mut iter = input.iter(); loop { let previous_i = i; @@ -211,10 +210,8 @@ impl Decoder { } else { return Err(()); }; - if C::EXTERNAL_CALLER && (digit > (u32::MAX - i) / weight) { - return Err(()); // Overflow - } - i = i.checked_add(digit * weight).ok_or(())?; + let product = digit.checked_mul(weight).ok_or(())?; + i = i.checked_add(product).ok_or(())?; let t = if k <= bias { T_MIN } else if k >= bias + T_MAX { @@ -225,10 +222,7 @@ impl Decoder { if digit < t { break; } - if C::EXTERNAL_CALLER && (weight > u32::MAX / (BASE - t)) { - return Err(()); // Overflow - } - weight *= BASE - t; + weight = weight.checked_mul(BASE - t).ok_or(())?; k += BASE; byte = 
match iter.next() { None => return Err(()), // End of input before the end of this delta @@ -237,13 +231,10 @@ impl Decoder { } bias = adapt(i - previous_i, length + 1, previous_i == 0); - if C::EXTERNAL_CALLER && (i / (length + 1) > u32::MAX - code_point) { - return Err(()); // Overflow - } // i was supposed to wrap around from length+1 to 0, // incrementing code_point each time. - code_point += i / (length + 1); + code_point = code_point.checked_add(i / (length + 1)).ok_or(())?; i %= length + 1; let c = match char::from_u32(code_point) { Some(c) => c, @@ -381,11 +372,24 @@ where } } + if !C::EXTERNAL_CALLER { + // We should never get an overflow here with the internal caller being + // length-limited, but let's check anyway once here trusting the math + // from RFC 3492 section 6.4 and then omit the overflow checks in the + // loop below. + let len_plus_one = input_length + .checked_add(1) + .ok_or(PunycodeEncodeError::Overflow)?; + len_plus_one + .checked_mul(u32::from(char::MAX) - INITIAL_N) + .ok_or(PunycodeEncodeError::Overflow)?; + } + if basic_length > 0 { output.write_char('-')?; } let mut code_point = INITIAL_N; - let mut delta = 0; + let mut delta = 0u32; let mut bias = INITIAL_BIAS; let mut processed = basic_length; while processed < input_length { @@ -397,18 +401,26 @@ where .filter(|&c| c >= code_point) .min() .unwrap(); - if C::EXTERNAL_CALLER - && (min_code_point - code_point > (u32::MAX - delta) / (processed + 1)) - { - return Err(PunycodeEncodeError::Overflow); // Overflow - } // Increase delta to advance the decoder’s state to - delta += (min_code_point - code_point) * (processed + 1); + if C::EXTERNAL_CALLER { + let product = (min_code_point - code_point) + .checked_mul(processed + 1) + .ok_or(PunycodeEncodeError::Overflow)?; + delta = delta + .checked_add(product) + .ok_or(PunycodeEncodeError::Overflow)?; + } else { + delta += (min_code_point - code_point) * (processed + 1); + } code_point = min_code_point; for c in input.clone() { let c = c as 
u32; if c < code_point { - delta = delta.checked_add(1).ok_or(PunycodeEncodeError::Overflow)?; + if C::EXTERNAL_CALLER { + delta = delta.checked_add(1).ok_or(PunycodeEncodeError::Overflow)?; + } else { + delta += 1; + } } if c == code_point { // Represent delta as a generalized variable-length integer: From 6be49d0820ed61c9dbee39ac3c6bd16021392926 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 1 Jul 2024 13:01:25 -0700 Subject: [PATCH 02/13] Remove no_std category (crates.io doesn't support it, and it is now rejected), use keywords instead --- idna/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 2d15e814..91a862f1 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -3,7 +3,7 @@ name = "idna" version = "1.0.2" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." -categories = ["no_std"] +keywords = ["no_std", "web", "http"] repository = "https://github.com/servo/rust-url/" license = "MIT OR Apache-2.0" autotests = false From 9fd5e1c7b341b7392a2bd1ddc769f1da4ee8d192 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Wed, 3 Jul 2024 12:11:57 +0300 Subject: [PATCH 03/13] Add benches that use the main idna 1.0 entry point in idna and url --- idna/benches/all.rs | 48 +++++++++++++++++++++++++++ url/benches/parse_url.rs | 70 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/idna/benches/all.rs b/idna/benches/all.rs index c59c4233..e39e5bd2 100644 --- a/idna/benches/all.rs +++ b/idna/benches/all.rs @@ -49,6 +49,46 @@ fn to_ascii_merged(bench: &mut Bencher) { bench.iter(|| config.to_ascii(black_box(encoded))); } +fn to_ascii_cow_plain(bench: &mut Bencher) { + let encoded = "example.com".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_leading_digit(bench: &mut Bencher) { + let encoded = 
"1test.example".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_unicode_mixed(bench: &mut Bencher) { + let encoded = "مثال.example".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_punycode_mixed(bench: &mut Bencher) { + let encoded = "xn--mgbh0fb.example".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_unicode_ltr(bench: &mut Bencher) { + let encoded = "නම.උදාහරණ".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_punycode_ltr(bench: &mut Bencher) { + let encoded = "xn--r0co.xn--ozc8dl2c3bxd".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_unicode_rtl(bench: &mut Bencher) { + let encoded = "الاسم.مثال".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + +fn to_ascii_cow_punycode_rtl(bench: &mut Bencher) { + let encoded = "xn--mgba0b1dh.xn--mgbh0fb".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + benchmark_group!( benches, to_unicode_puny_label, @@ -58,5 +98,13 @@ benchmark_group!( to_ascii_already_puny_label, to_ascii_simple, to_ascii_merged, + to_ascii_cow_plain, + to_ascii_cow_leading_digit, + to_ascii_cow_unicode_mixed, + to_ascii_cow_punycode_mixed, + to_ascii_cow_unicode_ltr, + to_ascii_cow_punycode_ltr, + to_ascii_cow_unicode_rtl, + to_ascii_cow_punycode_rtl, ); benchmark_main!(benches); diff --git a/url/benches/parse_url.rs b/url/benches/parse_url.rs index cc87dda5..3c9cd175 100644 --- a/url/benches/parse_url.rs +++ b/url/benches/parse_url.rs @@ -19,5 +19,73 @@ fn long(bench: &mut Bencher) { bench.iter(|| black_box(url).parse::().unwrap()); } -benchmark_group!(benches, short, 
long); +fn plain(bench: &mut Bencher) { + let url = "https://example.com/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn leading_digit(bench: &mut Bencher) { + let url = "https://1test.example/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn unicode_mixed(bench: &mut Bencher) { + let url = "https://مثال.example/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn punycode_mixed(bench: &mut Bencher) { + let url = "https://xn--mgbh0fb.example/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn unicode_ltr(bench: &mut Bencher) { + let url = "https://නම.උදාහරණ/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn punycode_ltr(bench: &mut Bencher) { + let url = "https://xn--r0co.xn--ozc8dl2c3bxd/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn unicode_rtl(bench: &mut Bencher) { + let url = "https://الاسم.مثال/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +fn punycode_rtl(bench: &mut Bencher) { + let url = "https://xn--mgba0b1dh.xn--mgbh0fb/"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +benchmark_group!( + benches, + short, + long, + plain, + leading_digit, + unicode_mixed, + punycode_mixed, + unicode_ltr, + punycode_ltr, + unicode_rtl, + punycode_rtl, +); benchmark_main!(benches); From 4cb705ea8b78b57e5883692a9521aefbc5f76816 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Thu, 4 Jul 2024 14:45:42 +0300 Subject: [PATCH 04/13] Put the Unicode back end behind an adapter crate --- idna/Cargo.toml | 5 +- idna/src/uts46.rs | 145 +++++++++------------------------------------- 2 files changed, 30 insertions(+), 120 deletions(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 
91a862f1..a98555d9 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -17,7 +17,7 @@ doctest = false default = ["std", "compiled_data"] std = ["alloc"] alloc = [] -compiled_data = ["icu_normalizer/compiled_data", "icu_properties/compiled_data"] +compiled_data = ["idna_adapter/compiled_data"] [[test]] name = "tests" @@ -36,10 +36,9 @@ tester = "0.9" serde_json = "1.0" [dependencies] -icu_normalizer = "1.4.3" -icu_properties = "1.4.2" utf8_iter = "1.0.4" smallvec = { version = "1.13.1", features = ["const_generics"]} +idna_adapter = { path = "../../idna_adapter" } [[bench]] name = "all" diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index d8a7eb3e..aab8fab9 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -26,13 +26,7 @@ use crate::punycode::InternalCaller; use alloc::borrow::Cow; use alloc::string::String; use core::fmt::Write; -use icu_normalizer::properties::CanonicalCombiningClassMap; -use icu_normalizer::uts46::Uts46Mapper; -use icu_properties::maps::CodePointMapDataBorrowed; -use icu_properties::BidiClass; -use icu_properties::CanonicalCombiningClass; -use icu_properties::GeneralCategory; -use icu_properties::JoiningType; +use idna_adapter::*; use smallvec::SmallVec; use utf8_iter::Utf8CharsEx; @@ -106,79 +100,6 @@ const fn ldh_mask() -> u128 { accu } -/// Turns a joining type into a mask for comparing with multiple type at once. -const fn joining_type_to_mask(jt: JoiningType) -> u32 { - 1u32 << jt.0 -} - -/// Mask for checking for both left and dual joining. -const LEFT_OR_DUAL_JOINING_MASK: u32 = - joining_type_to_mask(JoiningType::LeftJoining) | joining_type_to_mask(JoiningType::DualJoining); - -/// Mask for checking for both left and dual joining. -const RIGHT_OR_DUAL_JOINING_MASK: u32 = joining_type_to_mask(JoiningType::RightJoining) - | joining_type_to_mask(JoiningType::DualJoining); - -/// Turns a bidi class into a mask for comparing with multiple classes at once. 
-const fn bidi_class_to_mask(bc: BidiClass) -> u32 { - 1u32 << bc.0 -} - -/// Mask for checking if the domain is a bidi domain. -const RTL_MASK: u32 = bidi_class_to_mask(BidiClass::RightToLeft) - | bidi_class_to_mask(BidiClass::ArabicLetter) - | bidi_class_to_mask(BidiClass::ArabicNumber); - -/// Mask for allowable bidi classes in the first character of a label -/// (either LTR or RTL) in a bidi domain. -const FIRST_BC_MASK: u32 = bidi_class_to_mask(BidiClass::LeftToRight) - | bidi_class_to_mask(BidiClass::RightToLeft) - | bidi_class_to_mask(BidiClass::ArabicLetter); - -// Mask for allowable bidi classes of the last (non-Non-Spacing Mark) -// character in an LTR label in a bidi domain. -const LAST_LTR_MASK: u32 = - bidi_class_to_mask(BidiClass::LeftToRight) | bidi_class_to_mask(BidiClass::EuropeanNumber); - -// Mask for allowable bidi classes of the last (non-Non-Spacing Mark) -// character in an RTL label in a bidi domain. -const LAST_RTL_MASK: u32 = bidi_class_to_mask(BidiClass::RightToLeft) - | bidi_class_to_mask(BidiClass::ArabicLetter) - | bidi_class_to_mask(BidiClass::EuropeanNumber) - | bidi_class_to_mask(BidiClass::ArabicNumber); - -// Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain. -const MIDDLE_LTR_MASK: u32 = bidi_class_to_mask(BidiClass::LeftToRight) - | bidi_class_to_mask(BidiClass::EuropeanNumber) - | bidi_class_to_mask(BidiClass::EuropeanSeparator) - | bidi_class_to_mask(BidiClass::CommonSeparator) - | bidi_class_to_mask(BidiClass::EuropeanTerminator) - | bidi_class_to_mask(BidiClass::OtherNeutral) - | bidi_class_to_mask(BidiClass::BoundaryNeutral) - | bidi_class_to_mask(BidiClass::NonspacingMark); - -// Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain. 
-const MIDDLE_RTL_MASK: u32 = bidi_class_to_mask(BidiClass::RightToLeft) - | bidi_class_to_mask(BidiClass::ArabicLetter) - | bidi_class_to_mask(BidiClass::ArabicNumber) - | bidi_class_to_mask(BidiClass::EuropeanNumber) - | bidi_class_to_mask(BidiClass::EuropeanSeparator) - | bidi_class_to_mask(BidiClass::CommonSeparator) - | bidi_class_to_mask(BidiClass::EuropeanTerminator) - | bidi_class_to_mask(BidiClass::OtherNeutral) - | bidi_class_to_mask(BidiClass::BoundaryNeutral) - | bidi_class_to_mask(BidiClass::NonspacingMark); - -/// Turns a genecal category into a mask for comparing with multiple categories at once. -const fn general_category_to_mask(gc: GeneralCategory) -> u32 { - 1 << (gc as u32) -} - -/// Mask for the disallowed general categories of the first character in a label. -const MARK_MASK: u32 = general_category_to_mask(GeneralCategory::NonspacingMark) - | general_category_to_mask(GeneralCategory::SpacingMark) - | general_category_to_mask(GeneralCategory::EnclosingMark); - const PUNYCODE_PREFIX: u32 = ((b'-' as u32) << 24) | ((b'-' as u32) << 16) | ((b'N' as u32) << 8) | b'X' as u32; @@ -566,11 +487,7 @@ pub fn verify_dns_length(domain_name: &str, allow_trailing_dot: bool) -> bool { /// An implementation of UTS #46. 
pub struct Uts46 { - mapper: Uts46Mapper, - canonical_combining_class: CanonicalCombiningClassMap, - general_category: CodePointMapDataBorrowed<'static, GeneralCategory>, - bidi_class: CodePointMapDataBorrowed<'static, BidiClass>, - joining_type: CodePointMapDataBorrowed<'static, JoiningType>, + data: idna_adapter::Adapter, } #[cfg(feature = "compiled_data")] @@ -585,11 +502,7 @@ impl Uts46 { #[cfg(feature = "compiled_data")] pub const fn new() -> Self { Self { - mapper: Uts46Mapper::new(), - canonical_combining_class: CanonicalCombiningClassMap::new(), - general_category: icu_properties::maps::general_category(), - bidi_class: icu_properties::maps::bidi_class(), - joining_type: icu_properties::maps::joining_type(), + data: idna_adapter::Adapter::new(), } } @@ -1295,7 +1208,7 @@ impl Uts46 { let mut first_needs_combining_mark_check = ascii.is_empty(); let mut needs_contextj_check = !non_ascii.is_empty(); let mut mapping = self - .mapper + .data .map_normalize(non_ascii.chars()) .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list)); loop { @@ -1431,8 +1344,8 @@ impl Uts46 { if is_bidi { for label in domain_buffer.split_mut(|c| *c == '.') { if let Some((first, tail)) = label.split_first_mut() { - let first_bc = self.bidi_class.get(*first); - if (FIRST_BC_MASK & bidi_class_to_mask(first_bc)) == 0 { + let first_bc = self.data.bidi_class(*first); + if !FIRST_BC_MASK.intersects(first_bc.to_mask()) { // Neither RTL label nor LTR label if fail_fast { return (0, false, true); @@ -1441,19 +1354,19 @@ impl Uts46 { *first = '\u{FFFD}'; continue; } - let is_ltr = first_bc == BidiClass::LeftToRight; + let is_ltr = first_bc.is_ltr(); // Trim NSM let mut middle = tail; #[allow(clippy::while_let_loop)] loop { if let Some((last, prior)) = middle.split_last_mut() { - let last_bc = self.bidi_class.get(*last); - if last_bc == BidiClass::NonspacingMark { + let last_bc = self.data.bidi_class(*last); + if last_bc.is_nonspacing_mark() { middle = prior; continue; } let 
last_mask = if is_ltr { LAST_LTR_MASK } else { LAST_RTL_MASK }; - if (bidi_class_to_mask(last_bc) & last_mask) == 0 { + if !last_mask.intersects(last_bc.to_mask()) { if fail_fast { return (0, false, true); } @@ -1462,8 +1375,8 @@ impl Uts46 { } if is_ltr { for c in prior.iter_mut() { - let bc = self.bidi_class.get(*c); - if (bidi_class_to_mask(bc) & MIDDLE_LTR_MASK) == 0 { + let bc = self.data.bidi_class(*c); + if !MIDDLE_LTR_MASK.intersects(bc.to_mask()) { if fail_fast { return (0, false, true); } @@ -1474,8 +1387,8 @@ impl Uts46 { } else { let mut numeral_state = RtlNumeralState::Undecided; for c in prior.iter_mut() { - let bc = self.bidi_class.get(*c); - if (bidi_class_to_mask(bc) & MIDDLE_RTL_MASK) == 0 { + let bc = self.data.bidi_class(*c); + if !MIDDLE_RTL_MASK.intersects(bc.to_mask()) { if fail_fast { return (0, false, true); } @@ -1484,14 +1397,14 @@ impl Uts46 { } else { match numeral_state { RtlNumeralState::Undecided => { - if bc == BidiClass::EuropeanNumber { + if bc.is_european_number() { numeral_state = RtlNumeralState::European; - } else if bc == BidiClass::ArabicNumber { + } else if bc.is_arabic_number() { numeral_state = RtlNumeralState::Arabic; } } RtlNumeralState::European => { - if bc == BidiClass::ArabicNumber { + if bc.is_arabic_number() { if fail_fast { return (0, false, true); } @@ -1500,7 +1413,7 @@ impl Uts46 { } } RtlNumeralState::Arabic => { - if bc == BidiClass::EuropeanNumber { + if bc.is_european_number() { if fail_fast { return (0, false, true); } @@ -1512,9 +1425,9 @@ impl Uts46 { } } if (numeral_state == RtlNumeralState::European - && last_bc == BidiClass::ArabicNumber) + && last_bc.is_arabic_number()) || (numeral_state == RtlNumeralState::Arabic - && last_bc == BidiClass::EuropeanNumber) + && last_bc.is_european_number()) { if fail_fast { return (0, false, true); @@ -1549,7 +1462,7 @@ impl Uts46 { had_errors: &mut bool, ) -> bool { for c in self - .mapper + .data .normalize_validate(label_buffer.iter().copied()) .map(|c| 
apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot)) { @@ -1606,7 +1519,7 @@ impl Uts46 { } if first_needs_combining_mark_check { if let Some(first) = mut_label.first_mut() { - if (general_category_to_mask(self.general_category.get(*first)) & MARK_MASK) != 0 { + if self.data.is_mark(*first) { if fail_fast { return true; } @@ -1626,9 +1539,7 @@ impl Uts46 { if let Some((joiner, tail)) = joiner_and_tail.split_first_mut() { if let Some(previous) = head.last() { - if self.canonical_combining_class.get(*previous) - == CanonicalCombiningClass::Virama - { + if self.data.is_virama(*previous) { continue; } } else { @@ -1686,14 +1597,14 @@ impl Uts46 { fn has_appropriately_joining_char>( &self, iter: I, - required_mask: u32, + required_mask: JoiningTypeMask, ) -> bool { for c in iter { - let jt = self.joining_type.get(c); - if (joining_type_to_mask(jt) & required_mask) != 0 { + let jt = self.data.joining_type(c); + if jt.to_mask().intersects(required_mask) { return true; } - if jt == JoiningType::Transparent { + if jt.is_transparent() { continue; } return false; @@ -1721,7 +1632,7 @@ impl Uts46 { if in_inclusive_range_char(c, '\u{11000}', '\u{1E7FF}') { continue; } - if (RTL_MASK & bidi_class_to_mask(self.bidi_class.get(c))) != 0 { + if RTL_MASK.intersects(self.data.bidi_class(c).to_mask()) { return true; } } From 9c695539048fc31382c53713d81eac3d28447fd7 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Fri, 13 Sep 2024 11:56:31 +0300 Subject: [PATCH 05/13] Split fastest ASCII fast path from the rest --- idna/src/uts46.rs | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index aab8fab9..df382e5d 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -1026,9 +1026,8 @@ impl Uts46 { Ok(ProcessingSuccess::WroteToSink) } - /// The part of `process` that doesn't need to be generic over the sink and - /// can avoid monomorphizing in the interest of code size. 
- #[inline(never)] + /// The part of `process` that doesn't need to be generic over the sink. + #[inline(always)] fn process_inner<'a>( &self, domain_name: &'a [u8], @@ -1042,7 +1041,7 @@ impl Uts46 { // performance. let mut iter = domain_name.iter(); let mut most_recent_label_start = iter.clone(); - let tail = loop { + loop { if let Some(&b) = iter.next() { if in_inclusive_range8(b, b'a', b'z') { continue; @@ -1051,13 +1050,37 @@ impl Uts46 { most_recent_label_start = iter.clone(); continue; } - break most_recent_label_start.as_slice(); + return self.process_innermost( + domain_name, + ascii_deny_list, + hyphens, + fail_fast, + domain_buffer, + already_punycode, + most_recent_label_start.as_slice(), + ); } else { // Success! The whole input passes through on the fastest path! return (domain_name.len(), false, false); } - }; + } + } + /// The part of `process` that doesn't need to be generic over the sink and + /// can avoid monomorphizing in the interest of code size. + /// Separating this into a different stack frame compared to `process_inner` + /// improves performance in the ICU4X case. 
+ #[inline(never)] + fn process_innermost<'a>( + &self, + domain_name: &'a [u8], + ascii_deny_list: AsciiDenyList, + hyphens: Hyphens, + fail_fast: bool, + domain_buffer: &mut SmallVec<[char; 253]>, + already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>, + tail: &'a [u8], + ) -> (usize, bool, bool) { let deny_list = ascii_deny_list.bits; let deny_list_deny_dot = deny_list | DOT_MASK; From f4a8b9266c7cfec9af590c3525e066fa1c5b025a Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 8 Jul 2024 08:46:26 +0300 Subject: [PATCH 06/13] Bench hyphen in a domain that is otherwise lower-case ASCII --- idna/benches/all.rs | 6 ++++++ url/benches/parse_url.rs | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/idna/benches/all.rs b/idna/benches/all.rs index e39e5bd2..c779adb6 100644 --- a/idna/benches/all.rs +++ b/idna/benches/all.rs @@ -54,6 +54,11 @@ fn to_ascii_cow_plain(bench: &mut Bencher) { bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); } +fn to_ascii_cow_hyphen(bench: &mut Bencher) { + let encoded = "hyphenated-example.com".as_bytes(); + bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); +} + fn to_ascii_cow_leading_digit(bench: &mut Bencher) { let encoded = "1test.example".as_bytes(); bench.iter(|| idna::domain_to_ascii_cow(black_box(encoded), idna::AsciiDenyList::URL)); @@ -99,6 +104,7 @@ benchmark_group!( to_ascii_simple, to_ascii_merged, to_ascii_cow_plain, + to_ascii_cow_hyphen, to_ascii_cow_leading_digit, to_ascii_cow_unicode_mixed, to_ascii_cow_punycode_mixed, diff --git a/url/benches/parse_url.rs b/url/benches/parse_url.rs index 3c9cd175..531c2e99 100644 --- a/url/benches/parse_url.rs +++ b/url/benches/parse_url.rs @@ -26,6 +26,13 @@ fn plain(bench: &mut Bencher) { bench.iter(|| black_box(url).parse::().unwrap()); } +fn hyphen(bench: &mut Bencher) { + let url = "https://hyphenated-example.com/"; + + bench.bytes = url.len() as u64; + bench.iter(|| 
black_box(url).parse::().unwrap()); +} + fn leading_digit(bench: &mut Bencher) { let url = "https://1test.example/"; @@ -80,6 +87,7 @@ benchmark_group!( short, long, plain, + hyphen, leading_digit, unicode_mixed, punycode_mixed, From 881f7ae00a4a6cd1855c56503dcfa1aea9dd4c99 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 8 Jul 2024 09:49:32 +0300 Subject: [PATCH 07/13] Adjust MSRV --- idna/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index a98555d9..b7d5f2ca 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/servo/rust-url/" license = "MIT OR Apache-2.0" autotests = false edition = "2018" -rust-version = "1.67" +rust-version = "1.57" # For panic in const context [lib] doctest = false From a771f806dea780acf3846eac4e43f0999ebb0874 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 16 Sep 2024 09:55:16 +0300 Subject: [PATCH 08/13] Add README remarks about alternative Unicode back ends --- README.md | 4 ++++ idna/README.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/README.md b/README.md index 99d5c2b7..0d9bacbd 100644 --- a/README.md +++ b/README.md @@ -12,3 +12,7 @@ URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/). [Documentation](https://docs.rs/url) Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/main/UPGRADING.md) if you are upgrading from a previous version. + +## Alternative Unicode back ends + +`url` depends on the `idna` crate. By default, `idna` uses [ICU4X](https://github.com/unicode-org/icu4x/) as its Unicode back end. If you wish to opt for different tradeoffs between correctness, run-time performance, binary size, compile time, and MSRV, please see the [README of the latest version of the `idna_adapter` crate](https://docs.rs/crate/idna_adapter/latest) for how to opt into a different Unicode back end. 
diff --git a/idna/README.md b/idna/README.md index 1afa3b78..5ef8a7a5 100644 --- a/idna/README.md +++ b/idna/README.md @@ -28,6 +28,10 @@ Apps that need to display host names to the user should use `uts46::Uts46::to_us * `std` - Adds `impl std::error::Error for Errors {}` (and implies `alloc`). * By default, all of the above are enabled. +## Alternative Unicode back ends + +By default, `idna` uses [ICU4X](https://github.com/unicode-org/icu4x/) as its Unicode back end. If you wish to opt for different tradeoffs between correctness, run-time performance, binary size, compile time, and MSRV, please see the [README of the latest version of the `idna_adapter` crate](https://docs.rs/crate/idna_adapter/latest) for how to opt into a different Unicode back end. + ## Breaking changes since 0.5.0 * Stricter IDNA 2008 restrictions are no longer supported. Attempting to enable them panics immediately. UTS 46 allows all the names that IDNA 2008 allows, and when transitional processing is disabled, they resolve the same way. There are additional names that IDNA 2008 disallows but UTS 46 maps to names that IDNA 2008 allows (notably, input is mapped to fold-case output). UTS 46 also allows symbols that were allowed in IDNA 2003 as well as newer symbols that are allowed according to the same principle. (Earlier versions of this crate allowed rejecting such symbols. Rejecting characters that UTS 46 maps to IDNA 2008-permitted characters wasn't supported in earlier versions, either.) 
From b05732bbefeae74a5896db6f610cd40bfbb7c788 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Tue, 29 Oct 2024 08:34:19 +0200 Subject: [PATCH 09/13] Change the idna_adapter dependency to crates.io --- idna/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index b7d5f2ca..2ea97fc3 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -38,7 +38,7 @@ serde_json = "1.0" [dependencies] utf8_iter = "1.0.4" smallvec = { version = "1.13.1", features = ["const_generics"]} -idna_adapter = { path = "../../idna_adapter" } +idna_adapter = "1" [[bench]] name = "all" From f700ca8b3a44eba070815533c3d0ea74264f59a3 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Tue, 29 Oct 2024 08:46:25 +0200 Subject: [PATCH 10/13] Address clippy lints --- idna/src/uts46.rs | 137 +++++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 68 deletions(-) diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index df382e5d..fa5ea58a 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -515,14 +515,14 @@ impl Uts46 { /// # Arguments /// /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by - /// this method and input that is not well-formed UTF-8 is treated as an error. If you - /// already have a `&str`, call `.as_bytes()` on it.) + /// this method and input that is not well-formed UTF-8 is treated as an error. If you + /// already have a `&str`, call `.as_bytes()` on it.) /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 - /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point - /// processing is handled via this argument. Most callers are probably the best off - /// by using [`AsciiDenyList::URL`] here. + /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point + /// processing is handled via this argument. Most callers are probably the best off + /// by using [`AsciiDenyList::URL`] here. 
/// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best - /// off by using [`Hyphens::Allow`] here. + /// off by using [`Hyphens::Allow`] here. /// * `dns_length` - The UTS 46 _VerifyDNSLength_ flag. pub fn to_ascii<'a>( &self, @@ -581,14 +581,14 @@ impl Uts46 { /// # Arguments /// /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by - /// this method and input that is not well-formed UTF-8 is treated as an error. If you - /// already have a `&str`, call `.as_bytes()` on it.) + /// this method and input that is not well-formed UTF-8 is treated as an error. If you + /// already have a `&str`, call `.as_bytes()` on it.) /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 - /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point - /// processing is handled via this argument. Most callers are probably the best off - /// by using [`AsciiDenyList::URL`] here. + /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point + /// processing is handled via this argument. Most callers are probably the best off + /// by using [`AsciiDenyList::URL`] here. /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best - /// off by using [`Hyphens::Allow`] here. + /// off by using [`Hyphens::Allow`] here. pub fn to_unicode<'a>( &self, domain_name: &'a [u8], @@ -627,23 +627,23 @@ impl Uts46 { /// # Arguments /// /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by - /// this method and input that is not well-formed UTF-8 is treated as an error. If you - /// already have a `&str`, call `.as_bytes()` on it.) + /// this method and input that is not well-formed UTF-8 is treated as an error. If you + /// already have a `&str`, call `.as_bytes()` on it.) /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. 
The UTS 46 - /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point - /// processing is handled via this argument. Most callers are probably the best off - /// by using [`AsciiDenyList::URL`] here. + /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point + /// processing is handled via this argument. Most callers are probably the best off + /// by using [`AsciiDenyList::URL`] here. /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best - /// off by using [`Hyphens::Allow`] here. + /// off by using [`Hyphens::Allow`] here. /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode - /// (as opposed to Punycode). The first argument is the label for which a decision is - /// needed (always non-empty slice). The second argument is the TLD (potentially empty). - /// The third argument is `true` iff the domain name as a whole is a bidi domain name. - /// Only non-erroneous labels that contain at least one non-ASCII character are passed - /// to the closure as the first argument. The second and third argument values are - /// guaranteed to remain the same during a single call to `process`, and the closure - /// may cache computations derived from the second and third argument (hence the - /// `FnMut` type). + /// (as opposed to Punycode). The first argument is the label for which a decision is + /// needed (always non-empty slice). The second argument is the TLD (potentially empty). + /// The third argument is `true` iff the domain name as a whole is a bidi domain name. + /// Only non-erroneous labels that contain at least one non-ASCII character are passed + /// to the closure as the first argument. The second and third argument values are + /// guaranteed to remain the same during a single call to `process`, and the closure + /// may cache computations derived from the second and third argument (hence the + /// `FnMut` type). 
pub fn to_user_interface<'a, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>( &self, domain_name: &'a [u8], @@ -679,59 +679,59 @@ impl Uts46 { /// # Arguments /// /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by - /// this method and input that is not well-formed UTF-8 is treated as an error. If you - /// already have a `&str`, call `.as_bytes()` on it.) + /// this method and input that is not well-formed UTF-8 is treated as an error. If you + /// already have a `&str`, call `.as_bytes()` on it.) /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46 - /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point - /// processing is handled via this argument. Most callers are probably the best off - /// by using [`AsciiDenyList::URL`] here. + /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point + /// processing is handled via this argument. Most callers are probably the best off + /// by using [`AsciiDenyList::URL`] here. /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best - /// off by using [`Hyphens::Allow`] here. + /// off by using [`Hyphens::Allow`] here. /// * `error_policy` - Whether to fail fast or to produce output that may be rendered - /// for the user to examine in case of errors. + /// for the user to examine in case of errors. /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode - /// (as opposed to Punycode). The first argument is the label for which a decision is - /// needed (always non-empty slice). The second argument is the TLD (potentially empty). - /// The third argument is `true` iff the domain name as a whole is a bidi domain name. - /// Only non-erroneous labels that contain at least one non-ASCII character are passed - /// to the closure as the first argument. 
The second and third argument values are - /// guaranteed to remain the same during a single call to `process`, and the closure - /// may cache computations derived from the second and third argument (hence the - /// `FnMut` type). To perform the _ToASCII_ operation, `|_, _, _| false` must be - /// passed as the closure. To perform the _ToUnicode_ operation, `|_, _, _| true` must - /// be passed as the closure. A more complex closure may be used to prepare a domain - /// name for display in a user interface so that labels are converted to the Unicode - /// form in general but potentially misleading labels are converted to the Punycode - /// form. - /// `sink` - The object that receives the output (in the non-passthrough case). - /// `ascii_sink` - A second sink that receives the _ToASCII_ form only if there - /// were no errors and `sink` received at least one character of non-ASCII output. - /// The purpose of this argument is to enable a user interface display form of the - /// domain and the _ToASCII_ form of the domain to be computed efficiently together. - /// This argument is useless when `output_as_unicode` always returns `false`, in - /// which case the _ToASCII_ form ends up in `sink` already. If `ascii_sink` receives - /// no output and the return value is `Ok(ProcessingSuccess::WroteToSink)`, use the - /// output received by `sink` also as the _ToASCII_ result. + /// (as opposed to Punycode). The first argument is the label for which a decision is + /// needed (always non-empty slice). The second argument is the TLD (potentially empty). + /// The third argument is `true` iff the domain name as a whole is a bidi domain name. + /// Only non-erroneous labels that contain at least one non-ASCII character are passed + /// to the closure as the first argument. 
The second and third argument values are + /// guaranteed to remain the same during a single call to `process`, and the closure + /// may cache computations derived from the second and third argument (hence the + /// `FnMut` type). To perform the _ToASCII_ operation, `|_, _, _| false` must be + /// passed as the closure. To perform the _ToUnicode_ operation, `|_, _, _| true` must + /// be passed as the closure. A more complex closure may be used to prepare a domain + /// name for display in a user interface so that labels are converted to the Unicode + /// form in general but potentially misleading labels are converted to the Punycode + /// form. + /// * `sink` - The object that receives the output (in the non-passthrough case). + /// * `ascii_sink` - A second sink that receives the _ToASCII_ form only if there + /// were no errors and `sink` received at least one character of non-ASCII output. + /// The purpose of this argument is to enable a user interface display form of the + /// domain and the _ToASCII_ form of the domain to be computed efficiently together. + /// This argument is useless when `output_as_unicode` always returns `false`, in + /// which case the _ToASCII_ form ends up in `sink` already. If `ascii_sink` receives + /// no output and the return value is `Ok(ProcessingSuccess::WroteToSink)`, use the + /// output received by `sink` also as the _ToASCII_ result. /// /// # Return value /// /// * `Ok(ProcessingSuccess::Passthrough)` - The caller must treat - /// `unsafe { core::str::from_utf8_unchecked(domain_name) }` as the output. (This - /// return value asserts that calling `core::str::from_utf8_unchecked(domain_name)` - /// is safe.) + /// `unsafe { core::str::from_utf8_unchecked(domain_name) }` as the output. (This + /// return value asserts that calling `core::str::from_utf8_unchecked(domain_name)` + /// is safe.) /// * `Ok(ProcessingSuccess::WroteToSink)` - The caller must treat was was written - /// to `sink` as the output. 
If another sink was passed as `ascii_sink` but it did - /// not receive output, the caller must treat what was written to `sink` also as - /// the _ToASCII_ output. Otherwise, if `ascii_sink` received output, the caller - /// must treat what was written to `ascii_sink` as the _ToASCII_ output. + /// to `sink` as the output. If another sink was passed as `ascii_sink` but it did + /// not receive output, the caller must treat what was written to `sink` also as + /// the _ToASCII_ output. Otherwise, if `ascii_sink` received output, the caller + /// must treat what was written to `ascii_sink` as the _ToASCII_ output. /// * `Err(ProcessingError::ValidityError)` - The input was in error and must - /// not be used for DNS lookup or otherwise in a network protocol. If `error_policy` - /// was `ErrorPolicy::MarkErrors`, the output written to `sink` may be displayed - /// to the user as an illustration of where the error was or the errors were. + /// not be used for DNS lookup or otherwise in a network protocol. If `error_policy` + /// was `ErrorPolicy::MarkErrors`, the output written to `sink` may be displayed + /// to the user as an illustration of where the error was or the errors were. /// * `Err(ProcessingError::SinkError)` - Either `sink` or `ascii_sink` returned - /// [`core::fmt::Error`]. The partial output written to `sink` `ascii_sink` must not - /// be used. If `W` never returns [`core::fmt::Error`], this method never returns - /// `Err(ProcessingError::SinkError)`. + /// [`core::fmt::Error`]. The partial output written to `sink` or `ascii_sink` must not + /// be used. If `W` never returns [`core::fmt::Error`], this method never returns + /// `Err(ProcessingError::SinkError)`. /// /// # Safety-usable invariant /// @@ -1070,6 +1070,7 @@ impl Uts46 { /// can avoid monomorphizing in the interest of code size. /// Separating this into a different stack frame compared to `process_inner` /// improves performance in the ICU4X case.
+ #[allow(clippy::too_many_arguments)] #[inline(never)] fn process_innermost<'a>( &self, From 662970feda44f593d86af07600b93c09dfb43374 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Tue, 29 Oct 2024 09:41:17 +0200 Subject: [PATCH 11/13] Increment version number of idna to 1.0.3 --- idna/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 2ea97fc3..60024732 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "idna" -version = "1.0.2" +version = "1.0.3" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." keywords = ["no_std", "web", "http"] From e6cd8f774c3517591d6152e9dfcf58593f8229db Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Tue, 29 Oct 2024 10:07:10 +0200 Subject: [PATCH 12/13] Test MSRV with idna unicode-rs back end and test ICU4X back end with 1.67 --- .github/workflows/main.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9e6e92bc..db81ed6f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - rust: [1.67.0, stable, beta, nightly] + rust: [1.57.0, 1.67.0, stable, beta, nightly] exclude: - os: macos-latest rust: 1.67.0 @@ -37,6 +37,10 @@ jobs: toolchain: ${{ matrix.rust }} # Add toolchain for no_std tests - run: rustup toolchain install nightly + - name: Downgrade idna_adapter on Rust 1.57.0 + if: | + matrix.rust == '1.57.0' + run: cargo update -p idna_adapter --precise 1.1.0 - name: Add `aarch64-unknown-none` toolchain for `no_std` tests if: | matrix.os == 'ubuntu-latest' && @@ -54,7 +58,8 @@ jobs: - name: Run debugger_visualizer tests if: | matrix.os == 'windows-latest' && - matrix.rust != '1.56.0' + matrix.rust != '1.57.0' && + matrix.rust != '1.67.0' run: cargo test --test 
debugger_visualizer --features "url/debugger_visualizer,url_debug_tests/debugger_visualizer" -- --test-threads=1 || echo "debugger test failed" continue-on-error: true # Fails on GH actions, but not locally. - name: Test `no_std` support From 7ead88b3021105efe7105429de6b2e815b4bb095 Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 4 Nov 2024 10:46:26 +0200 Subject: [PATCH 13/13] Prepare url crate for publication with idna 1.0.3 (#987) --- url/Cargo.toml | 6 +++--- url/src/lib.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/url/Cargo.toml b/url/Cargo.toml index adc66bda..9001be1b 100644 --- a/url/Cargo.toml +++ b/url/Cargo.toml @@ -2,7 +2,7 @@ name = "url" # When updating version, also modify html_root_url in the lib.rs -version = "2.5.2" +version = "2.5.3" authors = ["The rust-url developers"] description = "URL library for Rust, based on the WHATWG URL Standard" @@ -14,7 +14,7 @@ categories = ["parser-implementations", "web-programming", "encoding", "no_std"] license = "MIT OR Apache-2.0" include = ["src/**/*", "LICENSE-*", "README.md", "tests/**"] edition = "2018" -rust-version = "1.56" +rust-version = "1.57" # From idna [dev-dependencies] serde = { version = "1.0", features = ["derive"] } @@ -26,7 +26,7 @@ wasm-bindgen-test = "0.3" [dependencies] form_urlencoded = { version = "1.2.1", path = "../form_urlencoded", default-features = false, features = ["alloc"] } -idna = { version = "1.0.2", path = "../idna", default-features = false, features = ["alloc", "compiled_data"] } +idna = { version = "1.0.3", path = "../idna", default-features = false, features = ["alloc", "compiled_data"] } percent-encoding = { version = "2.3.1", path = "../percent_encoding", default-features = false, features = ["alloc"] } serde = { version = "1.0", optional = true, features = ["derive"] } diff --git a/url/src/lib.rs b/url/src/lib.rs index cc263a79..bf944741 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -143,7 +143,7 @@ url = { version = "2", 
features = ["debugger_visualizer"] } */ #![no_std] -#![doc(html_root_url = "https://docs.rs/url/2.5.2")] +#![doc(html_root_url = "https://docs.rs/url/2.5.3")] #![cfg_attr( feature = "debugger_visualizer", debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")