Skip to content

Adjust Punycode overflow checks #949

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion idna/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "idna"
version = "1.0.1"
version = "1.0.2"
authors = ["The rust-url developers"]
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
categories = ["no_std"]
Expand Down
74 changes: 43 additions & 31 deletions idna/src/punycode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,16 @@
/// Marker for internal vs. external caller to retain old API behavior
/// while tweaking behavior for internal callers.
///
/// External callers retain the old behavior of the pre-existing
/// public entry points to this module by 1) limiting input length
/// to the 32-bit accumulator overflowing and 2) by not performing
/// ASCII case folding.
/// External callers need overflow checks when encoding, but internal
/// callers don't, because `PUNYCODE_ENCODE_MAX_INPUT_LENGTH` is set
/// to 1000, and per RFC 3492 section 6.4, the integer variable does
/// not need to be able to represent values larger than
/// (char::MAX - INITIAL_N) * (PUNYCODE_ENCODE_MAX_INPUT_LENGTH + 1),
/// which is less than u32::MAX.
///
/// Internal callers omit overflow checks due to the input length
/// being constrained before calling into this module. Additionally,
/// when the code unit is `u8`, upper-case ASCII is replaced with
/// lower-case ASCII.
/// External callers need to handle upper-case ASCII when decoding,
/// but internal callers don't, because the internal code calls the
/// decoder only with lower-case inputs.
pub(crate) trait PunycodeCaller {
const EXTERNAL_CALLER: bool;
}
Expand Down Expand Up @@ -162,8 +163,6 @@

impl Decoder {
/// Split the input iterator and return a Vec with insertions of encoded characters
///
/// XXX: Add a policy parameter to skip overflow checks
pub(crate) fn decode<'a, T: PunycodeCodeUnit + Copy, C: PunycodeCaller>(
&'a mut self,
input: &'a [T],
Expand Down Expand Up @@ -192,7 +191,7 @@
let mut length = base_len as u32;
let mut code_point = INITIAL_N;
let mut bias = INITIAL_BIAS;
let mut i = 0;
let mut i = 0u32;
let mut iter = input.iter();
loop {
let previous_i = i;
Expand All @@ -211,10 +210,8 @@
} else {
return Err(());
};
if C::EXTERNAL_CALLER && (digit > (u32::MAX - i) / weight) {
return Err(()); // Overflow
}
i = i.checked_add(digit * weight).ok_or(())?;
let product = digit.checked_mul(weight).ok_or(())?;
i = i.checked_add(product).ok_or(())?;
let t = if k <= bias {
T_MIN
} else if k >= bias + T_MAX {
Expand All @@ -225,10 +222,7 @@
if digit < t {
break;
}
if C::EXTERNAL_CALLER && (weight > u32::MAX / (BASE - t)) {
return Err(()); // Overflow
}
weight *= BASE - t;
weight = weight.checked_mul(BASE - t).ok_or(())?;
k += BASE;
byte = match iter.next() {
None => return Err(()), // End of input before the end of this delta
Expand All @@ -237,13 +231,10 @@
}

bias = adapt(i - previous_i, length + 1, previous_i == 0);
if C::EXTERNAL_CALLER && (i / (length + 1) > u32::MAX - code_point) {
return Err(()); // Overflow
}

// i was supposed to wrap around from length+1 to 0,
// incrementing code_point each time.
code_point += i / (length + 1);
code_point = code_point.checked_add(i / (length + 1)).ok_or(())?;
i %= length + 1;
let c = match char::from_u32(code_point) {
Some(c) => c,
Expand Down Expand Up @@ -381,11 +372,24 @@
}
}

if !C::EXTERNAL_CALLER {
// We should never get an overflow here with the internal caller being
// length-limited, but let's check anyway once here trusting the math
// from RFC 3492 section 6.4 and then omit the overflow checks in the
// loop below.
let len_plus_one = input_length
.checked_add(1)
.ok_or(PunycodeEncodeError::Overflow)?;
len_plus_one
.checked_mul(u32::from(char::MAX) - INITIAL_N)
.ok_or(PunycodeEncodeError::Overflow)?;
}

if basic_length > 0 {
output.write_char('-')?;
}
let mut code_point = INITIAL_N;
let mut delta = 0;
let mut delta = 0u32;
let mut bias = INITIAL_BIAS;
let mut processed = basic_length;
while processed < input_length {
Expand All @@ -397,18 +401,26 @@
.filter(|&c| c >= code_point)
.min()
.unwrap();
if C::EXTERNAL_CALLER
&& (min_code_point - code_point > (u32::MAX - delta) / (processed + 1))
{
return Err(PunycodeEncodeError::Overflow); // Overflow
}
// Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
delta += (min_code_point - code_point) * (processed + 1);
if C::EXTERNAL_CALLER {
let product = (min_code_point - code_point)
.checked_mul(processed + 1)
.ok_or(PunycodeEncodeError::Overflow)?;
delta = delta
.checked_add(product)

Check warning on line 410 in idna/src/punycode.rs

View check run for this annotation

Codecov / codecov/patch

idna/src/punycode.rs#L410

Added line #L410 was not covered by tests
.ok_or(PunycodeEncodeError::Overflow)?;
} else {
delta += (min_code_point - code_point) * (processed + 1);
}
code_point = min_code_point;
for c in input.clone() {
let c = c as u32;
if c < code_point {
delta = delta.checked_add(1).ok_or(PunycodeEncodeError::Overflow)?;
if C::EXTERNAL_CALLER {
delta = delta.checked_add(1).ok_or(PunycodeEncodeError::Overflow)?;
} else {
delta += 1;
}
}
if c == code_point {
// Represent delta as a generalized variable-length integer:
Expand Down