@@ -71,15 +71,16 @@ pub fn decode(input: &str) -> Option<Vec<char>> {
71
71
/// Marker for internal vs. external caller to retain old API behavior
72
72
/// while tweaking behavior for internal callers.
73
73
///
74
- /// External callers retain the old behavior of the pre-existing
75
- /// public entry points to this module by 1) limiting input length
76
- /// to the 32-bit accumulator overflowing and 2) by not performing
77
- /// ASCII case folding.
74
+ /// External callers need overflow checks when encoding, but internal
75
+ /// callers don't, because `PUNYCODE_ENCODE_MAX_INPUT_LENGTH` is set
76
+ /// to 1000, and per RFC 3492 section 6.4, the integer variable does
77
+ /// not need to be able to represent values larger than
78
+ /// (char::MAX - INITIAL_N) * (PUNYCODE_ENCODE_MAX_INPUT_LENGTH + 1),
79
+ /// which is less than u32::MAX.
78
80
///
79
- /// Internal callers omit overflow checks due to the input length
80
- /// being constrained before calling into this module. Additionally,
81
- /// when the code unit is `u8`, upper-case ASCII is replaced with
82
- /// lower-case ASCII.
81
+ /// External callers need to handle upper-case ASCII when decoding,
82
+ /// but internal callers don't, because the internal code calls the
83
+ /// decoder only with lower-case inputs.
83
84
pub ( crate ) trait PunycodeCaller {
84
85
// Compile-time switch read as `C::EXTERNAL_CALLER` by the generic encode/decode code.
// In the encoder: when `true`, every update of `delta` uses checked arithmetic and
// reports `PunycodeEncodeError::Overflow`; when `false`, a single up-front
// `(input_length + 1) * (char::MAX - INITIAL_N)` overflow check is done and the
// per-iteration updates use plain `+=`.
const EXTERNAL_CALLER : bool ;
85
86
}
@@ -162,8 +163,6 @@ pub(crate) struct Decoder {
162
163
163
164
impl Decoder {
164
165
/// Split the input iterator and return a Vec with insertions of encoded characters
165
- ///
166
- /// XXX: Add a policy parameter to skip overflow checks
167
166
pub ( crate ) fn decode < ' a , T : PunycodeCodeUnit + Copy , C : PunycodeCaller > (
168
167
& ' a mut self ,
169
168
input : & ' a [ T ] ,
@@ -192,7 +191,7 @@ impl Decoder {
192
191
let mut length = base_len as u32 ;
193
192
let mut code_point = INITIAL_N ;
194
193
let mut bias = INITIAL_BIAS ;
195
- let mut i = 0 ;
194
+ let mut i = 0u32 ;
196
195
let mut iter = input. iter ( ) ;
197
196
loop {
198
197
let previous_i = i;
@@ -211,10 +210,8 @@ impl Decoder {
211
210
} else {
212
211
return Err ( ( ) ) ;
213
212
} ;
214
- if C :: EXTERNAL_CALLER && ( digit > ( u32:: MAX - i) / weight) {
215
- return Err ( ( ) ) ; // Overflow
216
- }
217
- i = i. checked_add ( digit * weight) . ok_or ( ( ) ) ?;
213
+ let product = digit. checked_mul ( weight) . ok_or ( ( ) ) ?;
214
+ i = i. checked_add ( product) . ok_or ( ( ) ) ?;
218
215
let t = if k <= bias {
219
216
T_MIN
220
217
} else if k >= bias + T_MAX {
@@ -225,10 +222,7 @@ impl Decoder {
225
222
if digit < t {
226
223
break ;
227
224
}
228
- if C :: EXTERNAL_CALLER && ( weight > u32:: MAX / ( BASE - t) ) {
229
- return Err ( ( ) ) ; // Overflow
230
- }
231
- weight *= BASE - t;
225
+ weight = weight. checked_mul ( BASE - t) . ok_or ( ( ) ) ?;
232
226
k += BASE ;
233
227
byte = match iter. next ( ) {
234
228
None => return Err ( ( ) ) , // End of input before the end of this delta
@@ -237,13 +231,10 @@ impl Decoder {
237
231
}
238
232
239
233
bias = adapt ( i - previous_i, length + 1 , previous_i == 0 ) ;
240
- if C :: EXTERNAL_CALLER && ( i / ( length + 1 ) > u32:: MAX - code_point) {
241
- return Err ( ( ) ) ; // Overflow
242
- }
243
234
244
235
// i was supposed to wrap around from length+1 to 0,
245
236
// incrementing code_point each time.
246
- code_point += i / ( length + 1 ) ;
237
+ code_point = code_point . checked_add ( i / ( length + 1 ) ) . ok_or ( ( ) ) ? ;
247
238
i %= length + 1 ;
248
239
let c = match char:: from_u32 ( code_point) {
249
240
Some ( c) => c,
@@ -381,11 +372,24 @@ where
381
372
}
382
373
}
383
374
375
+ if !C :: EXTERNAL_CALLER {
376
+ // We should never get an overflow here with the internal caller being
377
+ // length-limited, but let's check anyway once here trusting the math
378
+ // from RFC 3492 section 6.4 and then omit the overflow checks in the
379
+ // loop below.
380
+ let len_plus_one = input_length
381
+ . checked_add ( 1 )
382
+ . ok_or ( PunycodeEncodeError :: Overflow ) ?;
383
+ len_plus_one
384
+ . checked_mul ( u32:: from ( char:: MAX ) - INITIAL_N )
385
+ . ok_or ( PunycodeEncodeError :: Overflow ) ?;
386
+ }
387
+
384
388
if basic_length > 0 {
385
389
output. write_char ( '-' ) ?;
386
390
}
387
391
let mut code_point = INITIAL_N ;
388
- let mut delta = 0 ;
392
+ let mut delta = 0u32 ;
389
393
let mut bias = INITIAL_BIAS ;
390
394
let mut processed = basic_length;
391
395
while processed < input_length {
@@ -397,18 +401,26 @@ where
397
401
. filter ( |& c| c >= code_point)
398
402
. min ( )
399
403
. unwrap ( ) ;
400
- if C :: EXTERNAL_CALLER
401
- && ( min_code_point - code_point > ( u32:: MAX - delta) / ( processed + 1 ) )
402
- {
403
- return Err ( PunycodeEncodeError :: Overflow ) ; // Overflow
404
- }
405
404
// Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
406
- delta += ( min_code_point - code_point) * ( processed + 1 ) ;
405
+ if C :: EXTERNAL_CALLER {
406
+ let product = ( min_code_point - code_point)
407
+ . checked_mul ( processed + 1 )
408
+ . ok_or ( PunycodeEncodeError :: Overflow ) ?;
409
+ delta = delta
410
+ . checked_add ( product)
411
+ . ok_or ( PunycodeEncodeError :: Overflow ) ?;
412
+ } else {
413
+ delta += ( min_code_point - code_point) * ( processed + 1 ) ;
414
+ }
407
415
code_point = min_code_point;
408
416
for c in input. clone ( ) {
409
417
let c = c as u32 ;
410
418
if c < code_point {
411
- delta = delta. checked_add ( 1 ) . ok_or ( PunycodeEncodeError :: Overflow ) ?;
419
+ if C :: EXTERNAL_CALLER {
420
+ delta = delta. checked_add ( 1 ) . ok_or ( PunycodeEncodeError :: Overflow ) ?;
421
+ } else {
422
+ delta += 1 ;
423
+ }
412
424
}
413
425
if c == code_point {
414
426
// Represent delta as a generalized variable-length integer:
0 commit comments