@@ -2399,26 +2399,68 @@ impl Display for bool {
23992399impl Debug for str {
24002400 fn fmt ( & self , f : & mut Formatter < ' _ > ) -> Result { // writes `self` between double quotes, replacing each char that needs it with its `escape_debug_ext` escape
24012401 f. write_char ( '"' ) ?;
2402- let mut from = 0 ;
2403- for ( i, c) in self . char_indices ( ) {
2404- // a fast path for ASCII chars that do not need escapes:
2405- if matches ! ( c, ' ' ..='~' ) && !matches ! ( c, '\\' | '\"' ) {
2406- continue ;
2407- }
24082402
2409- let esc = c. escape_debug_ext ( EscapeDebugExtArgs {
2410- escape_grapheme_extended : true ,
2411- escape_single_quote : false ,
2412- escape_double_quote : true ,
2413- } ) ;
2414- // If char needs escaping, flush backlog so far and write, else skip
2415- if esc. len ( ) != 1 {
2416- f. write_str ( & self [ from..i] ) ?;
2403+ // backlog: byte range of `self` that has been scanned and needs no escaping, but has not been written to `f` yet
2404+ let mut printable_range = 0 ..0 ;
2405+
2406+ // the outer loop alternates between a (possibly empty) run of ASCII bytes and a (possibly empty) run of
2407+ // non-ASCII bytes, which are then processed separately, to enable a bytewise fast path for the ASCII run.
2408+ let mut rest = self . as_bytes ( ) ;
2409+ while rest. len ( ) > 0 {
2410+ let mut ascii_bytes: & [ u8 ] ;
2411+ let unicode_bytes: & [ u8 ] ;
2412+
2413+ // first, handle the leading run of ASCII bytes
2414+ let non_ascii_position = rest. iter ( ) . position ( |& b| b >= 0x80 ) . unwrap_or ( rest. len ( ) ) ;
2415+ // SAFETY: the position comes from `position` over `rest` (or is `rest.len()`), so it is within bounds; it is preceded only by ASCII bytes, so it is at a char boundary
2416+ ( ascii_bytes, rest) = unsafe { rest. split_at_unchecked ( non_ascii_position) } ;
2417+
2418+ fn needs_escape ( b : u8 ) -> bool { // true for bytes outside ' '..='~', and for backslash and double quote
2419+ b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
2420+ }
2421+ while let Some ( escape_position) = ascii_bytes. iter ( ) . position ( |& b| needs_escape ( b) ) { // each iteration emits one escaped ASCII byte
2422+ printable_range. end += escape_position;
2423+ f. write_str ( & self [ printable_range. clone ( ) ] ) ?; // flush the backlog that precedes the byte to escape
2424+
2425+ let c = ascii_bytes[ escape_position] as char ; // lossless: every byte in `ascii_bytes` is < 0x80
2426+ let esc = c. escape_debug_ext ( EscapeDebugExtArgs {
2427+ escape_grapheme_extended : true ,
2428+ escape_single_quote : false ,
2429+ escape_double_quote : true ,
2430+ } ) ;
24172431 Display :: fmt ( & esc, f) ?;
2418- from = i + c. len_utf8 ( ) ;
2432+
2433+ ascii_bytes = & ascii_bytes[ escape_position + 1 ..] ;
2434+ printable_range = ( printable_range. end + 1 ) ..( printable_range. end + 1 ) ; // restart the (empty) backlog just past the one-byte escaped char
2435+ }
2436+ printable_range. end += ascii_bytes. len ( ) ; // the remaining ASCII bytes need no escaping; keep them in the backlog
2437+
2438+ // then, handle the following run of non-ASCII bytes
2439+ let ascii_position = rest. iter ( ) . position ( |& b| b < 0x80 ) . unwrap_or ( rest. len ( ) ) ;
2440+ // SAFETY: the position comes from `position` over `rest` (or is `rest.len()`), so it is within bounds; the byte there, if any, is ASCII, so it is at a char boundary
2441+ ( unicode_bytes, rest) = unsafe { rest. split_at_unchecked ( ascii_position) } ;
2442+ // SAFETY: `unicode_bytes` starts and ends at char boundaries of `self`, which is a valid `str`, so it is itself valid UTF-8
2443+ let unicode_prefix = unsafe { crate :: str:: from_utf8_unchecked ( unicode_bytes) } ;
2444+
2445+ for c in unicode_prefix. chars ( ) {
2446+ // SAFETY: every byte in `unicode_prefix` is >= 0x80, so every char decoded from it is non-ASCII
2447+ unsafe { crate :: hint:: assert_unchecked ( c as u32 >= 0x80 ) } ;
2448+ let esc = c. escape_debug_ext ( EscapeDebugExtArgs {
2449+ escape_grapheme_extended : true ,
2450+ escape_single_quote : false ,
2451+ escape_double_quote : true ,
2452+ } ) ;
2453+ if esc. len ( ) != 1 { // `c` needs escaping: flush the backlog, write the escape, restart the backlog after `c`
2454+ f. write_str ( & self [ printable_range. clone ( ) ] ) ?;
2455+ Display :: fmt ( & esc, f) ?;
2456+ printable_range. start = printable_range. end + c. len_utf8 ( ) ;
2457+ }
2458+ printable_range. end += c. len_utf8 ( ) ; // extend the backlog past `c`; right after an escape this leaves it empty
24192459 }
24202460 }
2421- f. write_str ( & self [ from..] ) ?;
2461+
2462+ f. write_str ( & self [ printable_range] ) ?; // flush whatever backlog is left after the last escape
2463+
24222464 f. write_char ( '"' )
24232465 }
24242466}
0 commit comments