@@ -998,15 +998,43 @@ impl VerifiedInvoiceRequest {
998998 InvoiceRequestFields {
999999 payer_signing_pubkey : * payer_signing_pubkey,
10001000 quantity : * quantity,
1001- payer_note_truncated : payer_note. clone ( ) . map ( |mut s| {
1002- s. truncate ( PAYER_NOTE_LIMIT ) ;
1003- UntrustedString ( s)
1004- } ) ,
1001+ payer_note_truncated : payer_note
1002+ . clone ( )
1003+ // Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004+ // down to the nearest valid UTF-8 code point boundary.
1005+ . map ( |s| UntrustedString ( string_truncate_safe ( s, PAYER_NOTE_LIMIT ) ) ) ,
10051006 human_readable_name : self . offer_from_hrn ( ) . clone ( ) ,
10061007 }
10071008 }
10081009}
10091010
1011+ /// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
1012+ /// which would leave the `String` containing invalid UTF-8. This function will
1013+ /// instead truncate the string to the next smaller code point boundary so the
1014+ /// truncated string always remains valid UTF-8.
1015+ ///
1016+ /// This can still split a grapheme cluster, but that's probably fine.
1017+ /// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
1018+ /// unicode tables to find the next smaller grapheme cluster boundary.
1019+ fn string_truncate_safe ( mut s : String , new_len : usize ) -> String {
1020+ // Finds the largest byte index `x` not exceeding byte index `index` where
1021+ // `s.is_char_boundary(x)` is true.
1022+ // TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
1023+ let truncated_len = if new_len >= s. len ( ) {
1024+ s. len ( )
1025+ } else {
1026+ // UTF-8 code points are 1-4 bytes long, so we can limit our search
1027+ // to this range: [new_len - 3, new_len]
1028+ let lower_bound_index = new_len. saturating_sub ( 3 ) ;
1029+ ( lower_bound_index..=new_len)
1030+ . rev ( )
1031+ . find ( |idx| s. is_char_boundary ( * idx) )
1032+ . unwrap_or ( lower_bound_index)
1033+ } ;
1034+ s. truncate ( truncated_len) ;
1035+ s
1036+ }
1037+
10101038impl InvoiceRequestContents {
10111039 pub ( super ) fn metadata ( & self ) -> & [ u8 ] {
10121040 self . inner . metadata ( )
@@ -1426,6 +1454,7 @@ mod tests {
14261454 use crate :: ln:: inbound_payment:: ExpandedKey ;
14271455 use crate :: ln:: msgs:: { DecodeError , MAX_VALUE_MSAT } ;
14281456 use crate :: offers:: invoice:: { Bolt12Invoice , SIGNATURE_TAG as INVOICE_SIGNATURE_TAG } ;
1457+ use crate :: offers:: invoice_request:: string_truncate_safe;
14291458 use crate :: offers:: merkle:: { self , SignatureTlvStreamRef , TaggedHash , TlvStream } ;
14301459 use crate :: offers:: nonce:: Nonce ;
14311460 #[ cfg( not( c_bindings) ) ]
@@ -2947,14 +2976,20 @@ mod tests {
29472976 . unwrap ( ) ;
29482977 assert_eq ! ( offer. issuer_signing_pubkey( ) , Some ( node_id) ) ;
29492978
2979+ // UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2980+ // because it would split a multi-byte UTF-8 code point.
2981+ let payer_note = "❤️" . repeat ( 86 ) ;
2982+ assert_eq ! ( payer_note. len( ) , PAYER_NOTE_LIMIT + 4 ) ;
2983+ let expected_payer_note = "❤️" . repeat ( 85 ) ;
2984+
29502985 let invoice_request = offer
29512986 . request_invoice ( & expanded_key, nonce, & secp_ctx, payment_id)
29522987 . unwrap ( )
29532988 . chain ( Network :: Testnet )
29542989 . unwrap ( )
29552990 . quantity ( 1 )
29562991 . unwrap ( )
2957- . payer_note ( "0" . repeat ( PAYER_NOTE_LIMIT * 2 ) )
2992+ . payer_note ( payer_note )
29582993 . build_and_sign ( )
29592994 . unwrap ( ) ;
29602995 match invoice_request. verify_using_metadata ( & expanded_key, & secp_ctx) {
@@ -2966,7 +3001,7 @@ mod tests {
29663001 InvoiceRequestFields {
29673002 payer_signing_pubkey: invoice_request. payer_signing_pubkey( ) ,
29683003 quantity: Some ( 1 ) ,
2969- payer_note_truncated: Some ( UntrustedString ( "0" . repeat ( PAYER_NOTE_LIMIT ) ) ) ,
3004+ payer_note_truncated: Some ( UntrustedString ( expected_payer_note ) ) ,
29703005 human_readable_name: None ,
29713006 }
29723007 ) ;
@@ -2981,4 +3016,31 @@ mod tests {
29813016 Err ( _) => panic ! ( "unexpected error" ) ,
29823017 }
29833018 }
3019+
3020+ #[ test]
3021+ fn test_string_truncate_safe ( ) {
3022+ // We'll correctly truncate to the nearest UTF-8 code point boundary:
3023+ // ❤ variation-selector
3024+ // e29da4 efb88f
3025+ let s = String :: from ( "❤️" ) ;
3026+ for idx in 0 ..( s. len ( ) + 5 ) {
3027+ if idx >= s. len ( ) {
3028+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , idx) ) ;
3029+ } else if ( 3 ..s. len ( ) ) . contains ( & idx) {
3030+ assert_eq ! ( "❤" , string_truncate_safe( s. clone( ) , idx) ) ;
3031+ } else {
3032+ assert_eq ! ( "" , string_truncate_safe( s. clone( ) , idx) ) ;
3033+ }
3034+ }
3035+
3036+ // Every byte in an ASCII string is also a full UTF-8 code point.
3037+ let s = String :: from ( "my ASCII string!" ) ;
3038+ for idx in 0 ..( s. len ( ) + 5 ) {
3039+ if idx >= s. len ( ) {
3040+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , idx) ) ;
3041+ } else {
3042+ assert_eq ! ( s[ ..idx] , string_truncate_safe( s. clone( ) , idx) ) ;
3043+ }
3044+ }
3045+ }
29843046}
0 commit comments