@@ -998,15 +998,37 @@ impl VerifiedInvoiceRequest {
998998 InvoiceRequestFields {
999999 payer_signing_pubkey : * payer_signing_pubkey,
10001000 quantity : * quantity,
1001- payer_note_truncated : payer_note. clone ( ) . map ( |mut s| {
1002- s. truncate ( PAYER_NOTE_LIMIT ) ;
1003- UntrustedString ( s)
1004- } ) ,
1001+ payer_note_truncated : payer_note
1002+ . clone ( )
1003+ // Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004+ // down to the nearest valid UTF-8 code point boundary.
1005+ . map ( |s| UntrustedString ( string_truncate_safe ( s, PAYER_NOTE_LIMIT ) ) ) ,
10051006 human_readable_name : self . offer_from_hrn ( ) . clone ( ) ,
10061007 }
10071008 }
10081009}
10091010
1011+ /// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
1012+ /// which would leave the `String` containing invalid UTF-8. This function will
1013+ /// instead truncate the string to the next smaller code point boundary so the
1014+ /// truncated string always remains valid UTF-8.
1015+ ///
1016+ /// This can still split a grapheme cluster, but that's probably fine.
1017+ /// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
1018+ /// unicode tables to find the next smaller grapheme cluster boundary.
1019+ fn string_truncate_safe ( mut s : String , new_len : usize ) -> String {
1020+ // Finds the largest byte index `x` not exceeding byte index `index` where
1021+ // `s.is_char_boundary(x)` is true.
1022+ // TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
1023+ let truncated_len = if new_len >= s. len ( ) {
1024+ s. len ( )
1025+ } else {
1026+ ( 0 ..=new_len) . rev ( ) . find ( |idx| s. is_char_boundary ( * idx) ) . unwrap_or ( 0 )
1027+ } ;
1028+ s. truncate ( truncated_len) ;
1029+ s
1030+ }
1031+
10101032impl InvoiceRequestContents {
10111033 pub ( super ) fn metadata ( & self ) -> & [ u8 ] {
10121034 self . inner . metadata ( )
@@ -1426,6 +1448,7 @@ mod tests {
14261448 use crate :: ln:: inbound_payment:: ExpandedKey ;
14271449 use crate :: ln:: msgs:: { DecodeError , MAX_VALUE_MSAT } ;
14281450 use crate :: offers:: invoice:: { Bolt12Invoice , SIGNATURE_TAG as INVOICE_SIGNATURE_TAG } ;
1451+ use crate :: offers:: invoice_request:: string_truncate_safe;
14291452 use crate :: offers:: merkle:: { self , SignatureTlvStreamRef , TaggedHash , TlvStream } ;
14301453 use crate :: offers:: nonce:: Nonce ;
14311454 #[ cfg( not( c_bindings) ) ]
@@ -2947,14 +2970,20 @@ mod tests {
29472970 . unwrap ( ) ;
29482971 assert_eq ! ( offer. issuer_signing_pubkey( ) , Some ( node_id) ) ;
29492972
2973+ // UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2974+ // because it would split a multi-byte UTF-8 code point.
2975+ let payer_note = "❤️" . repeat ( 86 ) ;
2976+ assert_eq ! ( payer_note. len( ) , PAYER_NOTE_LIMIT + 4 ) ;
2977+ let expected_payer_note = "❤️" . repeat ( 85 ) ;
2978+
29502979 let invoice_request = offer
29512980 . request_invoice ( & expanded_key, nonce, & secp_ctx, payment_id)
29522981 . unwrap ( )
29532982 . chain ( Network :: Testnet )
29542983 . unwrap ( )
29552984 . quantity ( 1 )
29562985 . unwrap ( )
2957- . payer_note ( "0" . repeat ( PAYER_NOTE_LIMIT * 2 ) )
2986+ . payer_note ( payer_note )
29582987 . build_and_sign ( )
29592988 . unwrap ( ) ;
29602989 match invoice_request. verify_using_metadata ( & expanded_key, & secp_ctx) {
@@ -2966,7 +2995,7 @@ mod tests {
29662995 InvoiceRequestFields {
29672996 payer_signing_pubkey: invoice_request. payer_signing_pubkey( ) ,
29682997 quantity: Some ( 1 ) ,
2969- payer_note_truncated: Some ( UntrustedString ( "0" . repeat ( PAYER_NOTE_LIMIT ) ) ) ,
2998+ payer_note_truncated: Some ( UntrustedString ( expected_payer_note ) ) ,
29702999 human_readable_name: None ,
29713000 }
29723001 ) ;
@@ -2981,4 +3010,31 @@ mod tests {
29813010 Err ( _) => panic ! ( "unexpected error" ) ,
29823011 }
29833012 }
3013+
3014+ #[ test]
3015+ fn test_string_truncate_safe ( ) {
3016+ // We'll correctly truncate to the nearest UTF-8 code point boundary:
3017+ // ❤ variation-selector
3018+ // e29da4 efb88f
3019+ let s = String :: from ( "❤️" ) ;
3020+ for idx in 0 ..( s. len ( ) + 5 ) {
3021+ if idx >= s. len ( ) {
3022+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , idx) ) ;
3023+ } else if ( 3 ..s. len ( ) ) . contains ( & idx) {
3024+ assert_eq ! ( "❤" , string_truncate_safe( s. clone( ) , idx) ) ;
3025+ } else {
3026+ assert_eq ! ( "" , string_truncate_safe( s. clone( ) , idx) ) ;
3027+ }
3028+ }
3029+
3030+ // Every byte in an ASCII string is also a full UTF-8 code point.
3031+ let s = String :: from ( "my ASCII string!" ) ;
3032+ for idx in 0 ..( s. len ( ) + 5 ) {
3033+ if idx >= s. len ( ) {
3034+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , idx) ) ;
3035+ } else {
3036+ assert_eq ! ( s[ ..idx] , string_truncate_safe( s. clone( ) , idx) ) ;
3037+ }
3038+ }
3039+ }
29843040}
0 commit comments