Skip to content

Commit 627ee3e

Browse files
committed
offers: avoid panic when truncating payer_note in UTF-8 code point
1 parent 5dcd6c4 commit 627ee3e

File tree

1 file changed

+68
-6
lines changed

1 file changed

+68
-6
lines changed

lightning/src/offers/invoice_request.rs

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -998,15 +998,43 @@ impl VerifiedInvoiceRequest {
998998
InvoiceRequestFields {
999999
payer_signing_pubkey: *payer_signing_pubkey,
10001000
quantity: *quantity,
1001-
payer_note_truncated: payer_note.clone().map(|mut s| {
1002-
s.truncate(PAYER_NOTE_LIMIT);
1003-
UntrustedString(s)
1004-
}),
1001+
payer_note_truncated: payer_note
1002+
.clone()
1003+
// Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004+
// down to the nearest valid UTF-8 code point boundary.
1005+
.map(|s| UntrustedString(string_truncate_safe(s, PAYER_NOTE_LIMIT))),
10051006
human_readable_name: self.offer_from_hrn().clone(),
10061007
}
10071008
}
10081009
}
10091010

1011+
/// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
1012+
/// which would leave the `String` containing invalid UTF-8. This function will
1013+
/// instead truncate the string to the next smaller code point boundary so the
1014+
/// truncated string always remains valid UTF-8.
1015+
///
1016+
/// This can still split a grapheme cluster, but that's probably fine.
1017+
/// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
1018+
/// unicode tables to find the next smaller grapheme cluster boundary.
1019+
fn string_truncate_safe(mut s: String, new_len: usize) -> String {
1020+
// Finds the largest byte index `x` not exceeding byte index `index` where
1021+
// `s.is_char_boundary(x)` is true.
1022+
// TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
1023+
let truncated_len = if new_len >= s.len() {
1024+
s.len()
1025+
} else {
1026+
// UTF-8 code points are 1-4 bytes long, so we can limit our search
1027+
// to this range: [new_len - 3, new_len]
1028+
let lower_bound_index = new_len.saturating_sub(3);
1029+
(lower_bound_index..=new_len)
1030+
.rev()
1031+
.find(|idx| s.is_char_boundary(*idx))
1032+
.unwrap_or(lower_bound_index)
1033+
};
1034+
s.truncate(truncated_len);
1035+
s
1036+
}
1037+
10101038
impl InvoiceRequestContents {
10111039
pub(super) fn metadata(&self) -> &[u8] {
10121040
self.inner.metadata()
@@ -1426,6 +1454,7 @@ mod tests {
14261454
use crate::ln::inbound_payment::ExpandedKey;
14271455
use crate::ln::msgs::{DecodeError, MAX_VALUE_MSAT};
14281456
use crate::offers::invoice::{Bolt12Invoice, SIGNATURE_TAG as INVOICE_SIGNATURE_TAG};
1457+
use crate::offers::invoice_request::string_truncate_safe;
14291458
use crate::offers::merkle::{self, SignatureTlvStreamRef, TaggedHash, TlvStream};
14301459
use crate::offers::nonce::Nonce;
14311460
#[cfg(not(c_bindings))]
@@ -2947,14 +2976,20 @@ mod tests {
29472976
.unwrap();
29482977
assert_eq!(offer.issuer_signing_pubkey(), Some(node_id));
29492978

2979+
// UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2980+
// because it would split a multi-byte UTF-8 code point.
2981+
let payer_note = "❤️".repeat(86);
2982+
assert_eq!(payer_note.len(), PAYER_NOTE_LIMIT + 4);
2983+
let expected_payer_note = "❤️".repeat(85);
2984+
29502985
let invoice_request = offer
29512986
.request_invoice(&expanded_key, nonce, &secp_ctx, payment_id)
29522987
.unwrap()
29532988
.chain(Network::Testnet)
29542989
.unwrap()
29552990
.quantity(1)
29562991
.unwrap()
2957-
.payer_note("0".repeat(PAYER_NOTE_LIMIT * 2))
2992+
.payer_note(payer_note)
29582993
.build_and_sign()
29592994
.unwrap();
29602995
match invoice_request.verify_using_metadata(&expanded_key, &secp_ctx) {
@@ -2966,7 +3001,7 @@ mod tests {
29663001
InvoiceRequestFields {
29673002
payer_signing_pubkey: invoice_request.payer_signing_pubkey(),
29683003
quantity: Some(1),
2969-
payer_note_truncated: Some(UntrustedString("0".repeat(PAYER_NOTE_LIMIT))),
3004+
payer_note_truncated: Some(UntrustedString(expected_payer_note)),
29703005
human_readable_name: None,
29713006
}
29723007
);
@@ -2981,4 +3016,31 @@ mod tests {
29813016
Err(_) => panic!("unexpected error"),
29823017
}
29833018
}
3019+
3020+
#[test]
3021+
fn test_string_truncate_safe() {
3022+
// We'll correctly truncate to the nearest UTF-8 code point boundary:
3023+
// ❤ variation-selector
3024+
// e29da4 efb88f
3025+
let s = String::from("❤️");
3026+
for idx in 0..(s.len() + 5) {
3027+
if idx >= s.len() {
3028+
assert_eq!(s, string_truncate_safe(s.clone(), idx));
3029+
} else if (3..s.len()).contains(&idx) {
3030+
assert_eq!("❤", string_truncate_safe(s.clone(), idx));
3031+
} else {
3032+
assert_eq!("", string_truncate_safe(s.clone(), idx));
3033+
}
3034+
}
3035+
3036+
// Every byte in an ASCII string is also a full UTF-8 code point.
3037+
let s = String::from("my ASCII string!");
3038+
for idx in 0..(s.len() + 5) {
3039+
if idx >= s.len() {
3040+
assert_eq!(s, string_truncate_safe(s.clone(), idx));
3041+
} else {
3042+
assert_eq!(s[..idx], string_truncate_safe(s.clone(), idx));
3043+
}
3044+
}
3045+
}
29843046
}

0 commit comments

Comments
 (0)