From fecac81874c41724a855c9e696c312353609df23 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 14 Jul 2021 18:19:45 +0000 Subject: [PATCH 1/4] Support pending update_fail_htlcs in reconnect_nodes test util --- lightning/src/ln/chanmon_update_fail_tests.rs | 6 +-- lightning/src/ln/functional_test_utils.rs | 18 ++++---- lightning/src/ln/functional_tests.rs | 44 +++++++++---------- lightning/src/ln/onion_route_tests.rs | 2 +- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index dd7b1906ca0..2c62abc55f8 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -197,7 +197,7 @@ fn do_test_simple_monitor_temporary_update_fail(disconnect: bool, persister_fail if disconnect { nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } match persister_fail { @@ -251,7 +251,7 @@ fn do_test_simple_monitor_temporary_update_fail(disconnect: bool, persister_fail if disconnect { nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } // ...and make sure we can force-close a frozen channel @@ -1912,7 +1912,7 @@ fn do_during_funding_monitor_fail(confirm_a_first: bool, restore_b_before_conf: // Make sure nodes[1] isn't stupid enough to re-send the FundingLocked on reconnect nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (false, confirm_a_first), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, confirm_a_first), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty()); assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index c9dcb8517c3..a6d82ccee52 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -1624,7 +1624,7 @@ macro_rules! handle_chan_reestablish_msgs { /// pending_htlc_adds includes both the holding cell and in-flight update_add_htlcs, whereas /// for claims/fails they are separated out. 
-pub fn reconnect_nodes<'a, 'b, 'c>(node_a: &Node<'a, 'b, 'c>, node_b: &Node<'a, 'b, 'c>, send_funding_locked: (bool, bool), pending_htlc_adds: (i64, i64), pending_htlc_claims: (usize, usize), pending_cell_htlc_claims: (usize, usize), pending_cell_htlc_fails: (usize, usize), pending_raa: (bool, bool)) { +pub fn reconnect_nodes<'a, 'b, 'c>(node_a: &Node<'a, 'b, 'c>, node_b: &Node<'a, 'b, 'c>, send_funding_locked: (bool, bool), pending_htlc_adds: (i64, i64), pending_htlc_claims: (usize, usize), pending_htlc_fails: (usize, usize), pending_cell_htlc_claims: (usize, usize), pending_cell_htlc_fails: (usize, usize), pending_raa: (bool, bool)) { node_a.node.peer_connected(&node_b.node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty() }); let reestablish_1 = get_chan_reestablish_msgs!(node_a, node_b); node_b.node.peer_connected(&node_a.node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty() }); @@ -1678,8 +1678,10 @@ pub fn reconnect_nodes<'a, 'b, 'c>(node_a: &Node<'a, 'b, 'c>, node_b: &Node<'a, } // We don't yet support both needing updates, as that would require a different commitment dance: - assert!((pending_htlc_adds.0 == 0 && pending_htlc_claims.0 == 0 && pending_cell_htlc_claims.0 == 0 && pending_cell_htlc_fails.0 == 0) || - (pending_htlc_adds.1 == 0 && pending_htlc_claims.1 == 0 && pending_cell_htlc_claims.1 == 0 && pending_cell_htlc_fails.1 == 0)); + assert!((pending_htlc_adds.0 == 0 && pending_htlc_claims.0 == 0 && pending_htlc_fails.0 == 0 && + pending_cell_htlc_claims.0 == 0 && pending_cell_htlc_fails.0 == 0) || + (pending_htlc_adds.1 == 0 && pending_htlc_claims.1 == 0 && pending_htlc_fails.1 == 0 && + pending_cell_htlc_claims.1 == 0 && pending_cell_htlc_fails.1 == 0)); for chan_msgs in resp_1.drain(..) { if send_funding_locked.0 { @@ -1702,7 +1704,7 @@ pub fn reconnect_nodes<'a, 'b, 'c>(node_a: &Node<'a, 'b, 'c>, node_b: &Node<'a, } else { assert!(chan_msgs.1.is_none()); } - if pending_htlc_adds.0 != 0 || pending_htlc_claims.0 != 0 || pending_cell_htlc_claims.0 != 0 || pending_cell_htlc_fails.0 != 0 { + if pending_htlc_adds.0 != 0 || pending_htlc_claims.0 != 0 || pending_htlc_fails.0 != 0 || pending_cell_htlc_claims.0 != 0 || pending_cell_htlc_fails.0 != 0 { let commitment_update = chan_msgs.2.unwrap(); if pending_htlc_adds.0 != -1 { // We use -1 to denote a response commitment_signed assert_eq!(commitment_update.update_add_htlcs.len(), pending_htlc_adds.0 as usize); @@ -1710,7 +1712,7 @@ pub fn reconnect_nodes<'a, 'b, 'c>(node_a: &Node<'a, 'b, 'c>, node_b: &Node<'a, assert!(commitment_update.update_add_htlcs.is_empty()); } assert_eq!(commitment_update.update_fulfill_htlcs.len(), pending_htlc_claims.0 + pending_cell_htlc_claims.0); - assert_eq!(commitment_update.update_fail_htlcs.len(), pending_cell_htlc_fails.0); + assert_eq!(commitment_update.update_fail_htlcs.len(), pending_htlc_fails.0 + pending_cell_htlc_fails.0); assert!(commitment_update.update_fail_malformed_htlcs.is_empty()); for update_add in commitment_update.update_add_htlcs { node_a.node.handle_update_add_htlc(&node_b.node.get_our_node_id(), &update_add); @@ -1759,13 +1761,13 @@ pub fn reconnect_nodes<'a, 'b, 'c>(node_a: &Node<'a, 'b, 'c>, node_b: &Node<'a, } else { assert!(chan_msgs.1.is_none()); } - if pending_htlc_adds.1 != 0 || pending_htlc_claims.1 != 0 || pending_cell_htlc_claims.1 != 0 || pending_cell_htlc_fails.1 != 0 { + if pending_htlc_adds.1 != 0 || pending_htlc_claims.1 != 0 || pending_htlc_fails.1 != 0 || pending_cell_htlc_claims.1 != 0 || pending_cell_htlc_fails.1 != 0 { let 
commitment_update = chan_msgs.2.unwrap(); if pending_htlc_adds.1 != -1 { // We use -1 to denote a response commitment_signed assert_eq!(commitment_update.update_add_htlcs.len(), pending_htlc_adds.1 as usize); } - assert_eq!(commitment_update.update_fulfill_htlcs.len(), pending_htlc_claims.0 + pending_cell_htlc_claims.0); - assert_eq!(commitment_update.update_fail_htlcs.len(), pending_cell_htlc_fails.0); + assert_eq!(commitment_update.update_fulfill_htlcs.len(), pending_htlc_claims.1 + pending_cell_htlc_claims.1); + assert_eq!(commitment_update.update_fail_htlcs.len(), pending_htlc_fails.1 + pending_cell_htlc_fails.1); assert!(commitment_update.update_fail_malformed_htlcs.is_empty()); for update_add in commitment_update.update_add_htlcs { node_b.node.handle_update_add_htlc(&node_a.node.get_our_node_id(), &update_add); diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 4e51f6b4bf5..f296cb3a7e7 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -3537,7 +3537,7 @@ fn test_dup_events_on_peer_disconnect() { nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (0, 0), (false, false)); assert!(nodes[0].node.get_and_clear_pending_events().is_empty()); } @@ -3553,7 +3553,7 @@ fn test_simple_peer_disconnect() { nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); let payment_preimage_1 = route_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 1000000).0; let payment_hash_2 = route_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 1000000).1; @@ -3562,7 +3562,7 @@ fn test_simple_peer_disconnect() { nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); let payment_preimage_3 = route_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 1000000).0; let payment_preimage_4 = route_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 1000000).0; @@ -3575,7 +3575,7 @@ fn test_simple_peer_disconnect() { claim_payment_along_route(&nodes[0], &[&[&nodes[1], &nodes[2]]], true, payment_preimage_3); fail_payment_along_route(&nodes[0], &[&nodes[1], &nodes[2]], true, payment_hash_5); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (1, 0), (1, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (1, 0), (1, 0), (false, false)); { let events = nodes[0].node.get_and_clear_pending_events(); assert_eq!(events.len(), 2); @@ -3679,19 +3679,19 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8, simulate_broken } // Even if the funding_locked messages get exchanged, as long as nothing further was // received on either side, both sides will need to resend 
them. - reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 1), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 1), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } else if messages_delivered == 3 { // nodes[0] still wants its RAA + commitment_signed - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (-1, 0), (0, 0), (0, 0), (0, 0), (true, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (-1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (true, false)); } else if messages_delivered == 4 { // nodes[0] still wants its commitment_signed - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (-1, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (-1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } else if messages_delivered == 5 { // nodes[1] still wants its final RAA - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, true)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, true)); } else if messages_delivered == 6 { // Everything was delivered... - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } let events_1 = nodes[1].node.get_and_clear_pending_events(); @@ -3703,7 +3703,7 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8, simulate_broken nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); nodes[1].node.process_pending_htlc_forwards(); @@ -3778,7 +3778,7 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8, simulate_broken nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); if messages_delivered < 2 { - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (0, 0), (false, false)); if messages_delivered < 1 { let events_4 = nodes[0].node.get_and_clear_pending_events(); assert_eq!(events_4.len(), 1); @@ -3793,21 +3793,21 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8, simulate_broken } } else if messages_delivered == 2 { // nodes[0] still wants its RAA + commitment_signed - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, -1), (0, 0), (0, 0), (0, 0), (false, true)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, -1), (0, 0), (0, 0), (0, 0), (0, 0), (false, true)); } else if messages_delivered == 3 { // nodes[0] still wants its commitment_signed - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, -1), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, -1), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } else if messages_delivered == 4 { // nodes[1] still wants its final RAA - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (true, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 
0), (0, 0), (0, 0), (0, 0), (0, 0), (true, false)); } else if messages_delivered == 5 { // Everything was delivered... - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); } nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); // Channel should still work fine... let net_graph_msg_handler = &nodes[0].net_graph_msg_handler; @@ -3859,7 +3859,7 @@ fn test_funding_peer_disconnect() { _ => panic!("Unexpected event"), } - reconnect_nodes(&nodes[0], &nodes[1], (false, true), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, true), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); @@ -3882,7 +3882,7 @@ fn test_funding_peer_disconnect() { _ => panic!("Unexpected event"), }; - reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); nodes[0].node.handle_funding_locked(&nodes[1].node.get_our_node_id(), &funding_locked); nodes[0].node.handle_announcement_signatures(&nodes[1].node.get_our_node_id(), &bs_announcement_sigs); @@ -3960,7 +3960,7 @@ fn test_funding_peer_disconnect() { nodes[0].node = &nodes_0_deserialized; check_added_monitors!(nodes[0], 1); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); // as_announcement should be re-generated exactly by broadcast_node_announcement. nodes[0].node.broadcast_node_announcement([0, 0, 0], [0; 32], Vec::new()); @@ -4696,7 +4696,7 @@ fn test_simple_manager_serialize_deserialize() { nodes[0].node = &nodes_0_deserialized; check_added_monitors!(nodes[0], 1); - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); fail_payment(&nodes[0], &[&nodes[1]], our_payment_hash); claim_payment(&nodes[0], &[&nodes[1]], our_payment_preimage); @@ -4810,8 +4810,8 @@ fn test_manager_serialize_deserialize_inconsistent_monitor() { nodes[0].node = &nodes_0_deserialized; // nodes[1] and nodes[2] have no lost state with nodes[0]... - reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); - reconnect_nodes(&nodes[0], &nodes[2], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[0], &nodes[2], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); //... and we can even still claim the payment! 
claim_payment(&nodes[2], &[&nodes[0], &nodes[1]], our_payment_preimage); diff --git a/lightning/src/ln/onion_route_tests.rs b/lightning/src/ln/onion_route_tests.rs index 3fc01f1bb8b..39a3735353e 100644 --- a/lightning/src/ln/onion_route_tests.rs +++ b/lightning/src/ln/onion_route_tests.rs @@ -482,7 +482,7 @@ fn test_onion_failure() { nodes[1].node.peer_disconnected(&nodes[2].node.get_our_node_id(), false); nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); }, true, Some(UPDATE|20), Some(msgs::HTLCFailChannelUpdate::ChannelUpdateMessage{msg: ChannelUpdate::dummy()})); - reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); run_onion_failure_test("expiry_too_far", 0, &nodes, &route, &payment_hash, &payment_secret, |msg| { let session_priv = SecretKey::from_slice(&[3; 32]).unwrap(); From 7e78fa660cec8a73286c94c1073ee588140e7a01 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Tue, 29 Jun 2021 21:05:45 +0000 Subject: [PATCH 2/4] Handle double-HTLC-claims without failing the backwards channel When receiving an update_fulfill_htlc message, we immediately forward the claim backwards along the payment path before waiting for a full commitment_signed dance. This is great, but can cause duplicative claims if a node sends an update_fulfill_htlc message, disconnects, reconnects, and then has to re-send its update_fulfill_htlc message again. While there was code to handle this, it treated it as a channel error on the inbound channel, which is incorrect - this is an expected, albeit incredibly rare, condition. Instead, we handle these double-claims correctly, simply ignoring them. With debug_assertions enabled, we also check that the previous close of the same HTLC was a fulfill, and that we are not moving from an HTLC failure to an HTLC claim after it's too late. A test is also added, which hits all three failure cases in `Channel::get_update_fulfill_htlc`. Found by the chanmon_consistency fuzzer. --- lightning/src/ln/chanmon_update_fail_tests.rs | 116 ++++++++++++++++++ lightning/src/ln/channel.rs | 77 ++++++++++-- 2 files changed, 184 insertions(+), 9 deletions(-) diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 2c62abc55f8..2918b4cef13 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -2215,3 +2215,119 @@ fn channel_holding_cell_serialize() { do_channel_holding_cell_serialize(true, false); do_channel_holding_cell_serialize(false, true); // last arg doesn't matter } + +#[derive(PartialEq)] +enum HTLCStatusAtDupClaim { + Received, + HoldingCell, + Cleared, +} +fn do_test_reconnect_dup_htlc_claims(htlc_status: HTLCStatusAtDupClaim, second_fails: bool) { + // When receiving an update_fulfill_htlc message, we immediately forward the claim backwards + // along the payment path before waiting for a full commitment_signed dance. This is great, but + // can cause duplicative claims if a node sends an update_fulfill_htlc message, disconnects, + // reconnects, and then has to re-send its update_fulfill_htlc message again. + // In previous code, we didn't handle the double-claim correctly, spuriously closing the + // channel on which the inbound HTLC was received.
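+ // The topology used below is nodes[0] <-> nodes[1] <-> nodes[2]: nodes[2] claims (or, with
+ // second_fails set, fails) the HTLC, nodes[1] immediately forwards the resolution towards
+ // nodes[0], and the nodes[1] <-> nodes[2] connection is then dropped so that nodes[2] re-sends
+ // the same resolution message on reconnect.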
+ let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]); + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known()); + let chan_2 = create_announced_chan_between_nodes(&nodes, 1, 2, InitFeatures::known(), InitFeatures::known()).2; + + let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000); + + let mut as_raa = None; + if htlc_status == HTLCStatusAtDupClaim::HoldingCell { + // In order to get the HTLC claim into the holding cell at nodes[1], we need nodes[1] to be + // awaiting a remote revoke_and_ack from nodes[0]. + let (_, second_payment_hash, second_payment_secret) = get_payment_preimage_hash!(nodes[1]); + let route = get_route(&nodes[0].node.get_our_node_id(), &nodes[0].net_graph_msg_handler.network_graph.read().unwrap(), + &nodes[1].node.get_our_node_id(), Some(InvoiceFeatures::known()), None, &Vec::new(), 100_000, TEST_FINAL_CLTV, nodes[1].logger).unwrap(); + nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret)).unwrap(); + check_added_monitors!(nodes[0], 1); + + let send_event = SendEvent::from_event(nodes[0].node.get_and_clear_pending_msg_events().remove(0)); + nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &send_event.msgs[0]); + nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &send_event.commitment_msg); + check_added_monitors!(nodes[1], 1); + + let (bs_raa, bs_cs) = get_revoke_commit_msgs!(nodes[1], nodes[0].node.get_our_node_id()); + nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_raa); + check_added_monitors!(nodes[0], 1); + nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_cs); + check_added_monitors!(nodes[0], 1); + + as_raa = Some(get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id())); + } + + let fulfill_msg = msgs::UpdateFulfillHTLC { + channel_id: chan_2, + htlc_id: 0, + payment_preimage, + }; + if second_fails { + assert!(nodes[2].node.fail_htlc_backwards(&payment_hash)); + expect_pending_htlcs_forwardable!(nodes[2]); + check_added_monitors!(nodes[2], 1); + get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id()); + } else { + assert!(nodes[2].node.claim_funds(payment_preimage)); + check_added_monitors!(nodes[2], 1); + let cs_updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id()); + assert_eq!(cs_updates.update_fulfill_htlcs.len(), 1); + // Check that the message we're about to deliver matches the one generated: + assert_eq!(fulfill_msg, cs_updates.update_fulfill_htlcs[0]); + } + nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &fulfill_msg); + check_added_monitors!(nodes[1], 1); + + let mut bs_updates = None; + if htlc_status != HTLCStatusAtDupClaim::HoldingCell { + bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id())); + assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1); + nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]); + expect_payment_sent!(nodes[0], payment_preimage); + if htlc_status == HTLCStatusAtDupClaim::Cleared { + commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false); + } + } else { + 
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + } + + nodes[1].node.peer_disconnected(&nodes[2].node.get_our_node_id(), false); + nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); + + if second_fails { + reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0), (false, false)); + expect_pending_htlcs_forwardable!(nodes[1]); + } else { + reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (0, 0), (false, false)); + } + + if htlc_status == HTLCStatusAtDupClaim::HoldingCell { + nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_raa.unwrap()); + check_added_monitors!(nodes[1], 1); + expect_pending_htlcs_forwardable_ignore!(nodes[1]); // We finally receive the second payment, but don't claim it + + bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id())); + assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1); + nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]); + expect_payment_sent!(nodes[0], payment_preimage); + } + if htlc_status != HTLCStatusAtDupClaim::Cleared { + commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false); + } +} + +#[test] +fn test_reconnect_dup_htlc_claims() { + do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Received, false); + do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::HoldingCell, false); + do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Cleared, false); + do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Received, true); + do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::HoldingCell, true); + do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Cleared, true); +} diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index d8a284fa818..6d955257734 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -444,6 +444,15 @@ pub(super) struct Channel { /// /// See-also <https://github.com/lightningnetwork/lnd/issues/4006> pub workaround_lnd_bug_4006: Option<msgs::FundingLocked>, + + #[cfg(any(test, feature = "fuzztarget"))] + // When we receive an HTLC fulfill on an outbound path, we may immediately fulfill the + // corresponding HTLC on the inbound path. If, then, the outbound path channel is + // disconnected and reconnected (before we've exchanged commitment_signed and revoke_and_ack + // messages), they may re-broadcast their update_fulfill_htlc, causing a duplicate claim. This + // is fine, but as a sanity check in our failure to generate the second claim, we check here + // that the original was a claim, and that we aren't now trying to fulfill a failed HTLC.
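+ // For example: we forward an HTLC from A towards C, C sends us its update_fulfill_htlc, we
+ // immediately claim the inbound HTLC from A and record its id here, and then the C side
+ // disconnects and re-sends the same update_fulfill_htlc on reconnect. The second claim attempt
+ // finds no pending inbound HTLC, and this set lets the assertions confirm that the earlier
+ // resolution of that id really was a fulfill.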
+ historical_inbound_htlc_fulfills: HashSet, } #[cfg(any(test, feature = "fuzztarget"))] @@ -645,6 +654,9 @@ impl Channel { next_remote_commitment_tx_fee_info_cached: Mutex::new(None), workaround_lnd_bug_4006: None, + + #[cfg(any(test, feature = "fuzztarget"))] + historical_inbound_htlc_fulfills: HashSet::new(), }) } @@ -890,6 +902,9 @@ impl Channel { next_remote_commitment_tx_fee_info_cached: Mutex::new(None), workaround_lnd_bug_4006: None, + + #[cfg(any(test, feature = "fuzztarget"))] + historical_inbound_htlc_fulfills: HashSet::new(), }; Ok(chan) @@ -1249,8 +1264,8 @@ impl Channel { if let &InboundHTLCRemovalReason::Fulfill(_) = reason { } else { log_warn!(logger, "Have preimage and want to fulfill HTLC with payment hash {} we already failed against channel {}", log_bytes!(htlc.payment_hash.0), log_bytes!(self.channel_id())); + debug_assert!(false, "Tried to fulfill an HTLC that was already failed"); } - debug_assert!(false, "Tried to fulfill an HTLC that was already fail/fulfilled"); return Ok((None, None)); }, _ => { @@ -1263,7 +1278,11 @@ impl Channel { } } if pending_idx == core::usize::MAX { - return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned())); + #[cfg(any(test, feature = "fuzztarget"))] + // If we failed to find an HTLC to fulfill, make sure it was previously fulfilled and + // this is simply a duplicate claim, not previously failed and we lost funds. + debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg)); + return Ok((None, None)); } // Now update local state: @@ -1285,7 +1304,8 @@ impl Channel { if htlc_id_arg == htlc_id { // Make sure we don't leave latest_monitor_update_id incremented here: self.latest_monitor_update_id -= 1; - debug_assert!(false, "Tried to fulfill an HTLC that was already fulfilled"); + #[cfg(any(test, feature = "fuzztarget"))] + debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg)); return Ok((None, None)); } }, @@ -1305,8 +1325,12 @@ impl Channel { self.holding_cell_htlc_updates.push(HTLCUpdateAwaitingACK::ClaimHTLC { payment_preimage: payment_preimage_arg, htlc_id: htlc_id_arg, }); + #[cfg(any(test, feature = "fuzztarget"))] + self.historical_inbound_htlc_fulfills.insert(htlc_id_arg); return Ok((None, Some(monitor_update))); } + #[cfg(any(test, feature = "fuzztarget"))] + self.historical_inbound_htlc_fulfills.insert(htlc_id_arg); { let htlc = &mut self.pending_inbound_htlcs[pending_idx]; @@ -1366,8 +1390,11 @@ impl Channel { if htlc.htlc_id == htlc_id_arg { match htlc.state { InboundHTLCState::Committed => {}, - InboundHTLCState::LocalRemoved(_) => { - debug_assert!(false, "Tried to fail an HTLC that was already fail/fulfilled"); + InboundHTLCState::LocalRemoved(ref reason) => { + if let &InboundHTLCRemovalReason::Fulfill(_) = reason { + } else { + debug_assert!(false, "Tried to fail an HTLC that was already failed"); + } return Ok(None); }, _ => { @@ -1379,7 +1406,11 @@ impl Channel { } } if pending_idx == core::usize::MAX { - return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned())); + #[cfg(any(test, feature = "fuzztarget"))] + // If we failed to find an HTLC to fail, make sure it was previously fulfilled and this + // is simply a duplicate fail, not previously failed and we failed-back too early. 
+ debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg)); + return Ok(None); } // Now update local state: @@ -1388,8 +1419,9 @@ impl Channel { match pending_update { &HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => { if htlc_id_arg == htlc_id { - debug_assert!(false, "Tried to fail an HTLC that was already fulfilled"); - return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned())); + #[cfg(any(test, feature = "fuzztarget"))] + debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg)); + return Ok(None); } }, &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. } => { @@ -2453,7 +2485,14 @@ impl Channel { }, &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => { match self.get_update_fail_htlc(htlc_id, err_packet.clone(), logger) { - Ok(update_fail_msg_option) => update_fail_htlcs.push(update_fail_msg_option.unwrap()), + Ok(update_fail_msg_option) => { + // If an HTLC failure was previously added to the holding cell (via + // `get_update_fail_htlc`) then generating the fail message itself + // must not fail - we should never end up in a state where we + // double-fail an HTLC or fail-then-claim an HTLC as it indicates + // we didn't wait for a full revocation before failing. + update_fail_htlcs.push(update_fail_msg_option.unwrap()) + }, Err(e) => { if let ChannelError::Ignore(_) = e {} else { @@ -4690,6 +4729,13 @@ impl Writeable for Channel { self.channel_update_status.write(writer)?; + #[cfg(any(test, feature = "fuzztarget"))] + (self.historical_inbound_htlc_fulfills.len() as u64).write(writer)?; + #[cfg(any(test, feature = "fuzztarget"))] + for htlc in self.historical_inbound_htlc_fulfills.iter() { + htlc.write(writer)?; + } + write_tlv_fields!(writer, { (0, self.announcement_sigs, option), // minimum_depth and counterparty_selected_channel_reserve_satoshis used to have a @@ -4882,6 +4928,16 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel let channel_update_status = Readable::read(reader)?; + #[cfg(any(test, feature = "fuzztarget"))] + let mut historical_inbound_htlc_fulfills = HashSet::new(); + #[cfg(any(test, feature = "fuzztarget"))] + { + let htlc_fulfills_len: u64 = Readable::read(reader)?; + for _ in 0..htlc_fulfills_len { + assert!(historical_inbound_htlc_fulfills.insert(Readable::read(reader)?)); + } + } + let mut announcement_sigs = None; read_tlv_fields!(reader, { (0, announcement_sigs, option), @@ -4973,6 +5029,9 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel next_remote_commitment_tx_fee_info_cached: Mutex::new(None), workaround_lnd_bug_4006: None, + + #[cfg(any(test, feature = "fuzztarget"))] + historical_inbound_htlc_fulfills, }) } } From c09104f46ef5b44a1efb38669b95d21fa77d37ec Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 15 Jul 2021 21:56:42 +0000 Subject: [PATCH 3/4] Simplify call graph of get_update_fulfill_htlc since it can't Err. --- lightning/src/ln/channel.rs | 113 +++++++++++++++++------------ lightning/src/ln/channelmanager.rs | 36 ++++----- 2 files changed, 83 insertions(+), 66 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 6d955257734..f46d76042df 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -301,6 +301,33 @@ pub struct CounterpartyForwardingInfo { pub cltv_expiry_delta: u16, } +/// A return value enum for get_update_fulfill_htlc. 
See UpdateFulfillCommitFetch variants for +/// description +enum UpdateFulfillFetch { + NewClaim { + monitor_update: ChannelMonitorUpdate, + msg: Option, + }, + DuplicateClaim {}, +} + +/// The return type of get_update_fulfill_htlc_and_commit. +pub enum UpdateFulfillCommitFetch { + /// Indicates the HTLC fulfill is new, and either generated an update_fulfill message, placed + /// it in the holding cell, or re-generated the update_fulfill message after the same claim was + /// previously placed in the holding cell (and has since been removed). + NewClaim { + /// The ChannelMonitorUpdate which places the new payment preimage in the channel monitor + monitor_update: ChannelMonitorUpdate, + /// The update_fulfill message and commitment_signed message (if the claim was not placed + /// in the holding cell). + msgs: Option<(msgs::UpdateFulfillHTLC, msgs::CommitmentSigned)>, + }, + /// Indicates the HTLC fulfill is duplicative and already existed either in the holding cell + /// or has been forgotten (presumably previously claimed). + DuplicateClaim {}, +} + // TODO: We should refactor this to be an Inbound/OutboundChannel until initial setup handshaking // has been completed, and then turn into a Channel to get compiler-time enforcement of things like // calling channel_id() before we're set up or things like get_outbound_funding_signed on an @@ -1232,13 +1259,7 @@ impl Channel { make_funding_redeemscript(&self.get_holder_pubkeys().funding_pubkey, self.counterparty_funding_pubkey()) } - /// Per HTLC, only one get_update_fail_htlc or get_update_fulfill_htlc call may be made. - /// In such cases we debug_assert!(false) and return a ChannelError::Ignore. Thus, will always - /// return Ok(_) if debug assertions are turned on or preconditions are met. - /// - /// Note that it is still possible to hit these assertions in case we find a preimage on-chain - /// but then have a reorg which settles on an HTLC-failure on chain. - fn get_update_fulfill_htlc(&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) -> Result<(Option, Option), ChannelError> where L::Target: Logger { + fn get_update_fulfill_htlc(&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) -> UpdateFulfillFetch where L::Target: Logger { // Either ChannelFunded got set (which means it won't be unset) or there is no way any // caller thought we could have something claimed (cause we wouldn't have accepted in an // incoming HTLC anyway). If we got to ShutdownComplete, callers aren't allowed to call us, @@ -1266,7 +1287,7 @@ impl Channel { log_warn!(logger, "Have preimage and want to fulfill HTLC with payment hash {} we already failed against channel {}", log_bytes!(htlc.payment_hash.0), log_bytes!(self.channel_id())); debug_assert!(false, "Tried to fulfill an HTLC that was already failed"); } - return Ok((None, None)); + return UpdateFulfillFetch::DuplicateClaim {}; }, _ => { debug_assert!(false, "Have an inbound HTLC we tried to claim before it was fully committed to"); @@ -1282,7 +1303,7 @@ impl Channel { // If we failed to find an HTLC to fulfill, make sure it was previously fulfilled and // this is simply a duplicate claim, not previously failed and we lost funds. 
debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg)); - return Ok((None, None)); + return UpdateFulfillFetch::DuplicateClaim {}; } // Now update local state: @@ -1306,7 +1327,7 @@ impl Channel { self.latest_monitor_update_id -= 1; #[cfg(any(test, feature = "fuzztarget"))] debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg)); - return Ok((None, None)); + return UpdateFulfillFetch::DuplicateClaim {}; } }, &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. } => { @@ -1315,7 +1336,7 @@ impl Channel { // TODO: We may actually be able to switch to a fulfill here, though it's // rare enough it may not be worth the complexity burden. debug_assert!(false, "Tried to fulfill an HTLC that was already failed"); - return Ok((None, Some(monitor_update))); + return UpdateFulfillFetch::NewClaim { monitor_update, msg: None }; } }, _ => {} @@ -1327,7 +1348,7 @@ impl Channel { }); #[cfg(any(test, feature = "fuzztarget"))] self.historical_inbound_htlc_fulfills.insert(htlc_id_arg); - return Ok((None, Some(monitor_update))); + return UpdateFulfillFetch::NewClaim { monitor_update, msg: None }; } #[cfg(any(test, feature = "fuzztarget"))] self.historical_inbound_htlc_fulfills.insert(htlc_id_arg); @@ -1337,44 +1358,43 @@ impl Channel { if let InboundHTLCState::Committed = htlc.state { } else { debug_assert!(false, "Have an inbound HTLC we tried to claim before it was fully committed to"); - return Ok((None, Some(monitor_update))); + return UpdateFulfillFetch::NewClaim { monitor_update, msg: None }; } log_trace!(logger, "Upgrading HTLC {} to LocalRemoved with a Fulfill in channel {}!", log_bytes!(htlc.payment_hash.0), log_bytes!(self.channel_id)); htlc.state = InboundHTLCState::LocalRemoved(InboundHTLCRemovalReason::Fulfill(payment_preimage_arg.clone())); } - Ok((Some(msgs::UpdateFulfillHTLC { - channel_id: self.channel_id(), - htlc_id: htlc_id_arg, - payment_preimage: payment_preimage_arg, - }), Some(monitor_update))) + UpdateFulfillFetch::NewClaim { + monitor_update, + msg: Some(msgs::UpdateFulfillHTLC { + channel_id: self.channel_id(), + htlc_id: htlc_id_arg, + payment_preimage: payment_preimage_arg, + }), + } } - pub fn get_update_fulfill_htlc_and_commit<L: Deref>(&mut self, htlc_id: u64, payment_preimage: PaymentPreimage, logger: &L) -> Result<(Option<(msgs::UpdateFulfillHTLC, msgs::CommitmentSigned)>, Option<ChannelMonitorUpdate>), ChannelError> where L::Target: Logger { - match self.get_update_fulfill_htlc(htlc_id, payment_preimage, logger)? { - (Some(update_fulfill_htlc), Some(mut monitor_update)) => { + pub fn get_update_fulfill_htlc_and_commit<L: Deref>(&mut self, htlc_id: u64, payment_preimage: PaymentPreimage, logger: &L) -> Result<UpdateFulfillCommitFetch, ChannelError> where L::Target: Logger { + match self.get_update_fulfill_htlc(htlc_id, payment_preimage, logger) { + UpdateFulfillFetch::NewClaim { mut monitor_update, msg: Some(update_fulfill_htlc) } => { let (commitment, mut additional_update) = self.send_commitment_no_status_check(logger)?; // send_commitment_no_status_check may bump latest_monitor_id but we want them to be // strictly increasing by one, so decrement it here.
self.latest_monitor_update_id = monitor_update.update_id; monitor_update.updates.append(&mut additional_update.updates); - Ok((Some((update_fulfill_htlc, commitment)), Some(monitor_update))) - }, - (Some(update_fulfill_htlc), None) => { - let (commitment, monitor_update) = self.send_commitment_no_status_check(logger)?; - Ok((Some((update_fulfill_htlc, commitment)), Some(monitor_update))) + Ok(UpdateFulfillCommitFetch::NewClaim { monitor_update, msgs: Some((update_fulfill_htlc, commitment)) }) }, - (None, Some(monitor_update)) => Ok((None, Some(monitor_update))), - (None, None) => Ok((None, None)) + UpdateFulfillFetch::NewClaim { monitor_update, msg: None } => Ok(UpdateFulfillCommitFetch::NewClaim { monitor_update, msgs: None }), + UpdateFulfillFetch::DuplicateClaim {} => Ok(UpdateFulfillCommitFetch::DuplicateClaim {}), } } - /// Per HTLC, only one get_update_fail_htlc or get_update_fulfill_htlc call may be made. - /// In such cases we debug_assert!(false) and return a ChannelError::Ignore. Thus, will always - /// return Ok(_) if debug assertions are turned on or preconditions are met. - /// - /// Note that it is still possible to hit these assertions in case we find a preimage on-chain - /// but then have a reorg which settles on an HTLC-failure on chain. + /// We can only have one resolution per HTLC. In some cases around reconnect, we may fulfill + /// an HTLC more than once or fulfill once and then attempt to fail after reconnect. We cannot, + /// however, fail more than once as we wait for an upstream failure to be irrevocably committed + /// before we fail backwards. + /// If we do fail twice, we debug_assert!(false) and return Ok(None). Thus, this will always + /// return Ok(_) if debug assertions are turned on or preconditions are met. pub fn get_update_fail_htlc<L: Deref>(&mut self, htlc_id_arg: u64, err_packet: msgs::OnionErrorPacket, logger: &L) -> Result<Option<msgs::UpdateFailHTLC>, ChannelError> where L::Target: Logger { if (self.channel_state & (ChannelState::ChannelFunded as u32)) != (ChannelState::ChannelFunded as u32) { panic!("Was asked to fail an HTLC when channel was not in an operational state"); } @@ -2468,20 +2488,17 @@ impl Channel { } }, &HTLCUpdateAwaitingACK::ClaimHTLC { ref payment_preimage, htlc_id, .. } => { - match self.get_update_fulfill_htlc(htlc_id, *payment_preimage, logger) { - Ok((update_fulfill_msg_option, additional_monitor_update_opt)) => { - update_fulfill_htlcs.push(update_fulfill_msg_option.unwrap()); - if let Some(mut additional_monitor_update) = additional_monitor_update_opt { - monitor_update.updates.append(&mut additional_monitor_update.updates); - } - }, - Err(e) => { - if let ChannelError::Ignore(_) = e {} - else { - panic!("Got a non-IgnoreError action trying to fulfill holding cell HTLC"); - } - } - } + // If an HTLC claim was previously added to the holding cell (via + // `get_update_fulfill_htlc`), then generating the claim message itself must + // not fail - any in-between attempts to claim the HTLC will have resulted + // in it hitting the holding cell again and we cannot change the state of a + // holding cell HTLC from fulfill to anything else.
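+ // (This invariant is also what makes the `unreachable!()` in the binding below safe: a
+ // holding-cell ClaimHTLC can only ever resolve to UpdateFulfillFetch::NewClaim here.)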
+ let (update_fulfill_msg_option, mut additional_monitor_update) = + if let UpdateFulfillFetch::NewClaim { msg, monitor_update } = self.get_update_fulfill_htlc(htlc_id, *payment_preimage, logger) { + (msg, monitor_update) + } else { unreachable!() }; + update_fulfill_htlcs.push(update_fulfill_msg_option.unwrap()); + monitor_update.updates.append(&mut additional_monitor_update.updates); }, &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => { match self.get_update_fail_htlc(htlc_id, err_packet.clone(), logger) { diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 21d13eaefc2..67975238050 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -44,7 +44,7 @@ use chain::transaction::{OutPoint, TransactionData}; // construct one themselves. use ln::{PaymentHash, PaymentPreimage, PaymentSecret}; pub use ln::channel::CounterpartyForwardingInfo; -use ln::channel::{Channel, ChannelError, ChannelUpdateStatus}; +use ln::channel::{Channel, ChannelError, ChannelUpdateStatus, UpdateFulfillCommitFetch}; use ln::features::{InitFeatures, NodeFeatures}; use routing::router::{Route, RouteHop}; use ln::msgs; @@ -2681,8 +2681,8 @@ impl ChannelMana if let hash_map::Entry::Occupied(mut chan) = channel_state.by_id.entry(chan_id) { let was_frozen_for_monitor = chan.get().is_awaiting_monitor_update(); match chan.get_mut().get_update_fulfill_htlc_and_commit(prev_hop.htlc_id, payment_preimage, &self.logger) { - Ok((msgs, monitor_option)) => { - if let Some(monitor_update) = monitor_option { + Ok(msgs_monitor_option) => { + if let UpdateFulfillCommitFetch::NewClaim { msgs, monitor_update } = msgs_monitor_option { if let Err(e) = self.chain_monitor.update_channel(chan.get().get_funding_txo().unwrap(), monitor_update) { if was_frozen_for_monitor { assert!(msgs.is_none()); @@ -2690,21 +2690,21 @@ impl ChannelMana return Err(Some((chan.get().get_counterparty_node_id(), handle_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::CommitmentFirst, false, msgs.is_some()).unwrap_err()))); } } - } - if let Some((msg, commitment_signed)) = msgs { - log_debug!(self.logger, "Claiming funds for HTLC with preimage {} resulted in a commitment_signed for channel {}", - log_bytes!(payment_preimage.0), log_bytes!(chan.get().channel_id())); - channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs { - node_id: chan.get().get_counterparty_node_id(), - updates: msgs::CommitmentUpdate { - update_add_htlcs: Vec::new(), - update_fulfill_htlcs: vec![msg], - update_fail_htlcs: Vec::new(), - update_fail_malformed_htlcs: Vec::new(), - update_fee: None, - commitment_signed, - } - }); + if let Some((msg, commitment_signed)) = msgs { + log_debug!(self.logger, "Claiming funds for HTLC with preimage {} resulted in a commitment_signed for channel {}", + log_bytes!(payment_preimage.0), log_bytes!(chan.get().channel_id())); + channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs { + node_id: chan.get().get_counterparty_node_id(), + updates: msgs::CommitmentUpdate { + update_add_htlcs: Vec::new(), + update_fulfill_htlcs: vec![msg], + update_fail_htlcs: Vec::new(), + update_fail_malformed_htlcs: Vec::new(), + update_fee: None, + commitment_signed, + } + }); + } } return Ok(()) }, From f06f9d11365360dc2add96acd7916673ea9ce383 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 15 Jul 2021 22:26:51 +0000 Subject: [PATCH 4/4] Fail channel if we can't sign a new commitment tx during HTLC claim Previously, we could fail 
to generate a new commitment transaction but it simply indicated we had gone to double-claim an HTLC. Now that double-claims are returned instead as Ok(None), we should handle the error case and fail the channel, as the only way to hit the error case is if key derivation failed or the user refused to sign the new commitment transaction. This also resolves an issue where we wouldn't inform our ChannelMonitor of the new payment preimage in case we failed to fetch a signature for the new commitment transaction. --- lightning/src/chain/channelmonitor.rs | 2 +- lightning/src/ln/channel.rs | 7 +++-- lightning/src/ln/channelmanager.rs | 37 +++++++++++++++------------ 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index b984e521c48..42c3e13a34f 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -114,7 +114,7 @@ impl Readable for ChannelMonitorUpdate { } /// An error enum representing a failure to persist a channel monitor update. -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum ChannelMonitorUpdateErr { /// Used to indicate a temporary failure (eg connection to a watchtower or remote backup of /// our state failed, but is expected to succeed at some point in the future). diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index f46d76042df..5a0ceabed53 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -1374,10 +1374,13 @@ impl Channel { } } - pub fn get_update_fulfill_htlc_and_commit<L: Deref>(&mut self, htlc_id: u64, payment_preimage: PaymentPreimage, logger: &L) -> Result<UpdateFulfillCommitFetch, ChannelError> where L::Target: Logger { + pub fn get_update_fulfill_htlc_and_commit<L: Deref>(&mut self, htlc_id: u64, payment_preimage: PaymentPreimage, logger: &L) -> Result<UpdateFulfillCommitFetch, (ChannelError, ChannelMonitorUpdate)> where L::Target: Logger { match self.get_update_fulfill_htlc(htlc_id, payment_preimage, logger) { UpdateFulfillFetch::NewClaim { mut monitor_update, msg: Some(update_fulfill_htlc) } => { - let (commitment, mut additional_update) = self.send_commitment_no_status_check(logger)?; + let (commitment, mut additional_update) = match self.send_commitment_no_status_check(logger) { + Err(e) => return Err((e, monitor_update)), + Ok(res) => res + }; // send_commitment_no_status_check may bump latest_monitor_id but we want them to be // strictly increasing by one, so decrement it here.
self.latest_monitor_update_id = monitor_update.update_id; diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 67975238050..482c1635631 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -57,7 +57,7 @@ use util::events::{EventHandler, EventsProvider, MessageSendEvent, MessageSendEv use util::{byte_utils, events}; use util::ser::{Readable, ReadableArgs, MaybeReadable, Writeable, Writer}; use util::chacha20::{ChaCha20, ChaChaReader}; -use util::logger::Logger; +use util::logger::{Logger, Level}; use util::errors::APIError; use prelude::*; @@ -2679,16 +2679,17 @@ impl ChannelMana }; if let hash_map::Entry::Occupied(mut chan) = channel_state.by_id.entry(chan_id) { - let was_frozen_for_monitor = chan.get().is_awaiting_monitor_update(); match chan.get_mut().get_update_fulfill_htlc_and_commit(prev_hop.htlc_id, payment_preimage, &self.logger) { Ok(msgs_monitor_option) => { if let UpdateFulfillCommitFetch::NewClaim { msgs, monitor_update } = msgs_monitor_option { if let Err(e) = self.chain_monitor.update_channel(chan.get().get_funding_txo().unwrap(), monitor_update) { - if was_frozen_for_monitor { - assert!(msgs.is_none()); - } else { - return Err(Some((chan.get().get_counterparty_node_id(), handle_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::CommitmentFirst, false, msgs.is_some()).unwrap_err()))); - } + log_given_level!(self.logger, if e == ChannelMonitorUpdateErr::PermanentFailure { Level::Error } else { Level::Debug }, + "Failed to update channel monitor with preimage {:?}: {:?}", + payment_preimage, e); + return Err(Some(( + chan.get().get_counterparty_node_id(), + handle_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::CommitmentFirst, false, msgs.is_some()).unwrap_err(), + ))); } if let Some((msg, commitment_signed)) = msgs { log_debug!(self.logger, "Claiming funds for HTLC with preimage {} resulted in a commitment_signed for channel {}", @@ -2708,16 +2709,18 @@ impl ChannelMana } return Ok(()) }, - Err(e) => { - // TODO: Do something with e? - // This should only occur if we are claiming an HTLC at the same time as the - // HTLC is being failed (eg because a block is being connected and this caused - // an HTLC to time out). This should, of course, only occur if the user is the - // one doing the claiming (as it being a part of a peer claim would imply we're - // about to lose funds) and only if the lock in claim_funds was dropped as a - // previous HTLC was failed (thus not for an MPP payment). - debug_assert!(false, "This shouldn't be reachable except in absurdly rare cases between monitor updates and HTLC timeouts: {:?}", e); - return Err(None) + Err((e, monitor_update)) => { + if let Err(e) = self.chain_monitor.update_channel(chan.get().get_funding_txo().unwrap(), monitor_update) { + log_given_level!(self.logger, if e == ChannelMonitorUpdateErr::PermanentFailure { Level::Error } else { Level::Info }, + "Failed to update channel monitor with preimage {:?} immediately prior to force-close: {:?}", + payment_preimage, e); + } + let counterparty_node_id = chan.get().get_counterparty_node_id(); + let (drop, res) = convert_chan_err!(self, e, channel_state.short_to_id, chan.get_mut(), &chan_id); + if drop { + chan.remove_entry(); + } + return Err(Some((counterparty_node_id, res))); }, } } else { unreachable!(); }
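Taken together, the series turns a duplicate HTLC claim from a channel-fatal error into an explicitly representable, ignorable result. The standalone sketch below illustrates the shape of that control flow; it is illustrative only, with stand-in names (`MonitorUpdate`, `FulfillMsg`, `claim_htlc`, and the bare `HashSet` gating) rather than LDK's real API:

use std::collections::HashSet;

struct MonitorUpdate { update_id: u64 }
struct FulfillMsg { htlc_id: u64 }

enum UpdateFulfillFetch {
    // A genuinely new claim: always carries a monitor update, and carries a
    // message unless the claim had to wait in the holding cell.
    NewClaim { monitor_update: MonitorUpdate, msg: Option<FulfillMsg> },
    // The HTLC was already claimed, e.g. because the peer re-sent its
    // update_fulfill_htlc after a reconnect; callers simply ignore this
    // variant instead of treating it as a channel error.
    DuplicateClaim {},
}

fn claim_htlc(fulfilled: &mut HashSet<u64>, htlc_id: u64) -> UpdateFulfillFetch {
    if !fulfilled.insert(htlc_id) {
        // Second sight of the same id: a benign duplicate, not a protocol error.
        return UpdateFulfillFetch::DuplicateClaim {};
    }
    UpdateFulfillFetch::NewClaim {
        monitor_update: MonitorUpdate { update_id: 1 },
        msg: Some(FulfillMsg { htlc_id }),
    }
}

fn main() {
    let mut fulfilled = HashSet::new();
    assert!(matches!(claim_htlc(&mut fulfilled, 0), UpdateFulfillFetch::NewClaim { .. }));
    // A re-delivered fulfill for the same HTLC is now a no-op:
    assert!(matches!(claim_htlc(&mut fulfilled, 0), UpdateFulfillFetch::DuplicateClaim {}));
}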