From fca67bcaa4c7bf36da7cebf142b0baf33488909d Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Sun, 24 Apr 2022 20:57:53 +0000
Subject: [PATCH 1/2] [lightning-net-tokio] Fix race-y unwrap fetching peer socket address

I recently saw the following panic on one of my test nodes:

```
thread 'tokio-runtime-worker' panicked at 'called `Result::unwrap()` on
an `Err` value: Os { code: 107, kind: NotConnected, message: "Transport
endpoint is not connected" }',
rust-lightning/lightning-net-tokio/src/lib.rs:250:38
```

Presumably what happened is that the connection was somehow closed
between our accepting it and our starting to process it. While this is
a somewhat surprising race, it's clearly reachable.

The fix proposed here is trivial: simply don't `unwrap` when fetching
our peer's socket address; instead, treat the peer address as `None`
and discover the disconnection later when we go to read.
---
 lightning-net-tokio/src/lib.rs | 41 +++++++++++++++-------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/lightning-net-tokio/src/lib.rs b/lightning-net-tokio/src/lib.rs
index a9fd861bc84..5e1aa40b339 100644
--- a/lightning-net-tokio/src/lib.rs
+++ b/lightning-net-tokio/src/lib.rs
@@ -85,7 +85,6 @@ use lightning::ln::msgs::{ChannelMessageHandler, RoutingMessageHandler, NetAddre
 use lightning::util::logger::Logger;
 
 use std::task;
-use std::net::IpAddr;
 use std::net::SocketAddr;
 use std::net::TcpStream as StdTcpStream;
 use std::sync::{Arc, Mutex};
@@ -212,6 +211,20 @@ impl Connection {
 	}
 }
 
+fn get_addr_from_stream(stream: &StdTcpStream) -> Option<NetAddress> {
+	match stream.peer_addr() {
+		Ok(SocketAddr::V4(sockaddr)) => Some(NetAddress::IPv4 {
+			addr: sockaddr.ip().octets(),
+			port: sockaddr.port(),
+		}),
+		Ok(SocketAddr::V6(sockaddr)) => Some(NetAddress::IPv6 {
+			addr: sockaddr.ip().octets(),
+			port: sockaddr.port(),
+		}),
+		Err(_) => None,
+	}
+}
+
 /// Process incoming messages and feed outgoing messages on the provided socket generated by
 /// accepting an incoming connection.
 ///
@@ -223,21 +236,12 @@ pub fn setup_inbound(peer_manager: Arc
-	let ip_addr = stream.peer_addr().unwrap();
+	let remote_addr = get_addr_from_stream(&stream);
 	let (reader, write_receiver, read_receiver, us) = Connection::new(stream);
 	#[cfg(debug_assertions)]
 	let last_us = Arc::clone(&us);
 
-	let handle_opt = if let Ok(_) = peer_manager.new_inbound_connection(SocketDescriptor::new(us.clone()), match ip_addr.ip() {
-		IpAddr::V4(ip) => Some(NetAddress::IPv4 {
-			addr: ip.octets(),
-			port: ip_addr.port(),
-		}),
-		IpAddr::V6(ip) => Some(NetAddress::IPv6 {
-			addr: ip.octets(),
-			port: ip_addr.port(),
-		}),
-	}) {
+	let handle_opt = if let Ok(_) = peer_manager.new_inbound_connection(SocketDescriptor::new(us.clone()), remote_addr) {
 		Some(tokio::spawn(Connection::schedule_read(peer_manager, us, reader, read_receiver, write_receiver)))
 	} else {
 		// Note that we will skip socket_disconnected here, in accordance with the PeerManager
@@ -274,20 +278,11 @@ pub fn setup_outbound(peer_manager: Arc
-	let ip_addr = stream.peer_addr().unwrap();
+	let remote_addr = get_addr_from_stream(&stream);
 	let (reader, write_receiver, read_receiver, us) = Connection::new(stream);
 	#[cfg(debug_assertions)]
 	let last_us = Arc::clone(&us);
 
-	let handle_opt = if let Ok(initial_send) = peer_manager.new_outbound_connection(their_node_id, SocketDescriptor::new(us.clone()), match ip_addr.ip() {
-		IpAddr::V4(ip) => Some(NetAddress::IPv4 {
-			addr: ip.octets(),
-			port: ip_addr.port(),
-		}),
-		IpAddr::V6(ip) => Some(NetAddress::IPv6 {
-			addr: ip.octets(),
-			port: ip_addr.port(),
-		}),
-	}) {
+	let handle_opt = if let Ok(initial_send) = peer_manager.new_outbound_connection(their_node_id, SocketDescriptor::new(us.clone()), remote_addr) {
 		Some(tokio::spawn(async move {
 			// We should essentially always have enough room in a TCP socket buffer to send the
 			// initial 10s of bytes. However, tokio running in single-threaded mode will always
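For readers skimming the diff, the pattern of the fix, reduced to plain `std` types, looks like the sketch below. The helper name `describe_peer` and the `main` harness are illustrative only and are not part of lightning-net-tokio:

```rust
use std::net::{SocketAddr, TcpListener, TcpStream};

// Map a fallible peer_addr() lookup into an Option instead of unwrapping.
// peer_addr() can fail (e.g. ENOTCONN) if the peer hung up between accept()
// and this call, which is exactly the race the commit message describes.
fn describe_peer(stream: &TcpStream) -> Option<SocketAddr> {
	stream.peer_addr().ok()
}

fn main() -> std::io::Result<()> {
	// Bind an ephemeral localhost port so the example does not depend on a fixed port.
	let listener = TcpListener::bind("127.0.0.1:0")?;
	let _client = TcpStream::connect(listener.local_addr()?)?;
	let (accepted, _) = listener.accept()?;
	match describe_peer(&accepted) {
		Some(addr) => println!("peer address: {}", addr),
		None => println!("peer already disconnected; address unknown"),
	}
	Ok(())
}
```

Mapping the `io::Result` to an `Option` defers handling of an already-dead socket to the first read, which is what the commit message relies on: the address is simply reported as `None` and the disconnection is discovered later.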
From 3f22d81a70a75edf7ed4e59ad0e3318ac8b49631 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Mon, 25 Apr 2022 18:39:28 +0000
Subject: [PATCH 2/2] Add a test for socket connection races

Sadly this does not reproduce the issue fixed in the previous commit.
---
 lightning-net-tokio/src/lib.rs | 69 ++++++++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 7 deletions(-)

diff --git a/lightning-net-tokio/src/lib.rs b/lightning-net-tokio/src/lib.rs
index 5e1aa40b339..22df5c3e8bd 100644
--- a/lightning-net-tokio/src/lib.rs
+++ b/lightning-net-tokio/src/lib.rs
@@ -556,6 +556,22 @@ mod tests {
 		}
 	}
 
+	fn make_tcp_connection() -> (std::net::TcpStream, std::net::TcpStream) {
+		if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9735") {
+			(std::net::TcpStream::connect("127.0.0.1:9735").unwrap(), listener.accept().unwrap().0)
+		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:19735") {
+			(std::net::TcpStream::connect("127.0.0.1:19735").unwrap(), listener.accept().unwrap().0)
+		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9997") {
+			(std::net::TcpStream::connect("127.0.0.1:9997").unwrap(), listener.accept().unwrap().0)
+		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9998") {
+			(std::net::TcpStream::connect("127.0.0.1:9998").unwrap(), listener.accept().unwrap().0)
+		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9999") {
+			(std::net::TcpStream::connect("127.0.0.1:9999").unwrap(), listener.accept().unwrap().0)
+		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:46926") {
+			(std::net::TcpStream::connect("127.0.0.1:46926").unwrap(), listener.accept().unwrap().0)
+		} else { panic!("Failed to bind to v4 localhost on common ports"); }
+	}
+
 	async fn do_basic_connection_test() {
 		let secp_ctx = Secp256k1::new();
 		let a_key = SecretKey::from_slice(&[1; 32]).unwrap();
@@ -595,13 +611,7 @@ mod tests {
 		// address. This may not always be the case in containers and the like, so if this test is
 		// failing for you check that you have a loopback interface and it is configured with
 		// 127.0.0.1.
-		let (conn_a, conn_b) = if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9735") {
-			(std::net::TcpStream::connect("127.0.0.1:9735").unwrap(), listener.accept().unwrap().0)
-		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9999") {
-			(std::net::TcpStream::connect("127.0.0.1:9999").unwrap(), listener.accept().unwrap().0)
-		} else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:46926") {
-			(std::net::TcpStream::connect("127.0.0.1:46926").unwrap(), listener.accept().unwrap().0)
-		} else { panic!("Failed to bind to v4 localhost on common ports"); };
+		let (conn_a, conn_b) = make_tcp_connection();
 
 		let fut_a = super::setup_outbound(Arc::clone(&a_manager), b_pub, conn_a);
 		let fut_b = super::setup_inbound(b_manager, conn_b);
@@ -629,8 +639,53 @@ mod tests {
 	async fn basic_threaded_connection_test() {
 		do_basic_connection_test().await;
 	}
+
 	#[tokio::test]
 	async fn basic_unthreaded_connection_test() {
 		do_basic_connection_test().await;
 	}
+
+	async fn race_disconnect_accept() {
+		// Previously, if we handed an already-disconnected socket to `setup_inbound` we'd panic.
+		// This attempts to find other similar races by opening connections and shutting them down
+		// while connecting. Sadly in testing this did *not* reproduce the previous issue.
+		let secp_ctx = Secp256k1::new();
+		let a_key = SecretKey::from_slice(&[1; 32]).unwrap();
+		let b_key = SecretKey::from_slice(&[2; 32]).unwrap();
+		let b_pub = PublicKey::from_secret_key(&secp_ctx, &b_key);
+
+		let a_manager = Arc::new(PeerManager::new(MessageHandler {
+			chan_handler: Arc::new(lightning::ln::peer_handler::ErroringMessageHandler::new()),
+			route_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}),
+		}, a_key, &[1; 32], Arc::new(TestLogger()), Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{})));
+
+		// Make two connections, one for an inbound and one for an outbound connection
+		let conn_a = {
+			let (conn_a, _) = make_tcp_connection();
+			conn_a
+		};
+		let conn_b = {
+			let (_, conn_b) = make_tcp_connection();
+			conn_b
+		};
+
+		// Call connection setup inside new tokio tasks.
+		let manager_reference = Arc::clone(&a_manager);
+		tokio::spawn(async move {
+			super::setup_inbound(manager_reference, conn_a).await
+		});
+		tokio::spawn(async move {
+			super::setup_outbound(a_manager, b_pub, conn_b).await
+		});
+	}
+
+	#[tokio::test(flavor = "multi_thread")]
+	async fn threaded_race_disconnect_accept() {
+		race_disconnect_accept().await;
+	}
+
+	#[tokio::test]
+	async fn unthreaded_race_disconnect_accept() {
+		race_disconnect_accept().await;
+	}
 }
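A side note on the test helper: the port probing in `make_tcp_connection` can be avoided by binding port 0 and letting the OS choose a free port. The sketch below is not what the patch does, just a possible alternative with the same `(client, accepted)` return shape; the name `make_tcp_connection_ephemeral` is made up for illustration:

```rust
// Alternative sketch (not part of the patch): let the OS pick a free localhost
// port instead of probing a fixed list of well-known ports.
fn make_tcp_connection_ephemeral() -> (std::net::TcpStream, std::net::TcpStream) {
	// Port 0 asks the kernel for any free port; local_addr() reports which one was chosen.
	let listener = std::net::TcpListener::bind("127.0.0.1:0")
		.expect("Failed to bind to v4 localhost on an ephemeral port");
	let addr = listener.local_addr().unwrap();
	let client = std::net::TcpStream::connect(addr).unwrap();
	let (accepted, _) = listener.accept().unwrap();
	(client, accepted)
}
```

Because the kernel only hands out a port that is currently free, this variant cannot fail simply because another process is already listening, so the fallback list becomes unnecessary; it still requires a configured 127.0.0.1 loopback interface, as the existing test comment notes.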