diff --git a/CHANGELOG.md b/CHANGELOG.md index c1ce5c1..28b8bea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ Notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +## Fixed + +- Whitelist-only nodes retry their configured peers, re-resolving hostnames, instead of exiting with `NoReachablePeers` + ## 0.6.0 ## Changed diff --git a/src/builder.rs b/src/builder.rs index 4b9e5a2..49506c3 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -49,8 +49,10 @@ impl Builder { } /// Only connect to peers in the whitelist. When enabled, the node will not discover - /// new peers via DNS seeding or addr gossip. If all whitelist peers disconnect, - /// the node will exit with [`NodeError::NoReachablePeers`](crate::error::NodeError::NoReachablePeers). + /// new peers via DNS seeding or addr gossip. If all whitelist peers are unreachable, + /// the node will keep retrying them, re-resolving any hostnames, until shutdown. + /// If the whitelist is empty, the node will exit with + /// [`NodeError::NoReachablePeers`](crate::error::NodeError::NoReachablePeers). pub fn whitelist_only(mut self) -> Self { self.config.whitelist_only = true; self diff --git a/src/network/peer_map.rs b/src/network/peer_map.rs index 359648f..0b98faf 100644 --- a/src/network/peer_map.rs +++ b/src/network/peer_map.rs @@ -3,6 +3,7 @@ use std::{ fmt::Debug, net::{IpAddr, Ipv4Addr}, sync::Arc, + time::Duration, }; use addrman::Record; @@ -11,13 +12,18 @@ use bitcoin::{ p2p::{address::AddrV2, ServiceFlags}, FeeRate, Network, }; -use rand::{rngs::StdRng, seq::IteratorRandom, SeedableRng}; +use rand::{ + rngs::StdRng, + seq::{IteratorRandom, SliceRandom}, + SeedableRng, +}; use tokio::{ sync::{ mpsc::{self, Sender}, Mutex, }, task::JoinHandle, + time::Instant, }; use crate::{ @@ -31,6 +37,9 @@ use super::{AddressBook, ConnectionType, MainThreadMessage, PeerThreadMessage}; const LOCAL_HOST: IpAddr = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)); +// How long to wait before re-seeding the dial queue from the whitelist. +const WHITELIST_REFILL_COOLDOWN: Duration = Duration::from_secs(1); + // Preferred peers to connect to based on the user configuration type Whitelist = Vec; @@ -56,6 +65,8 @@ pub(crate) struct PeerMap { db: Arc>, connector: ConnectionType, whitelist: Whitelist, + dial_queue: Whitelist, + last_refill: Option, dialog: Arc, timeout_config: PeerTimeoutConfig, } @@ -82,7 +93,9 @@ impl PeerMap { map: HashMap::new(), db: Arc::new(Mutex::new(AddressBook::new())), connector: connection_type, + dial_queue: whitelist.clone(), whitelist, + last_refill: None, dialog, timeout_config, } @@ -103,7 +116,13 @@ impl PeerMap { // Add a new trusted peer to the whitelist pub fn add_trusted_peer(&mut self, peer: TrustedPeer) { - self.whitelist.push(peer); + self.whitelist.push(peer.clone()); + self.dial_queue.push(peer); + } + + // Whether an empty dial queue should be refilled instead of ending the node. + pub fn will_retry_whitelist(&self) -> bool { + self.whitelist_only && !self.whitelist.is_empty() } // Send out a TCP connection to a new peer and begin tracking the task @@ -215,9 +234,13 @@ impl PeerMap { // Pull a peer from the configuration if we have one. If not, select a random peer from the database, // as long as it is not from the same netgroup. If there are no peers in the database, try DNS. - // When `whitelist_only` is set, only whitelist peers are used. + // When `whitelist_only` is set, only whitelist peers are used: the dial queue is re-seeded from + // the whitelist when it runs dry, so configured peers are retried until shutdown. pub async fn next_peer(&mut self) -> Option { - while let Some(peer) = self.whitelist.pop() { + if self.dial_queue.is_empty() && self.whitelist_only { + self.refill_dial_queue(); + } + while let Some(peer) = self.dial_queue.pop() { let port = peer .port .unwrap_or(default_port_from_network(&self.network)); @@ -243,11 +266,11 @@ impl PeerMap { "Resolved {host} to {} address(es)", resolved.len() )); - // Push every resolved address onto the whitelist so each is tried on + // Push every resolved address onto the dial queue so each is tried on // a subsequent call. Reversed so the resolver's preferred order is // preserved under LIFO pop. for resolved_addr in resolved.into_iter().rev() { - self.whitelist.push(TrustedPeer { + self.dial_queue.push(TrustedPeer { address: TrustedPeerInner::Addr(resolved_addr), port: Some(port), known_services: peer.known_services, @@ -294,6 +317,22 @@ impl PeerMap { db_lock.select() } + // Re-seed the dial queue from the configured whitelist, rate-limited so a total + // outage does not loop on DNS. Hostnames are re-resolved when popped, so + // reconnections follow DNS changes. + fn refill_dial_queue(&mut self) { + if let Some(last) = self.last_refill { + if last.elapsed() < WHITELIST_REFILL_COOLDOWN { + return; + } + } + self.last_refill = Some(Instant::now()); + self.dial_queue = self.whitelist.clone(); + // Spread reconnections across the allowlist instead of always + // favoring the same entry. + self.dial_queue.shuffle(&mut StdRng::from_entropy()); + } + // We tried this peer and successfully connected. pub async fn tried(&mut self, nonce: PeerId) { if let Some(peer) = self.map.get(&nonce) { @@ -310,3 +349,69 @@ impl PeerMap { } } } + +#[cfg(test)] +mod tests { + use super::*; + + fn test_peer_map(whitelist: Whitelist, whitelist_only: bool) -> PeerMap { + let (mtx, _mrx) = mpsc::channel(8); + let (info_tx, _info_rx) = mpsc::channel(8); + let (warn_tx, _warn_rx) = mpsc::unbounded_channel(); + let (event_tx, _event_rx) = mpsc::unbounded_channel(); + PeerMap::new( + mtx, + Network::Regtest, + BlockType::default(), + whitelist, + whitelist_only, + Arc::new(Dialog::new(info_tx, warn_tx, event_tx)), + ConnectionType::default(), + PeerTimeoutConfig::default(), + ) + } + + fn two_local_peers() -> Whitelist { + vec![ + TrustedPeer::from_ip(Ipv4Addr::new(127, 0, 0, 1)), + TrustedPeer::from_ip(Ipv4Addr::new(127, 0, 0, 2)), + ] + } + + #[tokio::test(start_paused = true)] + async fn whitelist_refills_after_exhaustion() { + let mut peer_map = test_peer_map(two_local_peers(), true); + // Drain the initial dial queue. + assert!(peer_map.next_peer().await.is_some()); + assert!(peer_map.next_peer().await.is_some()); + // The queue is empty, so the whitelist re-seeds it. + assert!(peer_map.next_peer().await.is_some()); + assert!(peer_map.next_peer().await.is_some()); + // A second refill is rate-limited. + assert!(peer_map.next_peer().await.is_none()); + // After the cooldown the whitelist is retried again. + tokio::time::sleep(WHITELIST_REFILL_COOLDOWN).await; + assert!(peer_map.next_peer().await.is_some()); + } + + #[tokio::test] + async fn gossip_mode_does_not_refill() { + let mut peer_map = test_peer_map(two_local_peers(), false); + assert!(peer_map.next_peer().await.is_some()); + assert!(peer_map.next_peer().await.is_some()); + // Without `whitelist_only` the whitelist is not retried, and regtest + // has no DNS seeds or stored peers to fall back on. + assert!(peer_map.next_peer().await.is_none()); + } + + #[tokio::test] + async fn runtime_peers_join_the_retry_cycle() { + let mut peer_map = test_peer_map(Vec::new(), true); + assert!(!peer_map.will_retry_whitelist()); + peer_map.add_trusted_peer(TrustedPeer::from_ip(Ipv4Addr::new(127, 0, 0, 1))); + assert!(peer_map.will_retry_whitelist()); + assert!(peer_map.next_peer().await.is_some()); + // The added peer is part of the configured set, so it refills too. + assert!(peer_map.next_peer().await.is_some()); + } +} diff --git a/src/node.rs b/src/node.rs index ea35773..fdc5d95 100644 --- a/src/node.rs +++ b/src/node.rs @@ -288,17 +288,22 @@ impl Node { let required = self.next_required_peers(); // Find more peers when lower than the desired threshold. if live < required { - self.dialog.send_warning(Warning::NeedConnections { - connected: live, - required, - }); - let address = self - .peer_map - .next_peer() - .await - .ok_or(NodeError::NoReachablePeers)?; - if self.peer_map.dispatch(address).await.is_err() { - self.dialog.send_warning(Warning::CouldNotConnect); + match self.peer_map.next_peer().await { + Some(address) => { + self.dialog.send_warning(Warning::NeedConnections { + connected: live, + required, + }); + if self.peer_map.dispatch(address).await.is_err() { + self.dialog.send_warning(Warning::CouldNotConnect); + } + } + None => { + if !self.peer_map.will_retry_whitelist() { + return Err(NodeError::NoReachablePeers); + } + // The whitelist refill is rate-limited; try again on a later tick. + } } } Ok(()) diff --git a/tests/core.rs b/tests/core.rs index 05cbea7..19d89e6 100644 --- a/tests/core.rs +++ b/tests/core.rs @@ -724,3 +724,57 @@ async fn whitelist_only_sync() { requester.shutdown().unwrap(); rpc.stop().unwrap(); } + +#[tokio::test] +async fn whitelist_only_reconnects() { + let (bitcoind, socket_addr) = start_bitcoind(true).unwrap(); + let rpc = &bitcoind.client; + let tempdir = tempfile::TempDir::new().unwrap().path().to_owned(); + let miner = rpc.new_address().unwrap(); + mine_blocks(rpc, &miner, 10, 2).await; + let best = best_hash(rpc); + let host = (IpAddr::V4(*socket_addr.ip()), Some(socket_addr.port())); + let builder = bip157::builder::Builder::new(bitcoin::Network::Regtest) + .chain_state(ChainState::Checkpoint(HashCheckpoint::from_genesis( + bitcoin::Network::Regtest, + ))) + .add_peer(host) + .whitelist_only() + // Force the connection rotation often, so the dial queue runs dry + // while the test is running. + .maximum_connection_time(Duration::from_secs(2)) + .data_dir(&tempdir); + let (node, client) = builder.build(); + tokio::task::spawn(async move { node.run().await }); + let Client { + requester, + info_rx, + warn_rx, + event_rx: mut channel, + } = client; + tokio::task::spawn(async move { print_logs(info_rx, warn_rx).await }); + sync_assert(&best, &mut channel).await; + // Each round outlives a rotation, so the node must redial the whitelist + // peer to keep following the chain. + for _ in 0..3 { + mine_blocks(rpc, &miner, 1, 2).await; + let best = best_hash(rpc); + let deadline = tokio::time::Instant::now() + Duration::from_secs(15); + loop { + let tip = requester + .chain_tip() + .await + .expect("node exited instead of retrying the whitelist"); + if tip.hash == best { + break; + } + assert!( + tokio::time::Instant::now() < deadline, + "node did not sync to the new tip after a rotation" + ); + tokio::time::sleep(Duration::from_millis(250)).await; + } + } + requester.shutdown().unwrap(); + rpc.stop().unwrap(); +}