diff --git a/.github/workflows/memcheck.yml b/.github/workflows/memcheck.yml index e6556b9f57..cfe8001ad0 100644 --- a/.github/workflows/memcheck.yml +++ b/.github/workflows/memcheck.yml @@ -99,7 +99,7 @@ jobs: mkdir $ANT_DATA_PATH/client ls -l $ANT_DATA_PATH cp ./the-test-data.zip ./the-test-data_1.zip - ./target/release/ant --log-output-dest data-dir file_TYPE upload "" > ./second_upload 2>&1 + ./target/release/ant --log-output-dest data-dir file upload "./the-test-data_1.zip" > ./second_upload 2>&1 enrelease-candidatev: ANT_LOG: "all" timeout-minutes: 25 @@ -114,11 +114,7 @@ jobs: - name: Start the restart node again run: | - ./target/release/antnode \ - --root-dir-type PARESTART_TEST_NODE_DATA_PATH \ - --log-output-dest $RESTART_TEST_NODE_DATA_PATH \ - --local \ - --rewards-address "0x03B770D9cD32077cC0bF330c13C114a87643B124" & + ./target/release/antnode --root-dir $RESTART_TEST_NODE_DATA_PATH --log-output-dest $RESTART_TEST_NODE_DATA_PATH --local --rewards-address "0x03B770D9cD32077cC0bF330c13C114a87643B124" & sleep 10 env: ANT_LOG: "all" diff --git a/ant-networking/src/lib.rs b/ant-networking/src/lib.rs index 434aa192ad..2cfa242e32 100644 --- a/ant-networking/src/lib.rs +++ b/ant-networking/src/lib.rs @@ -385,6 +385,10 @@ impl Network { let mut close_nodes = self .client_get_all_close_peers_in_range_or_close_group(&record_address) .await?; + info!( + "For record {record_address:?} quoting {} nodes. ignore_peers is {ignore_peers:?}", + close_nodes.len() + ); // Filter out results from the ignored peers. 
close_nodes.retain(|peer_id| !ignore_peers.contains(peer_id)); diff --git a/autonomi/src/client/mod.rs b/autonomi/src/client/mod.rs index fae0a87ba8..352eb53f5d 100644 --- a/autonomi/src/client/mod.rs +++ b/autonomi/src/client/mod.rs @@ -120,6 +120,12 @@ impl Client { receiver.await.expect("sender should not close")?; debug!("Client is connected to the network"); + // With the switch to the new bootstrap cache scheme, + // it seems that too many `initial dial`s could result in failure + // if startup quoting/upload tasks are started immediately. + // Hence, wait for a fixed duration to allow `initial network discovery` to complete. + std::thread::sleep(std::time::Duration::from_secs(10)); + Ok(Self { network, client_event_sender: Arc::new(None), diff --git a/autonomi/src/client/quote.rs b/autonomi/src/client/quote.rs index 9794f165d7..a2e7bf7cf9 100644 --- a/autonomi/src/client/quote.rs +++ b/autonomi/src/client/quote.rs @@ -11,7 +11,7 @@ use crate::client::rate_limiter::RateLimiter; use ant_evm::payment_vault::get_market_price; use ant_evm::{Amount, EvmNetwork, PaymentQuote, QuotePayment, QuotingMetrics}; use ant_networking::{Network, NetworkError}; -use ant_protocol::{storage::ChunkAddress, NetworkAddress}; +use ant_protocol::{storage::ChunkAddress, NetworkAddress, CLOSE_GROUP_SIZE}; use libp2p::PeerId; use std::collections::HashMap; use xor_name::XorName; @@ -159,6 +159,14 @@ async fn fetch_store_quote_with_retries( loop { match fetch_store_quote(network, content_addr).await { Ok(quote) => { + if quote.len() < CLOSE_GROUP_SIZE { + retries += 1; + error!("Error while fetching store quote: not enough quotes ({}/{CLOSE_GROUP_SIZE}), retry #{retries}, quotes {quote:?}", + quote.len()); + if retries > 2 { + break Err(CostError::CouldNotGetStoreQuote(content_addr)); + } + } break Ok((content_addr, quote)); } Err(err) if retries < 2 => { @@ -172,6 +180,9 @@ async fn fetch_store_quote_with_retries( break Err(CostError::CouldNotGetStoreQuote(content_addr)); } } + // 
Sleep between retries to avoid choking the network. + // This should rarely happen though. + std::thread::sleep(std::time::Duration::from_secs(5)); } }