diff --git a/.github/workflows/memcheck.yml b/.github/workflows/memcheck.yml index e6556b9f57..4d05240300 100644 --- a/.github/workflows/memcheck.yml +++ b/.github/workflows/memcheck.yml @@ -99,7 +99,7 @@ jobs: mkdir $ANT_DATA_PATH/client ls -l $ANT_DATA_PATH cp ./the-test-data.zip ./the-test-data_1.zip - ./target/release/ant --log-output-dest data-dir file upload "./the-test-data_1.zip" > ./second_upload 2>&1 + ./target/release/ant --log-output-dest=data-dir file upload "./the-test-data_1.zip" > ./second_upload 2>&1 env: ANT_LOG: "all" timeout-minutes: 25 @@ -114,11 +114,7 @@ jobs: - name: Start the restart node again run: | - ./target/release/antnode \ - --root-dir $RESTART_TEST_NODE_DATA_PATH \ - --log-output-dest $RESTART_TEST_NODE_DATA_PATH \ - --local \ - --rewards-address "0x03B770D9cD32077cC0bF330c13C114a87643B124" & + ./target/release/antnode --root-dir $RESTART_TEST_NODE_DATA_PATH --log-output-dest $RESTART_TEST_NODE_DATA_PATH --local --rewards-address "0x03B770D9cD32077cC0bF330c13C114a87643B124" & sleep 10 env: ANT_LOG: "all" @@ -150,9 +146,7 @@ jobs: if: always() - name: File Download - run: > - ./target/release/ant - --log-output-dest=data-dir file download ${{ env.UPLOAD_ADDRESS }} ./downloaded_resources + run: ./target/release/ant --log-output-dest=data-dir file download ${{ env.UPLOAD_ADDRESS }} ./downloaded_resources env: ANT_LOG: "v" timeout-minutes: 2 @@ -166,7 +160,7 @@ jobs: - name: Stop the local network and upload logs if: always() - uses: maidsafe/ant-local-testnet-action@main + uses: maidsafe/ant-local-testnet-action@feat-addrs-logs with: action: stop log_file_prefix: safe_test_logs_memcheck diff --git a/.github/workflows/merge.yml b/.github/workflows/merge.yml index 60faed6af6..c7eeaa16ab 100644 --- a/.github/workflows/merge.yml +++ b/.github/workflows/merge.yml @@ -578,7 +578,7 @@ jobs: - name: Stop the local network and upload logs if: always() - uses: maidsafe/ant-local-testnet-action@main + uses: 
maidsafe/ant-local-testnet-action@feat-addrs-logs with: action: stop log_file_prefix: safe_test_logs_e2e @@ -1372,7 +1372,7 @@ jobs: - name: Stop the local network and upload logs if: always() - uses: maidsafe/ant-local-testnet-action@main + uses: maidsafe/ant-local-testnet-action@feat-addrs-logs with: action: stop platform: ubuntu-latest diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 4534b49110..bb1637a099 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -135,7 +135,7 @@ const PERIODIC_KAD_BOOTSTRAP_INTERVAL_MAX_S: u64 = 21600; // Init during compilation, instead of runtime error that should never happen // Option::expect will be stabilised as const in the future (https://github.com/rust-lang/rust/issues/67441) -const REPLICATION_FACTOR: NonZeroUsize = match NonZeroUsize::new(CLOSE_GROUP_SIZE) { +const REPLICATION_FACTOR: NonZeroUsize = match NonZeroUsize::new(CLOSE_GROUP_SIZE + 2) { Some(v) => v, None => panic!("CLOSE_GROUP_SIZE should not be zero"), }; diff --git a/ant-networking/src/lib.rs b/ant-networking/src/lib.rs index 434aa192ad..2cfa242e32 100644 --- a/ant-networking/src/lib.rs +++ b/ant-networking/src/lib.rs @@ -385,6 +385,10 @@ impl Network { let mut close_nodes = self .client_get_all_close_peers_in_range_or_close_group(&record_address) .await?; + info!( + "For record {record_address:?} quoting {} nodes. ignore_peers is {ignore_peers:?}", + close_nodes.len() + ); // Filter out results from the ignored peers. 
close_nodes.retain(|peer_id| !ignore_peers.contains(peer_id)); diff --git a/autonomi/src/client/mod.rs b/autonomi/src/client/mod.rs index fae0a87ba8..352eb53f5d 100644 --- a/autonomi/src/client/mod.rs +++ b/autonomi/src/client/mod.rs @@ -120,6 +120,12 @@ impl Client { receiver.await.expect("sender should not close")?; debug!("Client is connected to the network"); + // With the switch to the new bootstrap cache scheme, + // it seems that too many `initial dial`s can result in failure + // if startup quoting/upload tasks are started immediately. + // Hence, wait asynchronously (never block the executor thread) so `initial network discovery` can complete. + tokio::time::sleep(std::time::Duration::from_secs(10)).await; + Ok(Self { network, client_event_sender: Arc::new(None), diff --git a/autonomi/src/client/quote.rs b/autonomi/src/client/quote.rs index 9794f165d7..a2e7bf7cf9 100644 --- a/autonomi/src/client/quote.rs +++ b/autonomi/src/client/quote.rs @@ -11,7 +11,7 @@ use crate::client::rate_limiter::RateLimiter; use ant_evm::payment_vault::get_market_price; use ant_evm::{Amount, EvmNetwork, PaymentQuote, QuotePayment, QuotingMetrics}; use ant_networking::{Network, NetworkError}; -use ant_protocol::{storage::ChunkAddress, NetworkAddress}; +use ant_protocol::{storage::ChunkAddress, NetworkAddress, CLOSE_GROUP_SIZE}; use libp2p::PeerId; use std::collections::HashMap; use xor_name::XorName; @@ -159,6 +159,16 @@ async fn fetch_store_quote_with_retries( loop { match fetch_store_quote(network, content_addr).await { Ok(quote) => { - break Ok((content_addr, quote)); + if quote.len() < CLOSE_GROUP_SIZE { + retries += 1; + error!("Error while fetching store quote: not enough quotes ({}/{CLOSE_GROUP_SIZE}), retry #{retries}, quotes {quote:?}", + quote.len()); + if retries > 2 { + break Err(CostError::CouldNotGetStoreQuote(content_addr)); + } + // Too few quotes: fall through to the back-off sleep below and query again. + } else { + break Ok((content_addr, quote)); + } } Err(err) if retries < 2 => { @@ -172,6 +182,9 @@ async fn fetch_store_quote_with_retries( break Err(CostError::CouldNotGetStoreQuote(content_addr)); } } + // 
Sleep between retries to avoid choking the network. + // This should rarely happen though; the sleep is async so other tasks keep running. + tokio::time::sleep(std::time::Duration::from_secs(5)).await; } } diff --git a/autonomi/tests/put.rs b/autonomi/tests/put.rs index f5d411e691..ba1157c3ce 100644 --- a/autonomi/tests/put.rs +++ b/autonomi/tests/put.rs @@ -9,9 +9,7 @@ use ant_logging::LogBuilder; use autonomi::Client; use eyre::Result; -use std::time::Duration; use test_utils::{evm::get_funded_wallet, gen_random_data, peers_from_env}; -use tokio::time::sleep; #[tokio::test] async fn put() -> Result<()> { @@ -23,8 +21,6 @@ async fn put() -> Result<()> { let addr = client.data_put_public(data.clone(), wallet.into()).await?; - sleep(Duration::from_secs(10)).await; - let data_fetched = client.data_get_public(addr).await?; assert_eq!(data, data_fetched, "data fetched should match data put");