Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge pr2536 pr2538 #2539

Merged
merged 4 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ jobs:
if: matrix.os != 'windows-latest'
run: |
set -e
for i in {1..100}; do
for i in {1..50}; do
dd if=/dev/urandom of=random_file_$i.bin bs=1M count=1 status=none
./target/release/ant --log-output-dest data-dir file upload random_file_$i.bin --public
./target/release/ant --log-output-dest data-dir file upload random_file_$i.bin
Expand All @@ -423,7 +423,7 @@ jobs:
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
for ($i = 1; $i -le 100; $i++) {
for ($i = 1; $i -le 50; $i++) {
$fileName = "random_file_$i.bin"
$byteArray = [byte[]]@(0xFF) * (1MB) # Create a 1 MB array filled with 0xFF
[System.IO.File]::WriteAllBytes($fileName, $byteArray)
Expand Down
2 changes: 1 addition & 1 deletion ant-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ path = "src/main.rs"

[features]
default = ["metrics"]
local = ["ant-bootstrap/local", "autonomi/local"]
local = ["ant-bootstrap/local", "autonomi/local", "ant-logging/process-metrics"]
metrics = ["ant-logging/process-metrics"]
nightly = []

Expand Down
4 changes: 2 additions & 2 deletions ant-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub use access::user_data;
use clap::Parser;
use color_eyre::Result;

#[cfg(feature = "metrics")]
#[cfg(feature = "local")]
use ant_logging::metrics::init_metrics;
use ant_logging::{LogBuilder, LogFormat, ReloadHandle, WorkerGuard};
use ant_protocol::version;
Expand Down Expand Up @@ -73,7 +73,7 @@ async fn main() -> Result<()> {
}

let _log_guards = init_logging_and_metrics(&opt)?;
#[cfg(feature = "metrics")]
#[cfg(feature = "local")]
tokio::spawn(init_metrics(std::process::id()));

info!("\"{}\"", std::env::args().collect::<Vec<_>>().join(" "));
Expand Down
2 changes: 1 addition & 1 deletion ant-networking/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ const PERIODIC_KAD_BOOTSTRAP_INTERVAL_MAX_S: u64 = 21600;

// Init during compilation, instead of runtime error that should never happen
// Option<T>::expect will be stabilised as const in the future (https://github.com/rust-lang/rust/issues/67441)
const REPLICATION_FACTOR: NonZeroUsize = match NonZeroUsize::new(CLOSE_GROUP_SIZE) {
const REPLICATION_FACTOR: NonZeroUsize = match NonZeroUsize::new(CLOSE_GROUP_SIZE + 2) {
Some(v) => v,
None => panic!("CLOSE_GROUP_SIZE should not be zero"),
};
Expand Down
4 changes: 4 additions & 0 deletions ant-networking/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ impl Network {
.await?;
// Filter out results from the ignored peers.
close_nodes.retain(|peer_id| !ignore_peers.contains(peer_id));
info!(
"For record {record_address:?} quoting {} nodes. ignore_peers is {ignore_peers:?}",
close_nodes.len()
);

if close_nodes.is_empty() {
error!("Can't get store_cost of {record_address:?}, as all close_nodes are ignored");
Expand Down
4 changes: 2 additions & 2 deletions ant-node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ path = "src/bin/antnode/main.rs"
default = ["metrics", "upnp", "open-metrics", "encrypt-records"]
encrypt-records = ["ant-networking/encrypt-records"]
extension-module = ["pyo3/extension-module"]
local = ["ant-networking/local", "ant-evm/local", "ant-bootstrap/local"]
local = ["ant-networking/local", "ant-evm/local", "ant-bootstrap/local", "ant-logging/process-metrics"]
loud = ["ant-networking/loud"] # loud mode: print important messages to console
metrics = ["ant-logging/process-metrics"]
metrics = []
nightly = []
open-metrics = ["ant-networking/open-metrics", "prometheus-client"]
otlp = ["ant-logging/otlp"]
Expand Down
4 changes: 2 additions & 2 deletions ant-node/src/bin/antnode/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mod subcommands;
use crate::subcommands::EvmNetworkCommand;
use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore, PeersArgs};
use ant_evm::{get_evm_network_from_env, EvmNetwork, RewardsAddress};
#[cfg(feature = "metrics")]
#[cfg(feature = "local")]
use ant_logging::metrics::init_metrics;
use ant_logging::{Level, LogFormat, LogOutputDest, ReloadHandle};
use ant_node::{Marker, NodeBuilder, NodeEvent, NodeEventsReceiver};
Expand Down Expand Up @@ -306,7 +306,7 @@ fn main() -> Result<()> {
// Create a tokio runtime per `run_node` attempt, this ensures
// any spawned tasks are closed before we would attempt to run
// another process with these args.
#[cfg(feature = "metrics")]
#[cfg(feature = "local")]
rt.spawn(init_metrics(std::process::id()));
let initial_peres = rt.block_on(opt.peers.get_addrs(None, Some(100)))?;
debug!("Node's owner set to: {:?}", opt.owner);
Expand Down
6 changes: 4 additions & 2 deletions ant-node/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ use ant_bootstrap::BootstrapCacheStore;
use ant_evm::RewardsAddress;
#[cfg(feature = "open-metrics")]
use ant_networking::MetricsRegistries;
use ant_networking::{Instant, Network, NetworkBuilder, NetworkEvent, NodeIssue, SwarmDriver};
use ant_networking::{
target_arch::sleep, Instant, Network, NetworkBuilder, NetworkEvent, NodeIssue, SwarmDriver,
};
use ant_protocol::{
convert_distance_to_u256,
error::Error as ProtocolError,
Expand Down Expand Up @@ -969,7 +971,7 @@ impl Node {
}
}
// Sleep a short while to avoid causing a spike on resource usage.
std::thread::sleep(std::time::Duration::from_secs(10));
sleep(std::time::Duration::from_secs(10)).await;
}
}
}
Expand Down
6 changes: 6 additions & 0 deletions autonomi/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,12 @@ impl Client {
receiver.await.expect("sender should not close")?;
debug!("Client is connected to the network");

// With the switch to the new bootstrap cache scheme,
// Seems the too many `initial dial`s could result in failure,
// when startup quoting/upload tasks got started up immediatly.
// Hence, put in a forced wait to allow `initial network discovery` to be completed.
ant_networking::target_arch::sleep(Duration::from_secs(5)).await;

Ok(Self {
network,
client_event_sender: Arc::new(None),
Expand Down
13 changes: 12 additions & 1 deletion autonomi/src/client/quote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::client::rate_limiter::RateLimiter;
use ant_evm::payment_vault::get_market_price;
use ant_evm::{Amount, EvmNetwork, PaymentQuote, QuotePayment, QuotingMetrics};
use ant_networking::{Network, NetworkError};
use ant_protocol::{storage::ChunkAddress, NetworkAddress};
use ant_protocol::{storage::ChunkAddress, NetworkAddress, CLOSE_GROUP_SIZE};
use libp2p::PeerId;
use std::collections::HashMap;
use xor_name::XorName;
Expand Down Expand Up @@ -159,6 +159,14 @@ async fn fetch_store_quote_with_retries(
loop {
match fetch_store_quote(network, content_addr).await {
Ok(quote) => {
if quote.len() < CLOSE_GROUP_SIZE {
retries += 1;
error!("Error while fetching store quote: not enough quotes ({}/{CLOSE_GROUP_SIZE}), retry #{retries}, quotes {quote:?}",
quote.len());
if retries > 2 {
break Err(CostError::CouldNotGetStoreQuote(content_addr));
}
}
break Ok((content_addr, quote));
}
Err(err) if retries < 2 => {
Expand All @@ -172,6 +180,9 @@ async fn fetch_store_quote_with_retries(
break Err(CostError::CouldNotGetStoreQuote(content_addr));
}
}
// Shall have a sleep between retries to avoid choking the network.
// This shall be rare to happen though.
std::thread::sleep(std::time::Duration::from_secs(5));
}
}

Expand Down
Loading