Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add domain snap sync algorithm #3027

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/sc-consensus-subspace/src/archiver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ const ACKNOWLEDGEMENT_TIMEOUT: Duration = Duration::from_mins(2);
/// Ideally, we'd decouple pruning from finalization, but it may require invasive changes in
/// Substrate and is not worth it right now.
/// https://github.com/paritytech/substrate/discussions/14359
pub(crate) const FINALIZATION_DEPTH_IN_SEGMENTS: SegmentIndex = SegmentIndex::new(5);
pub const FINALIZATION_DEPTH_IN_SEGMENTS: SegmentIndex = SegmentIndex::new(5);

#[derive(Debug)]
struct SegmentHeadersStoreInner<AS> {
Expand Down
31 changes: 30 additions & 1 deletion crates/subspace-service/src/domains.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
// Remove after adding domain snap-sync
#![allow(dead_code)]
pub mod snap_sync_orchestrator;

use crate::domains::request_handler::{
generate_protocol_name, LastConfirmedBlockRequest, LastConfirmedBlockResponse,
};
use crate::domains::snap_sync_orchestrator::SnapSyncOrchestrator;
use crate::FullBackend;
use async_trait::async_trait;
use domain_runtime_primitives::Balance;
use futures::channel::oneshot;
use parity_scale_codec::{Decode, Encode};
use sc_client_api::AuxStore;
use sc_consensus_subspace::archiver::SegmentHeadersStore;
use sc_network::{IfDisconnected, NetworkRequest, PeerId, RequestFailure};
use sc_network_sync::SyncingService;
use sp_blockchain::HeaderBackend;
Expand All @@ -22,7 +27,31 @@ pub(crate) mod request_handler;

const REQUEST_PAUSE: Duration = Duration::from_secs(5);

/// Last confirmed domain block info error
/// Provides parameters for domain snap sync synchronization with the consensus chain snap sync.
///
/// Type parameters:
/// * `Block` - domain block type.
/// * `CBlock` - consensus chain block type.
/// * `CNR` - consensus chain network request implementation.
/// * `AS` - auxiliary storage backing the segment headers store.
pub struct ConsensusChainSyncParams<Block, CBlock, CNR, AS>
where
Block: BlockT,
CBlock: BlockT,
CNR: NetworkRequest + Sync + Send,
AS: AuxStore,
{
/// Shared orchestrator that synchronizes consensus snap sync stages.
pub snap_sync_orchestrator: Arc<SnapSyncOrchestrator>,
/// Provides execution receipts for the last confirmed domain block.
pub execution_receipt_provider: Box<dyn LastDomainBlockReceiptProvider<Block, CBlock>>,
/// Consensus chain fork ID (`None` when no fork ID is set).
pub fork_id: Option<String>,
/// Consensus chain network service used for issuing network requests.
pub network_service: CNR,
/// Consensus chain sync service.
pub sync_service: Arc<SyncingService<CBlock>>,
/// Consensus chain backend (for obtaining offchain storage).
pub backend: Arc<FullBackend>,
/// Provides segment headers.
pub segment_headers_store: SegmentHeadersStore<AS>,
}

/// Last confirmed domain block info error.
#[derive(Debug, thiserror::Error)]
pub enum LastConfirmedDomainBlockResponseError {
#[error("Last confirmed domain block info request failed: {0}")]
Expand Down
106 changes: 106 additions & 0 deletions crates/subspace-service/src/domains/snap_sync_orchestrator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
//! Provides synchronization primitives for consensus and domain chains snap sync.

use crate::sync_from_dsn::snap_sync::{DefaultTargetBlockProvider, SnapSyncTargetBlockProvider};
use async_trait::async_trait;
use parking_lot::Mutex;
use std::sync::Arc;
use subspace_core_primitives::BlockNumber;
use tokio::sync::Notify;
use tracing::debug;

/// Selects the snap sync target block provider.
///
/// When an orchestrator is supplied it is used as the provider itself; otherwise a
/// [`DefaultTargetBlockProvider`] is created.
pub(crate) fn create_target_block_provider(
    snap_sync_orchestrator: Option<Arc<SnapSyncOrchestrator>>,
) -> Arc<dyn SnapSyncTargetBlockProvider> {
    match snap_sync_orchestrator {
        Some(orchestrator) => orchestrator,
        None => Arc::new(DefaultTargetBlockProvider),
    }
}

/// Synchronizes consensus and domain chain snap sync.
///
/// Holds two independent signals, each made of a [`Notify`] used to wake waiting tasks and a
/// mutex-protected value describing the current state.
pub struct SnapSyncOrchestrator {
// Wakes tasks waiting in `consensus_snap_sync_unblocked()`.
notify_consensus_snap_sync_unblocked: Notify,
// Target block number for consensus chain snap sync; `None` until
// `unblock_consensus_snap_sync()` stores a value.
consensus_snap_sync_block_number: Mutex<Option<BlockNumber>>,
// Wakes tasks waiting in `domain_snap_sync_finished_blocking()`.
notify_domain_snap_sync_finished: Notify,
// Set to `true` by `mark_domain_snap_sync_finished()` and by `unblock_all()`.
domain_snap_sync_finished: Mutex<bool>,
}

#[async_trait]
impl SnapSyncTargetBlockProvider for SnapSyncOrchestrator {
async fn target_block(&self) -> Option<BlockNumber> {
self.consensus_snap_sync_unblocked().await;

self.target_consensus_snap_sync_block_number()
}
}

impl Default for SnapSyncOrchestrator {
fn default() -> Self {
Self::new()
}
}

impl SnapSyncOrchestrator {
/// Creates an orchestrator in its initial state: no consensus snap sync target block is stored
/// and domain snap sync is not marked as finished.
pub fn new() -> Self {
    let consensus_snap_sync_block_number = Mutex::new(None);
    let domain_snap_sync_finished = Mutex::new(false);

    Self {
        notify_consensus_snap_sync_unblocked: Notify::new(),
        consensus_snap_sync_block_number,
        notify_domain_snap_sync_finished: Notify::new(),
        domain_snap_sync_finished,
    }
}

/// Returns the currently stored target block number for consensus chain snap sync.
///
/// `None` means the target block is not defined yet.
pub fn target_consensus_snap_sync_block_number(&self) -> Option<BlockNumber> {
    let stored_target = self.consensus_snap_sync_block_number.lock();

    *stored_target
}

/// Wait for the allowing signal for the consensus chain snap sync.
pub async fn consensus_snap_sync_unblocked(&self) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SubspaceLink::block_importing_notification_stream()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIR, we discussed offline the possibility of changing the current sync orchestrator algorithm from a blocking to a reactive approach by utilizing SubspaceLink::block_importing_notification_stream() and its ability to acknowledge blocks. I tried to use that approach and deleted several initial synchronization points instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not delete this one then? My point was that we ideally wouldn't need this orchestrator at all.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consensus chain snap sync is a part of the more complex domain snap sync process. In this form, it must start after we acquire the correct target block. I removed other blocking orchestrator points after our conversation: for example, we don't need to send signals from consensus snap sync anymore. However, it's not clear to me how to remove the dependency completely. Let's wait until the full solution is merged and return to this; I'm open to a change here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you just have a mutex or oneshot channel or something that is passed down to subspace-service that prevents sync as such from starting? I don't think you need to block/unblock it many times anyway, just pause until something happens on domain side and it is not necessarily specific to Snap sync either.

What else is this orchestrator needed for?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current implementation will contain the target block provider that conceals the orchestrator in the full version:

pub trait SnapSyncTargetBlockProvider: Send + Sync {
    async fn target_block(&self) -> Option<BlockNumber>;
}

The default non-blocking implementation returns None, which is close to what you proposed. I tried to limit the scope of the PR, but it seems the whole solution will provide more context and will be easier to review despite its size.

debug!("Waiting for a signal to start consensus chain snap sync.");
self.notify_consensus_snap_sync_unblocked.notified().await;
debug!("Finished waiting for a signal to start consensus chain snap sync.");
}

/// Stores the given target block number and wakes every task currently waiting for the
/// consensus chain snap sync to be allowed.
pub fn unblock_consensus_snap_sync(&self, target_block_number: BlockNumber) {
    debug!(%target_block_number, "Allowed starting consensus chain snap sync.");

    {
        // Scope the guard so the lock is released before waiters are woken.
        let mut stored_target = self.consensus_snap_sync_block_number.lock();
        *stored_target = Some(target_block_number);
    }

    self.notify_consensus_snap_sync_unblocked.notify_waiters();
}

/// Reports whether domain snap sync has been marked as finished (non-blocking).
pub fn domain_snap_sync_finished(&self) -> bool {
    let finished = self.domain_snap_sync_finished.lock();

    *finished
}

/// Marks domain snap sync as finished and wakes every task currently waiting for that signal.
pub fn mark_domain_snap_sync_finished(&self) {
    debug!("Signal that domain snap sync finished.");

    {
        // Scope the guard so the lock is released before waiters are woken.
        let mut finished = self.domain_snap_sync_finished.lock();
        *finished = true;
    }

    self.notify_domain_snap_sync_finished.notify_waiters();
}

/// Wait until domain snap sync is marked as finished.
///
/// Returns immediately when the finished flag is already set; otherwise waits for the next
/// notification sent by `mark_domain_snap_sync_finished()` or `unblock_all()`.
pub async fn domain_snap_sync_finished_blocking(&self) {
    // Register for the notification BEFORE checking the flag: `notify_waiters()` stores no
    // permit, but a `Notified` future receives its wakeups from the moment it is created.
    // Without this ordering (and the flag check itself) a signal sent before this call would
    // be lost and the task would wait forever.
    let finished = self.notify_domain_snap_sync_finished.notified();

    if self.domain_snap_sync_finished() {
        return;
    }

    debug!("Waiting for a signal that domain snap sync finished.");
    finished.await;
    debug!("Finished waiting for a signal that domain snap sync finished.");
}

/// Unblock all processes (synchronization cancelled).
///
/// Sets the domain snap sync finished flag and wakes every task currently waiting on either
/// the consensus snap sync unblock signal or the domain snap sync finished signal. No target
/// block number is stored, so woken consensus waiters observe whatever target was set before.
pub fn unblock_all(&self) {
    debug!("Allow all processes (synchronization cancelled).");

    // Publish the state before waking anyone, mirroring `mark_domain_snap_sync_finished()`:
    // a task woken by the notification must already see the flag as `true`.
    *self.domain_snap_sync_finished.lock() = true;

    self.notify_consensus_snap_sync_unblocked.notify_waiters();
    self.notify_domain_snap_sync_finished.notify_waiters();
}
}
3 changes: 2 additions & 1 deletion crates/subspace-service/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
)]

pub mod config;
pub(crate) mod domains;
pub mod domains;
pub mod dsn;
mod metrics;
pub(crate) mod mmr;
Expand All @@ -51,6 +51,7 @@ use frame_system_rpc_runtime_api::AccountNonceApi;
use futures::channel::oneshot;
use futures::FutureExt;
use jsonrpsee::RpcModule;
pub use mmr::sync::mmr_sync;
use pallet_transaction_payment_rpc_runtime_api::TransactionPaymentApi;
use parking_lot::Mutex;
use prometheus_client::registry::Registry;
Expand Down
3 changes: 2 additions & 1 deletion crates/subspace-service/src/sync_from_dsn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pub(crate) mod import_blocks;
pub(crate) mod piece_validator;
pub(crate) mod segment_header_downloader;
pub(crate) mod snap_sync;
pub(crate) mod snap_sync_engine;
pub mod snap_sync_engine;

use crate::sync_from_dsn::import_blocks::import_blocks_from_dsn;
use crate::sync_from_dsn::segment_header_downloader::SegmentHeaderDownloader;
Expand All @@ -14,6 +14,7 @@ use sc_consensus::import_queue::ImportQueueService;
use sc_consensus_subspace::archiver::SegmentHeadersStore;
use sc_network::service::traits::NetworkService;
use sc_network::NetworkBlock;
pub use snap_sync::wait_for_block_import;
use sp_api::ProvideRuntimeApi;
use sp_blockchain::HeaderBackend;
use sp_consensus_subspace::SubspaceApi;
Expand Down
72 changes: 44 additions & 28 deletions crates/subspace-service/src/sync_from_dsn/snap_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use crate::sync_from_dsn::import_blocks::download_and_reconstruct_blocks;
use crate::sync_from_dsn::segment_header_downloader::SegmentHeaderDownloader;
use crate::sync_from_dsn::snap_sync_engine::SnapSyncingEngine;
use crate::sync_from_dsn::DsnSyncPieceGetter;
use sc_client_api::{AuxStore, ProofProvider};
use async_trait::async_trait;
use futures::StreamExt;
use sc_client_api::{AuxStore, BlockchainEvents, ProofProvider};
use sc_consensus::import_queue::ImportQueueService;
use sc_consensus::{
BlockImport, BlockImportParams, ForkChoiceStrategy, ImportedState, IncomingBlock, StateAction,
Expand All @@ -29,7 +31,24 @@ use subspace_core_primitives::{BlockNumber, PublicKey};
use subspace_erasure_coding::ErasureCoding;
use subspace_networking::Node;
use tokio::time::sleep;
use tracing::{debug, error};
use tracing::{debug, error, info_span, trace, Instrument};

/// Provides target block number for snap-sync (blocking operation).
///
/// Implementations must be `Send + Sync` so a provider can be shared between tasks.
#[async_trait]
pub trait SnapSyncTargetBlockProvider: Send + Sync {
/// Resolves to the block number snap sync should target, or `None` when the provider has no
/// target to offer. The call may wait before resolving.
// TODO: remove after adding the rest of domain snap sync code
#[allow(dead_code)]
async fn target_block(&self) -> Option<BlockNumber>;
}

/// Target block provider that never supplies a target: its `target_block()` resolves to `None`
/// without waiting.
pub(crate) struct DefaultTargetBlockProvider;

#[async_trait]
impl SnapSyncTargetBlockProvider for DefaultTargetBlockProvider {
/// Always resolves to `None` immediately (non-blocking default).
async fn target_block(&self) -> Option<BlockNumber> {
None
}
}

#[allow(clippy::too_many_arguments)]
pub(crate) async fn snap_sync<Block, AS, Client, PG>(
Expand All @@ -50,6 +69,7 @@ pub(crate) async fn snap_sync<Block, AS, Client, PG>(
+ ProvideRuntimeApi<Block>
+ ProofProvider<Block>
+ BlockImport<Block>
+ BlockchainEvents<Block>
+ Send
+ Sync
+ 'static,
Expand Down Expand Up @@ -262,6 +282,7 @@ where
+ ProvideRuntimeApi<Block>
+ ProofProvider<Block>
+ BlockImport<Block>
+ BlockchainEvents<Block>
+ Send
+ Sync
+ 'static,
Expand Down Expand Up @@ -375,46 +396,41 @@ where

// Wait for blocks to be imported
// TODO: Replace this hack with actual watching of block import
shamil-gadelshin marked this conversation as resolved.
Show resolved Hide resolved
wait_for_block_import(client.as_ref(), last_block_number.into()).await;
wait_for_block_import(client.as_ref(), last_block_number.into())
.instrument(info_span!("consensus chain snap sync"))
shamil-gadelshin marked this conversation as resolved.
Show resolved Hide resolved
.await;

debug!(info = ?client.info(), "Snap sync finished successfully");

Ok(())
}

async fn wait_for_block_import<Block, Client>(
pub async fn wait_for_block_import<Block, Client>(
client: &Client,
waiting_block_number: NumberFor<Block>,
) where
Block: BlockT,
Client: HeaderBackend<Block>,
Client: HeaderBackend<Block> + BlockchainEvents<Block>,
{
const WAIT_DURATION: Duration = Duration::from_secs(5);
const MAX_NO_NEW_IMPORT_ITERATIONS: u32 = 10;
shamil-gadelshin marked this conversation as resolved.
Show resolved Hide resolved
let mut current_iteration = 0;
let mut last_best_block_number = client.info().best_number;
loop {
let info = client.info();
debug!(%current_iteration, %waiting_block_number, "Waiting client info: {:?}", info);

tokio::time::sleep(WAIT_DURATION).await;

if info.best_number >= waiting_block_number {
break;
}
let mut blocks_stream = client.every_import_notification_stream();

if last_best_block_number == info.best_number {
current_iteration += 1;
} else {
current_iteration = 0;
}
let info = client.info();
debug!(
%waiting_block_number,
"Waiting client info: {:?}", info
);

if current_iteration >= MAX_NO_NEW_IMPORT_ITERATIONS {
debug!(%current_iteration, %waiting_block_number, "Max idle period reached. {:?}", info);
break;
}
if info.best_number >= waiting_block_number {
return;
}

while let Some(block) = blocks_stream.next().await {
let current_block_number = *block.header.number();
trace!(%current_block_number, %waiting_block_number, "Waiting for the target block");

last_best_block_number = info.best_number;
if current_block_number >= waiting_block_number {
return;
}
}
}

Expand Down
5 changes: 5 additions & 0 deletions domains/client/domain-operator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ futures-timer = "3.0.3"
parking_lot = "0.12.2"
sc-client-api = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-consensus = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-consensus-subspace = { version = "0.1.0", path = "../../../crates/sc-consensus-subspace" }
sc-domains = { version = "0.1.0", path = "../../../crates/sc-domains" }
sc-network = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-network-common = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-network-sync = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-transaction-pool = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-transaction-pool-api = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
sc-utils = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
Expand All @@ -39,6 +43,7 @@ sp-trie = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d7
sp-weights = { git = "https://github.com/subspace/polkadot-sdk", rev = "5871818e1d736f1843eb9078f886290695165c42" }
subspace-core-primitives = { version = "0.1.0", path = "../../../crates/subspace-core-primitives" }
subspace-runtime-primitives = { version = "0.1.0", path = "../../../crates/subspace-runtime-primitives" }
subspace-service = { version = "0.1.0", path = "../../../crates/subspace-service" }
tracing = "0.1.40"
thiserror = "1.0.64"
tokio = { version = "1.40.0", features = ["macros"] }
Expand Down
1 change: 1 addition & 0 deletions domains/client/domain-operator/src/domain_worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pub(super) async fn start_worker<
+ ProvideRuntimeApi<Block>
+ ProofProvider<Block>
+ Finalizer<Block, Backend>
+ BlockchainEvents<Block>
+ 'static,
Client::Api: DomainCoreApi<Block>
+ MessengerApi<Block, NumberFor<CBlock>, CBlock::Hash>
Expand Down
2 changes: 2 additions & 0 deletions domains/client/domain-operator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@

#![feature(array_windows)]
#![feature(const_option)]
#![feature(duration_constructors)]
#![feature(extract_if)]

mod aux_schema;
Expand All @@ -72,6 +73,7 @@ mod domain_worker;
mod fetch_domain_bootstrap_info;
mod fraud_proof;
mod operator;
mod snap_sync;
#[cfg(test)]
mod tests;
mod utils;
Expand Down
1 change: 1 addition & 0 deletions domains/client/domain-operator/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ where
+ ProvideRuntimeApi<Block>
+ ProofProvider<Block>
+ Finalizer<Block, Backend>
+ BlockchainEvents<Block>
+ 'static,
Client::Api: DomainCoreApi<Block>
+ MessengerApi<Block, NumberFor<CBlock>, CBlock::Hash>
Expand Down
Loading
Loading