Skip to content

Commit

Permalink
DatasetEntryIndexer: memoization + Updated our crate dependencies …
Browse files Browse the repository at this point in the history
…so they can be built in isolation (#874)

* DatasetEntryIndexer::concurrent_dataset_handles_processing(): use precalculated mapping

* Update cargo deps

* CHANGELOG.md: update
  • Loading branch information
s373r authored Oct 3, 2024
1 parent c9a9b8d commit faebd9f
Show file tree
Hide file tree
Showing 18 changed files with 89 additions and 46 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Recommendation: for ease of reading, use the following order:
- Introducing `DatasetLifecycleMessageRenamed`
- Simplified error handling code in repositories
- Hidden part of the test code behind the feature gate
- Updated our crate dependencies so they can be built in isolation

## [0.204.4] - 2024-09-30
### Changed
Expand Down
3 changes: 2 additions & 1 deletion src/adapter/flight-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ uuid = { version = "1", default-features = false }


[dev-dependencies]
kamu-data-utils = { workspace = true }
kamu-data-utils = { workspace = true, features = ["testing"] }

indoc = "2"
test-log = { version = "0.2", features = ["trace"] }
tokio = { version = "1", default-features = false, features = [] }
Expand Down
1 change: 1 addition & 0 deletions src/adapter/graphql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ uuid = { version = "1", default-features = false }
# TODO: Limit to mock or in-memory implementations only
container-runtime = { workspace = true }
init-on-startup = { workspace = true }
kamu = { workspace = true, features = ["testing"] }
kamu-accounts = { workspace = true, features = ["testing"] }
kamu-accounts-inmem = { workspace = true }
kamu-accounts-services = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions src/adapter/http/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ uuid = { version = "1", default-features = false, features = ["v4"] }
[dev-dependencies]
container-runtime = { workspace = true }
init-on-startup = { workspace = true }
kamu = { workspace = true, features = ["testing"] }
kamu-accounts = { workspace = true, features = ["testing"] }
kamu-accounts-inmem = { workspace = true }
kamu-accounts-services = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion src/adapter/odata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ tracing = "0.1"


[dev-dependencies]
kamu = { workspace = true, features = ["testing"] }
messaging-outbox = { workspace = true }
time-source = { workspace = true }

hyper = { version = "1", default-features = false }
indoc = { version = "2" }
kamu = { workspace = true }
pretty_assertions = { version = "1" }
reqwest = { version = "0.12", default-features = false }
tempfile = { version = "3" }
Expand Down
1 change: 1 addition & 0 deletions src/app/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ libc = "0.2" # For getting uid:gid


[dev-dependencies]
kamu = { workspace = true, features = ["testing"] }
kamu-cli-puppet = { workspace = true, default-features = false, features = [
"extensions",
] }
Expand Down
1 change: 1 addition & 0 deletions src/domain/datasets/domain/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ mockall = { optional = true, version = "0.13", default-features = false }


[dev-dependencies]
mockall = { version = "0.13", default-features = false }
74 changes: 45 additions & 29 deletions src/domain/datasets/services/src/dataset_entry_indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

use std::collections::HashMap;
use std::sync::Arc;

use dill::{component, interface, meta};
Expand Down Expand Up @@ -83,21 +84,18 @@ impl DatasetEntryIndexer {
) -> Result<(), InternalError> {
let mut join_set = tokio::task::JoinSet::new();
let now = self.time_source.now();
let account_name_id_mapping = self.build_account_name_id_mapping(&dataset_handles).await?;

for dataset_handle in dataset_handles {
let task_account_repository = self.account_repository.clone();
let task_dataset_entry_repo = self.dataset_entry_repo.clone();
let task_owner_account_id =
account_name_id_mapping[&dataset_handle.alias.account_name].clone();
let task_now = now;
let task_dataset_entry_repo = self.dataset_entry_repo.clone();

join_set.spawn(async move {
let owner_account_id = get_dataset_owner_id(
&task_account_repository,
&dataset_handle.alias.account_name,
)
.await?;
let dataset_entry = DatasetEntry::new(
dataset_handle.id,
owner_account_id.clone(),
task_owner_account_id,
dataset_handle.alias.dataset_name,
task_now,
);
Expand All @@ -117,6 +115,45 @@ impl DatasetEntryIndexer {

Ok(())
}

/// Builds a lookup table from dataset owner name to owner account ID.
///
/// Walks `dataset_handles` in order and resolves each distinct
/// `alias.account_name` through `get_dataset_owner_id()` exactly once;
/// handles whose owner name is already present in the table are skipped.
///
/// # Errors
/// Returns the first `InternalError` produced while resolving an owner.
async fn build_account_name_id_mapping(
    &self,
    dataset_handles: &[DatasetHandle],
) -> Result<HashMap<Option<odf::AccountName>, odf::AccountID>, InternalError> {
    let mut owner_ids_by_name = HashMap::new();

    for handle in dataset_handles {
        let owner_name = &handle.alias.account_name;

        // Already resolved for an earlier handle: no need to ask again.
        if owner_ids_by_name.contains_key(owner_name) {
            continue;
        }

        let resolved_id = self.get_dataset_owner_id(owner_name).await?;
        owner_ids_by_name.insert(owner_name.clone(), resolved_id);
    }

    Ok(owner_ids_by_name)
}

/// Resolves the account ID of a dataset owner.
///
/// * `None` yields a clone of `DEFAULT_ACCOUNT_ID`.
/// * `Some(name)` fetches the account by name from `self.account_repository`
///   and yields its `id`.
///
/// # Errors
/// A failed repository lookup is passed through `int_err()` and returned.
async fn get_dataset_owner_id(
    &self,
    maybe_owner_name: &Option<odf::AccountName>,
) -> Result<odf::AccountID, InternalError> {
    match maybe_owner_name {
        None => Ok(DEFAULT_ACCOUNT_ID.clone()),
        Some(owner_name) => {
            let lookup = self.account_repository.get_account_by_name(owner_name);
            let owner_account = lookup.await.int_err()?;

            Ok(owner_account.id)
        }
    }
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand All @@ -140,24 +177,3 @@ impl InitOnStartup for DatasetEntryIndexer {
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helpers
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/// Resolves the account ID of a dataset owner using the given repository.
///
/// When no owner name is supplied, a clone of `DEFAULT_ACCOUNT_ID` is
/// returned. Otherwise the account is fetched by name from
/// `account_repository` and its `id` is returned.
///
/// # Errors
/// A failed repository lookup is passed through `int_err()` and returned.
async fn get_dataset_owner_id(
    account_repository: &Arc<dyn AccountRepository>,
    maybe_owner_name: &Option<odf::AccountName>,
) -> Result<odf::AccountID, InternalError> {
    // Guard clause: without an explicit owner, use the default account.
    let Some(owner_name) = maybe_owner_name else {
        return Ok(DEFAULT_ACCOUNT_ID.clone());
    };

    let owner_account = account_repository
        .get_account_by_name(owner_name)
        .await
        .int_err()?;

    Ok(owner_account.id)
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2 changes: 1 addition & 1 deletion src/domain/flow-system/services/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ serde_with = { version = "3", default-features = false }


[dev-dependencies]
kamu = { workspace = true }
kamu = { workspace = true, features = ["testing"] }
kamu-accounts-inmem = { workspace = true }
kamu-accounts-services = { workspace = true }
kamu-flow-system-inmem = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions src/e2e/app/cli/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ doctest = false
[dependencies]
internal-error = { workspace = true }
kamu-cli-e2e-common-macros = { workspace = true }
kamu-cli-puppet = { workspace = true, default-features = false }
kamu-cli-puppet = { workspace = true, default-features = false, features = ["extensions"] }
opendatafabric = { workspace = true }

async-trait = "0.1"
chrono = { version = "0.4", default-features = false, features = ["now"] }
indoc = "2"
pretty_assertions = "1"
regex = "1"
reqwest = { version = "0.12", default-features = false, features = [] }
reqwest = { version = "0.12", default-features = false, features = ["json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sqlx = { version = "0.8", default-features = false, features = [
Expand Down
2 changes: 1 addition & 1 deletion src/e2e/app/cli/repo-tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ normal = ["kamu-cli"]

[dependencies]
internal-error = { workspace = true }
kamu = { workspace = true }
kamu = { workspace = true, features = ["testing"] }
# We add a dependency to ensure kamu-cli is up to date before calling tests
kamu-cli = { workspace = true }
kamu-cli-e2e-common = { workspace = true }
Expand Down
3 changes: 2 additions & 1 deletion src/infra/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ingest-evm = ["dep:alloy", "dep:datafusion-ethers"]
ingest-ftp = ["dep:curl", "dep:curl-sys"]
ingest-mqtt = ["dep:rumqttc"]
query-extensions-json = ["dep:datafusion-functions-json"]
testing = ["dep:mockall"]
testing = ["dep:mockall", "kamu-data-utils/testing"]


[dependencies]
Expand Down Expand Up @@ -153,6 +153,7 @@ libc = "0.2" # For getting uid:gid
database-common = { workspace = true }
kamu-accounts-inmem = { workspace = true }
kamu-accounts-services = { workspace = true }
kamu-data-utils = { workspace = true, features = ["testing"] }
kamu-datasets-services = { workspace = true }

criterion = { version = "0.5", features = ["async_tokio"] }
Expand Down
2 changes: 2 additions & 0 deletions src/infra/ingest-datafusion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ url = { version = "2", features = ["serde"] }


[dev-dependencies]
kamu-data-utils = { workspace = true, features = ["testing"] }

criterion = { version = "0.5", features = ["async_tokio"] }
indoc = "2"
pretty_assertions = "1"
Expand Down
7 changes: 5 additions & 2 deletions src/utils/data-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ doctest = false

[features]
default = []
testing = []
testing = ["dep:pretty_assertions"]


[dependencies]
Expand All @@ -39,17 +39,20 @@ datafusion = { version = "42", default-features = false, features = [
] }
digest = "0.10"
hex = "0.4"
pretty_assertions = { version = "1" }
sha3 = "0.10"
tracing = { version = "0.1", default-features = false }
thiserror = { version = "1", default-features = false }
url = "2"
serde = { version = "1", default-features = false }
serde_json = { version = "1" }

# Optional
pretty_assertions = { optional = true, version = "1" }


[dev-dependencies]
indoc = "2"
pretty_assertions = { version = "1" }
test-log = { version = "0.2", features = ["trace"] }
tokio = { version = "1", default-features = false, features = ["rt", "macros"] }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
7 changes: 6 additions & 1 deletion src/utils/database-common-macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,9 @@ proc-macro = true

[dependencies]
quote = "1"
syn = { version = "2", default-features = false }
syn = { version = "2", default-features = false, features = [
"full",
"parsing",
"printing",
"proc-macro",
] }
6 changes: 3 additions & 3 deletions src/utils/database-common-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ impl Parse for CatalogItem2 {
/// The structure must contain the Catalog as a field
///
/// # Examples
/// ```
/// ```compile_fail
/// #[transactional_method()]
/// async fn set_system_flow_schedule(&self) {
/// // `transaction_catalog` is available inside the method body
Expand Down Expand Up @@ -269,7 +269,7 @@ pub fn transactional_method(_attr: TokenStream, item: TokenStream) -> TokenStrea
/// The structure must contain the Catalog as a field
///
/// # Examples
/// ```
/// ```compile_fail
/// // `service` request from a transactional Catalog
/// #[transactional_method1(service: Arc<dyn Service>)]
/// async fn set_system_flow_schedule(&self) {
Expand Down Expand Up @@ -308,7 +308,7 @@ pub fn transactional_method1(attr: TokenStream, item: TokenStream) -> TokenStrea
/// The structure must contain the Catalog as a field
///
/// # Examples
/// ```
/// ```compile_fail
/// // `service` request from a transactional Catalog
/// #[transactional_method2(service1: Arc<dyn Service1>, service2: Arc<dyn Service2>)]
/// async fn set_system_flow_schedule(&self) {
Expand Down
7 changes: 6 additions & 1 deletion src/utils/event-sourcing-macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,10 @@ proc-macro = true


[dependencies]
syn = { version = "2", default-features = false }
quote = "1"
syn = { version = "2", default-features = false, features = [
"derive",
"parsing",
"printing",
"proc-macro",
] }
11 changes: 8 additions & 3 deletions src/utils/time-source/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,15 @@ doctest = false

[dependencies]
async-trait = { version = "0.1", default-features = false }
chrono = { version = "0.4", default-features = false }
chrono = { version = "0.4", default-features = false, features = ["now"] }
dill = "0.9"
tokio = { version = "1", default-features = false }
tokio = { version = "1", default-features = false, features = [
"macros",
"rt",
"sync",
"time",
] }


[dev-dependencies]
futures = { version = "0.3", default-features = false }
futures = { version = "0.3", default-features = false }

0 comments on commit faebd9f

Please sign in to comment.