From 2ae2a6674e8a780cb93723de9b749b65ea28562e Mon Sep 17 00:00:00 2001 From: zyy17 Date: Sat, 20 Jul 2024 09:55:48 +0800 Subject: [PATCH] refactor: add `get_storage_path()` and `get_catalog_and_schema()` (#4397) refactor: add get_storage_path() and get_catalog_and_schema() --- Cargo.lock | 1 + src/common/meta/src/ddl/utils.rs | 21 ++++++++++ src/store-api/Cargo.toml | 1 + src/store-api/src/path_utils.rs | 66 ++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 999480b7cf0f..1c7ddf1b404c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10899,6 +10899,7 @@ dependencies = [ "common-base", "common-error", "common-macro", + "common-meta", "common-recordbatch", "common-time", "common-wal", diff --git a/src/common/meta/src/ddl/utils.rs b/src/common/meta/src/ddl/utils.rs index 36cb97338655..7f4b279c39e0 100644 --- a/src/common/meta/src/ddl/utils.rs +++ b/src/common/meta/src/ddl/utils.rs @@ -55,6 +55,12 @@ pub fn region_storage_path(catalog: &str, schema: &str) -> String { format!("{}/{}", catalog, schema) } +/// Extracts catalog and schema from the path that created by [region_storage_path]. +pub fn get_catalog_and_schema(path: &str) -> Option<(String, String)> { + let mut split = path.split('/'); + Some((split.next()?.to_string(), split.next()?.to_string())) +} + pub async fn check_and_get_physical_table_id( table_metadata_manager: &TableMetadataManagerRef, tasks: &[CreateTableTask], @@ -145,3 +151,18 @@ pub fn convert_region_routes_to_detecting_regions( }) .collect::>() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_catalog_and_schema() { + let test_catalog = "my_catalog"; + let test_schema = "my_schema"; + let path = region_storage_path(test_catalog, test_schema); + let (catalog, schema) = get_catalog_and_schema(&path).unwrap(); + assert_eq!(catalog, test_catalog); + assert_eq!(schema, test_schema); + } +} diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 9e8143a9ec9f..c3ee5c393039 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -30,4 +30,5 @@ tokio.workspace = true [dev-dependencies] async-stream.workspace = true +common-meta.workspace = true serde_json.workspace = true diff --git a/src/store-api/src/path_utils.rs b/src/store-api/src/path_utils.rs index b83f6e33d5f4..122a19660df3 100644 --- a/src/store-api/src/path_utils.rs +++ b/src/store-api/src/path_utils.rs @@ -45,10 +45,48 @@ pub fn region_dir(path: &str, region_id: RegionId) -> String { ) } +/// get_storage_path returns the storage path from the region_dir. It will always return the storage path if the region_dir is valid, otherwise None. +/// The storage path is constructed from the catalog and schema, which are generated by `common_meta::ddl::utils::region_storage_path`. +/// We can extract the catalog and schema from the region_dir by following example: +/// ``` +/// use common_meta::ddl::utils::get_catalog_and_schema; +/// +/// fn catalog_and_schema(region_dir: &str, region_id: RegionId) -> Option<(String, String)> { +/// get_catalog_and_schema(&get_storage_path(region_dir, region_id)?) +/// } +/// ``` +pub fn get_storage_path(region_dir: &str, region_id: RegionId) -> Option { + if !region_dir.starts_with(DATA_DIR) { + return None; + } + + // For example, if region_dir is "data/my_catalog/my_schema/42/42_0000000001/", the parts will be '42/42_0000000001'. + let parts = format!( + "{}/{}", + region_id.table_id(), + region_name(region_id.table_id(), region_id.region_number()) + ); + + // Ignore the last '/'. The original path will be like "${DATA_DIR}${catalog}/${schema}". + let pos = region_dir.rfind(&parts)? - 1; + + if pos < DATA_DIR.len() { + return None; + } + + Some(region_dir[DATA_DIR.len()..pos].to_string()) +} + #[cfg(test)] mod tests { + use common_meta::ddl::utils::{get_catalog_and_schema, region_storage_path}; + use super::*; + fn catalog_and_schema(region_dir: &str, region_id: RegionId) -> Option<(String, String)> { + get_catalog_and_schema(&get_storage_path(region_dir, region_id)?) + } + #[test] fn test_region_dir() { let region_id = RegionId::new(42, 1); @@ -57,4 +95,32 @@ mod tests { "data/my_catalog/my_schema/42/42_0000000001/" ); } + + #[test] + fn test_get_catalog_and_schema_from_region_dir() { + let tests = [ + (RegionId::new(42, 1), "my_catalog", "my_schema"), + (RegionId::new(1234, 1), "my_catalog_1234", "my_schema_1234"), + (RegionId::new(5678, 1), "my_catalog_5678", "my_schema"), + (RegionId::new(5678, 1), "my_catalog", "my_schema_5678"), + ]; + + for (region_id, test_catalog, test_schema) in tests.iter() { + let region_dir = region_dir( + region_storage_path(test_catalog, test_schema).as_str(), + *region_id, + ); + let (catalog, schema) = catalog_and_schema(®ion_dir, *region_id).unwrap(); + assert_eq!(catalog, *test_catalog); + assert_eq!(schema, *test_schema); + } + } + + #[test] + fn test_get_catalog_and_schema_from_invalid_region_dir() { + assert_eq!( + catalog_and_schema("invalid_data", RegionId::new(42, 1)), + None + ); + } }