diff --git a/README.md b/README.md index b6ffa9f..79290be 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ them easy to interact with in a unix-like way. - [0004-hashed-n-tuple-storage-layout](https://ocfl.github.io/extensions/0004-hashed-n-tuple-storage-layout.html) - [0005-mutable-head](https://ocfl.github.io/extensions/0005-mutable-head.html): Only read is supported; not write. +- [0006-flat-omit-prefix-storage-layout](https://ocfl.github.io/extensions/0006-flat-omit-prefix-storage-layout.html) Additionally, it uses the following extensions for write support that have not been specified: diff --git a/resources/main/specs/0006-flat-omit-prefix-storage-layout.md b/resources/main/specs/0006-flat-omit-prefix-storage-layout.md new file mode 100644 index 0000000..91946fe --- /dev/null +++ b/resources/main/specs/0006-flat-omit-prefix-storage-layout.md @@ -0,0 +1,141 @@ +# OCFL Community Extension 0006: Flat Omit Prefix Storage Layout + +* **Extension Name:** 0006-flat-omit-prefix-storage-layout +* **Authors:** Andrew Woods +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes a flat OCFL storage layout. The OCFL object directories are direct children of +the OCFL storage root directory. +The OCFL object identifiers are expected to contain prefixes which are removed in the mapping to directory names. The +OCFL object identifier prefix is defined as all characters before and including a configurable delimiter. + +The limitations of this layout are filesystem dependent, but are generally as follows: + +* The size of object identifiers, minus the length of the prefix, cannot exceed the maximum allowed directory name size + (eg. 
255 characters) +* Object identifiers cannot include characters that are illegal in directory names +* Performance may degrade as the size of a repository increases because every object is a direct child of the storage root + +## Parameters + +### Summary + +* **Name:** `delimiter` + * **Description:** The case-insensitive delimiter marking the end of the OCFL object identifier prefix; MUST consist + of a character string of length one or greater. If the delimiter is found multiple times in the OCFL object + identifier, its last occurrence (right-most) will be used to select the termination of the prefix. + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** + +## Examples + +### Example 1 + +This example demonstrates mappings where the single-character delimiter is found one or more times in the OCFL object +identifier. + +#### Parameters + +There is no default configuration; therefore, configuration parameters must be provided. + +```json +{ + "extensionName": "0006-flat-omit-prefix-storage-layout", + "delimiter": ":" +} +``` + +#### Mappings + +| Object ID | Object Root Path | +| --- | --- | +| namespace:12887296 | `12887296` | +| urn:uuid:6e8bc430-9c3a-11d9-9669-0800200c9a66 | `6e8bc430-9c3a-11d9-9669-0800200c9a66` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── 12887296/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 6e8bc430-9c3a-11d9-9669-0800200c9a66/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates mappings where the multi-character delimiter is found one or more times in the OCFL object +identifier. + +#### Parameters + +There is no default configuration; therefore, configuration parameters must be provided. 
+ +```json +{ + "extensionName": "0006-flat-omit-prefix-storage-layout", + "delimiter": "edu/" +} +``` + +#### Mappings + +| Object ID | Object Root Path | +| --- | --- | +| https://institution.edu/3448793 | `3448793` | +| https://institution.edu/abc/edu/f8.05v | `f8.05v` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── 3448793/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── f8.05v/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates mappings that produce directory names that are invalid on unix filesystems; therefore this +layout cannot be used in a repository that needs to be able to store objects with identifiers like these. + +#### Parameters + +There is no default configuration; therefore, configuration parameters must be provided. + +```json +{ + "extensionName": "0006-flat-omit-prefix-storage-layout", + "delimiter": "info:" +} +``` + +#### Mappings + +| Object ID | Object Root Path | +| --- | --- | +| info:fedora/object-01 | `fedora/object-01` | +| https://example.org/info:/12345/x54xz321/s3/f8.05v | `/12345/x54xz321/s3/f8.05v` | diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index de04fdd..4f40b7c 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -193,6 +193,10 @@ fn create_layout(layout_name: Layout, config_file: Option<&Path>) -> Result Some(StorageLayout::new( + LayoutExtensionName::FlatOmitPrefixLayout, + config_bytes.as_deref(), + )?), }; Ok(layout) diff --git a/src/cmd/opts.rs b/src/cmd/opts.rs index 6659cba..316bfe6 100644 --- a/src/cmd/opts.rs +++ b/src/cmd/opts.rs @@ -565,6 +565,8 @@ pub enum Layout { HashedNTuple, #[strum(serialize = "0003-hash-and-id-n-tuple-storage-layout")] HashedNTupleObjectId, + #[strum(serialize = "0006-flat-omit-prefix-storage-layout")] + FlatOmitPrefix, } arg_enum! 
{ diff --git a/src/ocfl/consts.rs b/src/ocfl/consts.rs index 1282ccc..1d3077f 100644 --- a/src/ocfl/consts.rs +++ b/src/ocfl/consts.rs @@ -23,6 +23,7 @@ pub const HASHED_NTUPLE_OBJECT_ID_LAYOUT_EXTENSION: &str = "0003-hash-and-id-n-tuple-storage-layout"; pub const HASHED_NTUPLE_LAYOUT_EXTENSION: &str = "0004-hashed-n-tuple-storage-layout"; pub const MUTABLE_HEAD_EXTENSION: &str = "0005-mutable-head"; +pub const FLAT_OMIT_PREFIX_LAYOUT_EXTENSION: &str = "0006-flat-omit-prefix-storage-layout"; pub const ROCFL_STAGING_EXTENSION: &str = "rocfl-staging"; pub const ROCFL_LOCKS_EXTENSION: &str = "rocfl-locks"; @@ -32,6 +33,7 @@ pub static SUPPORTED_EXTENSIONS: Lazy> = Lazy::new(|| { set.insert(HASHED_NTUPLE_OBJECT_ID_LAYOUT_EXTENSION); set.insert(HASHED_NTUPLE_LAYOUT_EXTENSION); set.insert(MUTABLE_HEAD_EXTENSION); + set.insert(FLAT_OMIT_PREFIX_LAYOUT_EXTENSION); set.insert(ROCFL_STAGING_EXTENSION); set.insert(ROCFL_LOCKS_EXTENSION); set diff --git a/src/ocfl/specs.rs b/src/ocfl/specs.rs index b25d6a8..6ba0504 100644 --- a/src/ocfl/specs.rs +++ b/src/ocfl/specs.rs @@ -5,3 +5,5 @@ pub const EXT_0003_SPEC: &str = include_str!("../../resources/main/specs/0003-hash-and-id-n-tuple-storage-layout.md"); pub const EXT_0004_SPEC: &str = include_str!("../../resources/main/specs/0004-hashed-n-tuple-storage-layout.md"); +pub const EXT_0006_SPEC: &str = + include_str!("../../resources/main/specs/0006-flat-omit-prefix-storage-layout.md"); diff --git a/src/ocfl/store/fs.rs b/src/ocfl/store/fs.rs index 258b785..4461cd5 100644 --- a/src/ocfl/store/fs.rs +++ b/src/ocfl/store/fs.rs @@ -1056,6 +1056,7 @@ fn write_layout_config(root: impl AsRef, layout: &StorageLayout) -> Result LayoutExtensionName::FlatDirectLayout => specs::EXT_0002_SPEC, LayoutExtensionName::HashedNTupleObjectIdLayout => specs::EXT_0003_SPEC, LayoutExtensionName::HashedNTupleLayout => specs::EXT_0004_SPEC, + LayoutExtensionName::FlatOmitPrefixLayout => specs::EXT_0006_SPEC, }; write!( diff --git a/src/ocfl/store/layout.rs 
b/src/ocfl/store/layout.rs index f9e5029..305efbe 100644 --- a/src/ocfl/store/layout.rs +++ b/src/ocfl/store/layout.rs @@ -32,6 +32,9 @@ pub enum LayoutExtensionName { #[strum(serialize = "0003-hash-and-id-n-tuple-storage-layout")] #[serde(rename = "0003-hash-and-id-n-tuple-storage-layout")] HashedNTupleObjectIdLayout, + #[strum(serialize = "0006-flat-omit-prefix-storage-layout")] + #[serde(rename = "0006-flat-omit-prefix-storage-layout")] + FlatOmitPrefixLayout, } impl StorageLayout { @@ -47,6 +50,9 @@ impl StorageLayout { LayoutExtensionName::HashedNTupleObjectIdLayout => { Ok(HashedNTupleObjectIdLayoutExtension::new(config_bytes)?.into()) } + LayoutExtensionName::FlatOmitPrefixLayout => { + Ok(FlatOmitPrefixLayoutExtension::new(config_bytes)?.into()) + } } }; @@ -92,6 +98,14 @@ struct HashedNTupleObjectIdLayoutExtension { config: HashedNTupleObjectIdLayoutConfig, } +/// [Flat Omit Prefix Storage Layout Extension](https://ocfl.github.io/extensions/0006-flat-omit-prefix-storage-layout.html) +#[derive(Debug)] +struct FlatOmitPrefixLayoutExtension { + config: FlatOmitPrefixLayoutConfig, + case_matters: bool, + normalized_delimiter: String, +} + /// [Flat Direct Storage Layout Config](https://ocfl.github.io/extensions/0002-flat-direct-storage-layout.html) #[derive(Deserialize, Serialize, Debug)] #[serde(rename_all = "camelCase", default)] @@ -120,11 +134,20 @@ struct HashedNTupleObjectIdLayoutConfig { number_of_tuples: usize, } +/// [Flat Omit Prefix Storage Layout Config](https://ocfl.github.io/extensions/0006-flat-omit-prefix-storage-layout.html) +#[derive(Deserialize, Serialize, Debug)] +#[serde(rename_all = "camelCase")] +struct FlatOmitPrefixLayoutConfig { + extension_name: LayoutExtensionName, + delimiter: String, +} + #[derive(Debug)] enum LayoutExtension { FlatDirect(FlatDirectLayoutExtension), HashedNTuple(HashedNTupleLayoutExtension), HashedNTupleObjectId(HashedNTupleObjectIdLayoutExtension), + FlatOmitPrefix(FlatOmitPrefixLayoutExtension), } impl 
FlatDirectLayoutConfig { @@ -194,12 +217,30 @@ impl Default for HashedNTupleObjectIdLayoutConfig { } } +impl FlatOmitPrefixLayoutConfig { + fn validate(&self) -> Result<()> { + validate_extension_name( + &LayoutExtensionName::FlatOmitPrefixLayout, + &self.extension_name, + )?; + + if self.delimiter.is_empty() { + return Err(RocflError::InvalidConfiguration( + "delimiter was empty but it must be non-empty".to_string(), + )); + } + + Ok(()) + } +} + impl LayoutExtension { fn map_object_id(&self, object_id: &str) -> String { match self { LayoutExtension::FlatDirect(ext) => ext.map_object_id(object_id), LayoutExtension::HashedNTuple(ext) => ext.map_object_id(object_id), LayoutExtension::HashedNTupleObjectId(ext) => ext.map_object_id(object_id), + LayoutExtension::FlatOmitPrefix(ext) => ext.map_object_id(object_id), } } @@ -208,6 +249,7 @@ impl LayoutExtension { LayoutExtension::FlatDirect(ext) => ext.config.extension_name, LayoutExtension::HashedNTuple(ext) => ext.config.extension_name, LayoutExtension::HashedNTupleObjectId(ext) => ext.config.extension_name, + LayoutExtension::FlatOmitPrefix(ext) => ext.config.extension_name, } } @@ -218,6 +260,7 @@ impl LayoutExtension { LayoutExtension::HashedNTupleObjectId(ext) => { Ok(serde_json::to_vec_pretty(&ext.config)?) 
} + LayoutExtension::FlatOmitPrefix(ext) => Ok(serde_json::to_vec_pretty(&ext.config)?), } } } @@ -240,6 +283,12 @@ impl From for LayoutExtension { } } +impl From for LayoutExtension { + fn from(extension: FlatOmitPrefixLayoutExtension) -> Self { + LayoutExtension::FlatOmitPrefix(extension) + } +} + impl FlatDirectLayoutExtension { fn new(config_bytes: Option<&[u8]>) -> Result { let config = match config_bytes { @@ -355,6 +404,59 @@ impl HashedNTupleObjectIdLayoutExtension { } } +impl FlatOmitPrefixLayoutExtension { + fn new(config_bytes: Option<&[u8]>) -> Result { + let config = match config_bytes { + Some(config_bytes) => { + let config: FlatOmitPrefixLayoutConfig = serde_json::from_slice(config_bytes)?; + config.validate()?; + config + } + None => { + return Err(RocflError::InvalidConfiguration( + "Storage layout extension configuration must be specified".to_string(), + )) + } + }; + + let case_matters = config.delimiter.to_lowercase() != config.delimiter.to_uppercase(); + + let normalized_delimiter = if case_matters { + config.delimiter.to_lowercase() + } else { + config.delimiter.clone() + }; + + Ok(Self { + config, + case_matters, + normalized_delimiter, + }) + } + + /// Object IDs have a prefix removed and the remaining part is returned + fn map_object_id(&self, object_id: &str) -> String { + let test_id = if self.case_matters { + Cow::Owned(object_id.to_lowercase()) + } else { + Cow::Borrowed(object_id) + }; + + match test_id.rfind(&self.normalized_delimiter) { + None => object_id.to_string(), + Some(index) => { + let length = self.normalized_delimiter.len(); + if object_id.len() == index + length { + // FIXME this should really be an error + "".to_string() + } else { + object_id[index + length..].to_string() + } + } + } + } +} + /// Splits the digest into N tuples of M size, joined with a / fn digest_to_tuples(digest: &str, tuple_size: usize, number_of_tuples: usize) -> String { let mut path = String::new(); @@ -460,6 +562,7 @@ mod tests { use super::{ 
lower_percent_escape, HashedNTupleLayoutExtension, HashedNTupleObjectIdLayoutExtension, }; + use crate::ocfl::store::layout::FlatOmitPrefixLayoutExtension; use crate::ocfl::Result; const ID_1: &str = "info:example/test-123"; @@ -809,6 +912,48 @@ mod tests { let _ = hashed_ntuple_ext("sha256", 10, 10, false).unwrap(); } + #[test] + fn flat_omit_mapping_single_char() { + let ext = flat_omit_ext(":").unwrap(); + + assert_eq!("example/test-123", ext.map_object_id(ID_1)); + assert_eq!("lè-$id", ext.map_object_id(ID_2)); + assert_eq!(ID_3, ext.map_object_id(ID_3)); + assert_eq!( + "6e8bc430-9c3a-11d9-9669-0800200c9a66", + ext.map_object_id("urn:uuid:6e8bc430-9c3a-11d9-9669-0800200c9a66") + ); + assert_eq!( + "", + ext.map_object_id("urn:uuid:6e8bc430-9c3a-11d9-9669-0800200c9a66:") + ); + } + + #[test] + fn flat_omit_mapping_multi_char() { + let ext = flat_omit_ext("edu/").unwrap(); + + assert_eq!( + "3448793", + ext.map_object_id("https://institution.edu/3448793") + ); + assert_eq!( + "f8.05v", + ext.map_object_id("https://institution.edu/abc/edu/f8.05v") + ); + assert_eq!("", ext.map_object_id("https://example.edu/")); + assert_eq!( + "https://example.com/", + ext.map_object_id("https://example.com/") + ); + } + + #[test] + #[should_panic(expected = "delimiter was empty")] + fn flat_omit_fail_when_delimiter_empty() { + let _ = flat_omit_ext("").unwrap(); + } + fn hashed_ntuple_ext( algorithm: &str, tuple_size: usize, @@ -848,4 +993,17 @@ mod tests { .as_bytes(), )) } + + fn flat_omit_ext(delimiter: &str) -> Result { + FlatOmitPrefixLayoutExtension::new(Some( + format!( + "{{ + \"extensionName\": \"0006-flat-omit-prefix-storage-layout\", + \"delimiter\": \"{}\" + }}", + delimiter + ) + .as_bytes(), + )) + } } diff --git a/src/ocfl/store/s3.rs b/src/ocfl/store/s3.rs index 956e84d..82ff7d6 100644 --- a/src/ocfl/store/s3.rs +++ b/src/ocfl/store/s3.rs @@ -1053,6 +1053,7 @@ fn write_layout_config(s3_client: &S3Client, layout: &StorageLayout) -> Result<( 
LayoutExtensionName::FlatDirectLayout => specs::EXT_0002_SPEC, LayoutExtensionName::HashedNTupleObjectIdLayout => specs::EXT_0003_SPEC, LayoutExtensionName::HashedNTupleLayout => specs::EXT_0004_SPEC, + LayoutExtensionName::FlatOmitPrefixLayout => specs::EXT_0006_SPEC, }; s3_client.put_object_bytes( diff --git a/tests/fs-tests.rs b/tests/fs-tests.rs index 4fd9a56..01ee86d 100644 --- a/tests/fs-tests.rs +++ b/tests/fs-tests.rs @@ -1562,13 +1562,8 @@ fn fail_object_commit_when_no_known_storage_layout_and_root_specified_and_obj_al ) .unwrap(); - repo.commit( - object_2_id, - CommitMeta::new(), - Some(object_root), - false, - ) - .unwrap(); + repo.commit(object_2_id, CommitMeta::new(), Some(object_root), false) + .unwrap(); } #[test] diff --git a/tests/s3-tests.rs b/tests/s3-tests.rs index bd7798c..afb562f 100644 --- a/tests/s3-tests.rs +++ b/tests/s3-tests.rs @@ -393,7 +393,11 @@ fn fail_commit_when_out_of_sync() { ); } -fn panic_or_run_s3_test(name: &str, message: &str, test: impl FnOnce(S3Client, String, TempDir, TempDir) + UnwindSafe) { +fn panic_or_run_s3_test( + name: &str, + message: &str, + test: impl FnOnce(S3Client, String, TempDir, TempDir) + UnwindSafe, +) { if should_ignore_test() { println!("Skipping test {}", name); panic!("{}", message); @@ -402,7 +406,10 @@ fn panic_or_run_s3_test(name: &str, message: &str, test: impl FnOnce(S3Client, S run_s3_test(name, test) } -fn skip_or_run_s3_test(name: &str, test: impl FnOnce(S3Client, String, TempDir, TempDir) + UnwindSafe) { +fn skip_or_run_s3_test( + name: &str, + test: impl FnOnce(S3Client, String, TempDir, TempDir) + UnwindSafe, +) { if should_ignore_test() { println!("Skipping test {}", name); return;