Skip to content

Commit

Permalink
schema now local
Browse files Browse the repository at this point in the history
  • Loading branch information
mburridge96 committed Oct 23, 2024
1 parent 7f048e7 commit 5480d0e
Show file tree
Hide file tree
Showing 8 changed files with 2,791 additions and 40 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions cli/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ pub struct ZipCrateCommand {
#[clap(short,long,default_value_t=String::from("./"))]
pub target_crate: String,
// Copy and include external reachable data files
#[clap(short, long, default_value_t = true)]
pub external: bool,
}

Expand Down
4 changes: 2 additions & 2 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2,660 changes: 2,660 additions & 0 deletions src/resources/ro_crate_1_1.jsonld

Large diffs are not rendered by default.

63 changes: 62 additions & 1 deletion src/ro_crate/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
//! RoCrate data structure
use crate::ro_crate::rocrate::RoCrate;
use crate::ro_crate::schema::validate_crate_keys;
use crate::ro_crate::schema::load_rocrate_schema;
use serde_json;
use std::collections::HashSet;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -80,6 +81,19 @@ pub enum CrateReadError {
VocabNotValid(String),
}

impl PartialEq for CrateReadError {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
// We don't compare the actual io::Error or serde_json::Error, just the variant type
(CrateReadError::IoError(_), CrateReadError::IoError(_)) => true,
(CrateReadError::JsonError(_), CrateReadError::JsonError(_)) => true,
// For `VocabNotValid`, we compare the actual error message
(CrateReadError::VocabNotValid(a), CrateReadError::VocabNotValid(b)) => a == b,
_ => false,
}
}
}

impl From<io::Error> for CrateReadError {
/// Converts an `io::Error` into a `CrateReadError::IoError`.
fn from(err: io::Error) -> CrateReadError {
Expand All @@ -94,6 +108,34 @@ impl From<serde_json::Error> for CrateReadError {
}
}

/// Validates that the property keys used in an RO-Crate are known vocabulary terms.
///
/// Every string returned by `RoCrate::get_all_property_values` must be a key of either the
/// base schema context obtained from `load_rocrate_schema` or the crate's own context items
/// (`RoCrate::get_context_items`). Keys are only compared by name; no URI is dereferenced.
///
/// # Returns
///
/// `true` when every key is recognised (including when there are no keys at all), and
/// `false` when at least one key is unknown or when the base schema could not be loaded.
pub fn validate_crate_keys(rocrate: &RoCrate) -> bool {
    let schema = match load_rocrate_schema() {
        Ok(schema) => schema,
        // Without the base vocabulary no key can be confirmed as valid.
        Err(_) => return false,
    };

    // The schema context is already a hash map, so it is queried directly instead of
    // cloning all of its keys into a second collection. Only the crate's own terms
    // need to be gathered into a set for lookup.
    let custom_terms: HashSet<_> = rocrate.get_context_items().into_iter().collect();
    let property_keys = RoCrate::get_all_property_values(rocrate);

    // `all` stops at the first unrecognised key; the list of offenders was never reported,
    // so there is no need to collect it.
    property_keys
        .iter()
        .all(|key| schema.context.contains_key(key) || custom_terms.contains(key))
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -110,6 +152,25 @@ mod tests {
assert!(crate_result.is_ok());
}

#[test]
fn test_read_crate_valid() {
    // Reading the minimal fixture with the boolean flag set to `true` must succeed.
    let metadata_path = fixture_path("_ro-crate-metadata-minimal.json");

    assert!(read_crate(&metadata_path, true).is_ok());
}

#[test]
fn test_read_crate_invalid() {
    // The fixture contains a property key ("nonschemakey") that the test expects to be
    // rejected with `CrateReadError::VocabNotValid`.
    let metadata_path = fixture_path("_ro-crate-metadata-broken-schema.json");

    let read_error = read_crate(&metadata_path, true).unwrap_err();
    // Any other error variant fails the test.
    if !matches!(read_error, CrateReadError::VocabNotValid(_)) {
        panic!();
    }
}

#[test]
fn test_read_crate_file_not_found() {
let path = fixture_path("non_existent_file.json");
Expand Down
69 changes: 35 additions & 34 deletions src/ro_crate/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@
use crate::ro_crate::constraints::{Id, IdValue, License};
use crate::ro_crate::rocrate::RoCrate;
use reqwest;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};

const ROCRATE_SCHEMA_1_1: &str = "https://www.researchobject.org/ro-crate/1.1/context.jsonld";
use std::collections::HashMap;
const ROCRATE_SCHEMA_1_1: &str = include_str!("../resources/ro_crate_1_1.jsonld");

/// Represents the JSON-LD context of the RO-Crate schema.
///
Expand All @@ -29,45 +27,48 @@ pub struct RoCrateJSONLDContext {
pub context: HashMap<String, String>,
}

/// Downloads the RO-Crate schema from a predefined URL.

/// Loads in RO-Crate schema for validation.
///
/// This function fetches the JSON-LD context defining the RO-Crate schema, attempting to parse it
/// into an `RoCrateJSONLDContext` struct. It is a synchronous operation and may block the thread
/// during the request and subsequent parsing.
pub fn download_rocrate_schema() -> Result<RoCrateJSONLDContext, Box<dyn std::error::Error>> {
let res = reqwest::blocking::get(ROCRATE_SCHEMA_1_1)?.text()?;
let context = serde_json::from_str(&res)?;
pub fn load_rocrate_schema() -> Result<RoCrateJSONLDContext, Box<dyn std::error::Error>> {
    // The schema text comes from the module-level `ROCRATE_SCHEMA_1_1` constant. Parsing is
    // delegated to the string-based loader, so no network request is made here.
    let schema_text: &str = ROCRATE_SCHEMA_1_1;
    load_rocrate_schema_from_str(schema_text)
}
/// Parses a JSON string into an `RoCrateJSONLDContext`.
///
/// # Errors
///
/// Returns the boxed `serde_json` error when `json_str` cannot be deserialised into an
/// `RoCrateJSONLDContext`.
pub fn load_rocrate_schema_from_str(json_str: &str) -> Result<RoCrateJSONLDContext, Box<dyn std::error::Error>> {
    serde_json::from_str::<RoCrateJSONLDContext>(json_str)
        // Box the parse error explicitly so it fits the function's generic error type.
        .map_err(|parse_err| Box::new(parse_err) as Box<dyn std::error::Error>)
}

/// Validates that the keys in a given RO-Crate match those defined in the base schema vocabulary.
///
/// This function checks the crate's properties against the official RO-Crate context and any embedded vocabularies.
/// It does not validate properties by dereferencing URIs but rather checks if the properties' keys are recognized.
pub fn validate_crate_keys(rocrate: &RoCrate) -> bool {
match download_rocrate_schema() {
Ok(crate_metadata) => {
let crate_context: Vec<String> = crate_metadata.context.keys().cloned().collect();
let custom_context = rocrate.get_context_items();
let vals = RoCrate::get_all_property_values(&rocrate);

// Convert vec1 and vec2 to HashSets for efficient lookup
let set1: HashSet<_> = crate_context.into_iter().collect();
let set2: HashSet<_> = custom_context.into_iter().collect();
let mut invalid_key: Vec<&String> = Vec::new();
#[cfg(test)]
mod tests {
use super::*;

for item in &vals {
if !set1.contains(item) && !set2.contains(item) {
invalid_key.push(&item);
}
}

if invalid_key.is_empty() {
true
} else {
false
#[test]
fn test_load_rocrate_schema_from_str() {
let mock_json = r#"
{
"@id": "https://w3id.org/ro/crate/1.1/context",
"name": ["RO-Crate JSON-LD Context"],
"version": "1.1.3",
"url": {"@id": "https://w3id.org/ro/crate/1.1"},
"schemaVersion": {"@id": "http://schema.org/version/10.0/"},
"isBasedOn": [{"@id": "http://schema.org/version/10.0/"}],
"license": {"@id": "https://creativecommons.org/publicdomain/zero/1.0/"},
"@context": {
"3DModel": "http://schema.org/3DModel",
"APIReference": "http://schema.org/APIReference"
}
}
Err(_e) => false,
"#;

let result = load_rocrate_schema_from_str(mock_json);
assert!(result.is_ok());

let context = result.unwrap();
assert_eq!(context.version, "1.1.3");
assert!(context.context.contains_key("3DModel"));
}

}
2 changes: 1 addition & 1 deletion src/ro_crate/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pub fn zip_crate(crate_path: &Path, external: bool) -> Result<(), ZipError> {
{
let path = entry.path();

if path == zip_file_name || path == crate_abs {
if path == zip_file_name {
continue;
}

Expand Down
28 changes: 28 additions & 0 deletions tests/fixtures/_ro-crate-metadata-broken-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

{ "@context": "https://w3id.org/ro/crate/1.1/context",
"@graph": [

{
"@type": "CreativeWork",
"@id": "ro-crate-metadata.json",
"conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
"about": {"@id": "./"}
},
{
"@id": "./",
"identifier": "https://doi.org/10.4225/59/59672c09f4a4b",
"@type": "Dataset",
"datePublished": "2017",
"name": "Data files associated with the manuscript:Effects of facilitated family case conferencing for ...",
"description": "Palliative care planning for nursing home residents with advanced dementia ...",
"license": {"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/"}
},
{
"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/",
"@type": "CreativeWork",
"description": "This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Australia License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/au/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.",
"identifier": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/",
"nonschemakey": "Attribution-NonCommercial-ShareAlike 3.0 Australia (CC BY-NC-SA 3.0 AU)"
}
]
}

0 comments on commit 5480d0e

Please sign in to comment.