Skip to content

Commit

Permalink
feat: config file parsing and testing (#19)
Browse files Browse the repository at this point in the history
Co-authored-by: Cyril Matthey-Doret <[email protected]>
Co-authored-by: Gabriel Nützi <[email protected]>
  • Loading branch information
3 people authored Jun 25, 2024
1 parent 341de0e commit d440e34
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 25 deletions.
123 changes: 123 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ clap = { version = "4.5.7", features = ["derive"] }
rio_turtle = "0.8.4"
rio_api = "0.8.4"
bitflags = "2.5.0"
serde_yml = "0.0.10"
tempfile = "3.10.1"
25 changes: 22 additions & 3 deletions src/io.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::rules::Config;
use rio_turtle::NTriplesParser;
use serde_yml;
use std::{
boxed::Box,
fs::File,
Expand All @@ -18,7 +20,7 @@ pub fn get_reader(path: &Path) -> Box<dyn BufRead> {
/// Returns a buffered writer for `path`.
///
/// A path of exactly `-` selects standard output. Any other path is created
/// if it does not exist (and truncated if it does) and opened for writing.
///
/// # Panics
///
/// Panics if the output file cannot be created.
pub fn get_writer(path: &Path) -> Box<dyn Write> {
    // Matching on `Option<&str>` means a non-UTF-8 path is simply treated as
    // a regular file path instead of causing a panic.
    match path.to_str() {
        Some("-") => Box::new(BufWriter::new(stdout())),
        // `File::create` opens the file for writing; `File::open` would be
        // read-only and would fail for files that do not exist yet.
        _ => Box::new(BufWriter::new(File::create(path).unwrap())),
    }
}

Expand All @@ -28,11 +30,22 @@ pub fn parse_ntriples(reader: impl BufRead) -> NTriplesParser<impl BufRead> {
return NTriplesParser::new(reader);
}

// Parse yaml configuration file.
pub fn parse_config(path: &Path) -> Config {
return match File::open(&path) {
Ok(file) => serde_yml::from_reader(file).expect("Error parsing config file."),
Err(e) => panic!("Cannot open file '{:?}': '{}'.", path, e),
};
}

#[cfg(test)]
mod tests {
use super::parse_ntriples;
use super::{parse_config, parse_ntriples};
use rio_api::parser::TriplesParser;
use std::io::{BufRead, BufReader};
use std::{
io::{BufRead, BufReader},
path::Path,
};

#[test]
// Test the parsing of a triple.
Expand All @@ -49,4 +62,10 @@ mod tests {
})
.expect("Error parsing triple");
}
// Smoke test: the sample config file must load without panicking.
#[test]
fn config_parsing() {
    let _config = parse_config(Path::new("tests/data/config.yaml"));
}
}
7 changes: 6 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ struct PseudoArgs {
#[arg(default_value = "-")]
input: PathBuf,

/// The config file descriptor to use for defining RDF elements to pseudonymize.
/// Format: yaml
#[arg(short, long)]
config: PathBuf,

/// Output file descriptor for pseudonymized triples.
/// Defaults to `stdout`.
#[arg(short, long, default_value = "-")]
Expand Down Expand Up @@ -80,7 +85,7 @@ fn main() {
}
Subcommands::Pseudo(args) => {
info!(log, "Args: {:?}", args);
pseudonymize_graph(&log, &args.input, &args.output, &args.index)
pseudonymize_graph(&log, &args.input, &args.config, &args.output, &args.index)
}
}
}
Expand Down
28 changes: 22 additions & 6 deletions src/pass_second.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::{
io,
log::Logger,
model::{pseudonymize_triple, TripleMask},
rules::Config,
};

fn mask_triple(triple: &Triple) -> TripleMask {
Expand All @@ -18,7 +19,12 @@ fn mask_triple(triple: &Triple) -> TripleMask {

// mask and encode input triple
// NOTE: This will need the type-map to perform masking
fn process_triple(triple: &Triple, out: &mut impl Write) -> Result<(), TurtleError> {
fn process_triple(
triple: &Triple,
rules_config: &Config,
node_to_type: &HashMap<String, String>,
out: &mut impl Write,
) -> Result<(), TurtleError> {
let mask = mask_triple(triple);
let pseudo_triple = pseudonymize_triple(&triple, mask);
let _ = out.write(&format!("{} .\n", &pseudo_triple.to_string()).into_bytes());
Expand All @@ -41,32 +47,42 @@ fn load_type_map(input: impl BufRead) -> HashMap<String, String> {
return node_to_type;
}

pub fn pseudonymize_graph(log: &Logger, input: &Path, output: &Path, index: &Path) {
pub fn pseudonymize_graph(log: &Logger, input: &Path, config: &Path, output: &Path, index: &Path) {
let buf_input = io::get_reader(input);
let buf_index = io::get_reader(index);
let mut buf_output = io::get_writer(output);
let rules_config = io::parse_config(config);

let node_to_type: HashMap<String, String> = load_type_map(buf_index);
let mut triples = io::parse_ntriples(buf_input);
while !triples.is_end() {
triples
.parse_step(&mut |t| process_triple(&t, &mut buf_output))
.parse_step(&mut |t| process_triple(&t, &rules_config, &node_to_type, &mut buf_output))
.unwrap();
}
}
#[cfg(test)]
mod tests {
use super::encrypt;
use super::pseudonymize_graph;
use crate::log;
use std::path::Path;
use tempfile::tempdir;

#[test]
// Test the pseudonymization of an N-Triples file.
fn encrypt_nt_file() {
let dir = tempdir().unwrap();
let input_path = Path::new("tests/data/test.nt");
let output_path = Path::new("tests/data/output.nt");
let config_path = Path::new("tests/data/config.yaml");
let output_path = dir.path().join("output.nt");
let type_map_path = Path::new("tests/data/type_map.nt");
let logger = log::create_logger(true);
encrypt(&logger, &input_path, &output_path, &type_map_path);
pseudonymize_graph(
&logger,
&input_path,
&config_path,
&output_path,
&type_map_path,
);
}
}
9 changes: 5 additions & 4 deletions src/rules.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use ::std::collections::{HashMap, HashSet};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug)]
struct Config {
pub struct Config {
// Replace values of nodes with a certain type.
replace_values_of_nodes_with_type: Vec<String>,
pub replace_uri_of_nodes_with_type: HashSet<String>,

// Replace values of `subject` & `predicate`.
replace_values_of_subject_predicate: Vec<(String, String)>,
pub replace_values_of_subject_predicate: HashMap<String, HashSet<String>>,

// Replace values in matched `predicates`.
replace_value_of_predicate: Vec<String>,
pub replace_value_of_predicate: HashSet<String>,
}
22 changes: 11 additions & 11 deletions tests/data/config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Hash URIs of people and online accounts.
replace-uris-of-nodes-with-type:
"http://xmlns.com/foaf/0.1/Person" # All nodes which are `rdf:type Person`.
"http://xmlns.com/foaf/OnlineAccount" # or `rdf::type OnlineAccount`
# hash URIs of people and online accounts
replace_uri_of_nodes_with_type:
- "http://xmlns.com/foaf/0.1/Person" # All nodes which are rdf:type Person
- "http://xmlns.com/foaf/OnlineAccount" # "" OnlineAccount

# Hash name only for instances of person and online account.
replace-values-of-predicate-object:
# hash name only for instances of person and online account
replace_values_of_subject_predicate:
"http://xmlns.com/foaf/OnlineAccount":
"http://schema.org/name"
- "http://schema.org/name"
"http://xmlns.com/foaf/0.1/Person":
"http://schema.org/name"
- "http://schema.org/name"

# Hash accesscode values for all nodes.
replace-values-of-predicate:
"http://schema.org/accessCode"
# hash accesscode values for all nodes
replace_value_of_predicate:
- "http://schema.org/accessCode"

0 comments on commit d440e34

Please sign in to comment.