From fddccd5fce317a42458d6e154ab9bcdbc87f2c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20N=C3=BCtzi?= Date: Tue, 16 Jul 2024 10:40:45 +0200 Subject: [PATCH] chore: make clippy fail on warnings, better code style (#36) --- Cargo.lock | 2 +- src/crypto.rs | 20 +++++--------------- src/io.rs | 6 ++---- src/log.rs | 1 - src/main.rs | 2 +- src/model.rs | 1 - src/pass_first.rs | 38 +++++++++++++++++++++++--------------- src/pass_second.rs | 36 +++++++++++++++++++++--------------- src/rdf_types.rs | 33 +++++++++++++-------------------- src/rules.rs | 18 ++++++++---------- tools/lint-rust.sh | 3 +-- 11 files changed, 75 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7b3c115..51dca00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -870,7 +870,7 @@ dependencies = [ [[package]] name = "tripsu" -version = "0.1.0" +version = "0.0.1" dependencies = [ "bitflags", "blake3", diff --git a/src/crypto.rs b/src/crypto.rs index 500ad17..162858e 100644 --- a/src/crypto.rs +++ b/src/crypto.rs @@ -1,6 +1,5 @@ use super::model::Entity; use crate::{model::TripleMask, rdf_types::*}; -use blake3; pub trait Pseudonymize { // Pseudonymize parts of a triple set by its mask @@ -26,9 +25,9 @@ pub trait Pseudonymize { fn pseudo_entity(&self, e: &Entity) -> Entity { match e { - Entity::Literal(l) => Entity::Literal(self.pseudo_literal(&l)), - Entity::NamedNode(n) => Entity::NamedNode(self.pseudo_named_node(&n)), - Entity::BlankNode(b) => Entity::BlankNode(self.pseudo_blank_node(&b)), + Entity::Literal(l) => Entity::Literal(self.pseudo_literal(l)), + Entity::NamedNode(n) => Entity::NamedNode(self.pseudo_named_node(n)), + Entity::BlankNode(b) => Entity::BlankNode(self.pseudo_blank_node(b)), } } // private methods? Blanket implementations @@ -42,17 +41,8 @@ pub trait Pseudonymize { // return u.clone() } -pub struct DefaultHasher { - hasher: blake3::Hasher, -} - -impl DefaultHasher { - pub fn new() -> Self { - return DefaultHasher { - hasher: blake3::Hasher::new(), - }; - } -} +#[derive(Default)] +pub struct DefaultHasher {} impl Pseudonymize for DefaultHasher { fn pseudo_named_node(&self, t: &NamedNode) -> NamedNode { diff --git a/src/io.rs b/src/io.rs index 36e1cca..9f84fe6 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,10 +1,8 @@ use crate::rules::Rules; use rio_turtle::NTriplesParser; -use serde_yml; use std::{ - boxed::Box, fs::File, - io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Write}, + io::{self, stdin, stdout, BufRead, BufReader, BufWriter}, path::Path, }; @@ -46,7 +44,7 @@ pub fn parse_ntriples(reader: impl BufRead) -> NTriplesParser { // Parse yaml configuration file. pub fn parse_config(path: &Path) -> Rules { - return match File::open(&path) { + return match File::open(path) { Ok(file) => serde_yml::from_reader(file).expect("Error parsing config file."), Err(e) => panic!("Cannot open file '{:?}': '{}'.", path, e), }; diff --git a/src/log.rs b/src/log.rs index eb5a10b..d766846 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,5 +1,4 @@ use slog::{self, o, Drain}; -use slog_async; use std::{io, sync::Arc}; pub type Logger = slog::Logger; diff --git a/src/main.rs b/src/main.rs index 30fb43d..f71ff43 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,7 +35,7 @@ struct IndexArgs { /// File descriptor to read triples from. /// Defaults to `stdin`. - #[arg(default_value = "-")] + #[arg(short, long, default_value = "-")] input: PathBuf, } diff --git a/src/model.rs b/src/model.rs index 8b164fb..2567a36 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,7 +1,6 @@ use std::hash::Hash; use crate::rdf_types::*; -use bitflags; #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum Entity { diff --git a/src/pass_first.rs b/src/pass_first.rs index 148db6d..039edce 100644 --- a/src/pass_first.rs +++ b/src/pass_first.rs @@ -1,30 +1,38 @@ -use rio_api::{model::Triple, parser::TriplesParser}; +use rio_api::parser::TriplesParser; use rio_turtle::TurtleError; -use std::{ - io::{stdin, BufRead, BufReader, Write}, - path::Path, +use std::{io::Write, path::Path}; + +use crate::{ + io, + rdf_types::{Triple, TripleView}, }; -use crate::io; +fn index_triple(t: Triple, out: &mut impl Write) { + if t.predicate.iri.as_str() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" { + let r = || -> std::io::Result<()> { + out.write_all(t.to_string().as_bytes())?; + out.write_all(b" .\n") + }(); -fn index_triple(t: Triple, out: &mut impl Write) -> Result<(), TurtleError> { - match t.predicate.iri { - "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" => { - let _ = out.write(&format!("{} .\n", &t.to_string()).into_bytes()); + if let Err(e) = r { + panic!("Error writting to out buffer: {e}"); } - _ => {} } - - Ok(()) } pub fn create_type_map(input: &Path, output: &Path) { let buf_in = io::get_reader(input); let mut buf_out = io::get_writer(output); let mut triples = io::parse_ntriples(buf_in); + while !triples.is_end() { - triples - .parse_step(&mut |t| index_triple(t, &mut buf_out)) - .unwrap(); + let _ = triples + .parse_step(&mut |t: TripleView| { + index_triple(t.into(), &mut buf_out); + Result::<(), TurtleError>::Ok(()) + }) + .inspect_err(|e| { + panic!("Parsing error occured: {e}"); + }); } } diff --git a/src/pass_second.rs b/src/pass_second.rs index 7843933..3f3a337 100644 --- a/src/pass_second.rs +++ b/src/pass_second.rs @@ -2,7 +2,6 @@ use rio_api::parser::TriplesParser; use rio_turtle::TurtleError; use std::{ collections::HashMap, - fmt::{Debug, Display}, io::{BufRead, Write}, path::Path, }; @@ -38,13 +37,18 @@ fn process_triple( rules_config: &Rules, node_to_type: &HashMap, out: &mut impl Write, -) -> Result<(), TurtleError> { - let mask = match_rules(triple.clone(), &rules_config, &node_to_type); - let hasher = DefaultHasher::new(); - let _ = - out.write(&format!("{} .\n", hasher.pseudo_triple(&triple, mask).to_string()).into_bytes()); +) { + let mask = match_rules(triple.clone(), rules_config, node_to_type); + let hasher = DefaultHasher::default(); - Ok(()) + let r = || -> std::io::Result<()> { + out.write_all(hasher.pseudo_triple(&triple, mask).to_string().as_bytes())?; + out.write_all(b" .\n") + }(); + + if let Err(e) = r { + panic!("Error writting to out buffer: {e}"); + } } // Create a index mapping node -> type from an input ntriples buffer @@ -55,8 +59,8 @@ fn load_type_map(input: impl BufRead) -> HashMap { while !triples.is_end() { let _: Result<(), TurtleError> = triples.parse_step(&mut |t| { node_to_type.insert( - t.subject.to_string().replace(&['<', '>'], ""), - t.object.to_string().replace(&['<', '>'], ""), + t.subject.to_string().replace(['<', '>'], ""), + t.object.to_string().replace(['<', '>'], ""), ); Ok(()) }); @@ -65,7 +69,7 @@ fn load_type_map(input: impl BufRead) -> HashMap { return node_to_type; } -pub fn pseudonymize_graph(log: &Logger, input: &Path, config: &Path, output: &Path, index: &Path) { +pub fn pseudonymize_graph(_: &Logger, input: &Path, config: &Path, output: &Path, index: &Path) { let buf_input = io::get_reader(input); let buf_index = io::get_reader(index); let mut buf_output = io::get_writer(output); @@ -75,13 +79,15 @@ pub fn pseudonymize_graph(log: &Logger, input: &Path, config: &Path, output: &Pa let mut triples = io::parse_ntriples(buf_input); - // TODO: Try to make this into an iterator loop to leverage rayons parallelization feature over - // iterators. - + // Run the loop single-threaded. while !triples.is_end() { triples - .parse_step(&mut |t| { - process_triple(t.into(), &rules_config, &node_to_type, &mut buf_output) + .parse_step(&mut |t: TripleView| { + process_triple(t.into(), &rules_config, &node_to_type, &mut buf_output); + Result::<(), TurtleError>::Ok(()) + }) + .inspect_err(|e| { + panic!("Parsing error occured: {e}"); }) .unwrap(); } diff --git a/src/rdf_types.rs b/src/rdf_types.rs index 1f659a6..8cf9b2a 100644 --- a/src/rdf_types.rs +++ b/src/rdf_types.rs @@ -1,18 +1,17 @@ -use super::model::{Entity, TripleMask}; -use rio_api; -use std::{fmt, fmt::Write, ops::Sub}; +use super::model::Entity; +use std::{fmt, fmt::Write}; // Rewrite all the rio types to be able to instanciate triples // Rename rio types as XXXView to distinguish them from our types // Use rio types for parsing and serializing // Define mappers between the two types // -type NamedNodeView<'a> = rio_api::model::NamedNode<'a>; -type LiteralView<'a> = rio_api::model::Literal<'a>; -type TermView<'a> = rio_api::model::Term<'a>; -type TripleView<'a> = rio_api::model::Triple<'a>; -type BlankNodeView<'a> = rio_api::model::BlankNode<'a>; -type SubjectView<'a> = rio_api::model::Subject<'a>; +pub type NamedNodeView<'a> = rio_api::model::NamedNode<'a>; +pub type LiteralView<'a> = rio_api::model::Literal<'a>; +pub type TermView<'a> = rio_api::model::Term<'a>; +pub type TripleView<'a> = rio_api::model::Triple<'a>; +pub type BlankNodeView<'a> = rio_api::model::BlankNode<'a>; +pub type SubjectView<'a> = rio_api::model::Subject<'a>; #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct Triple { @@ -130,16 +129,10 @@ impl fmt::Display for BlankNode { impl<'a> From> for Triple { fn from(t: TripleView<'a>) -> Self { - match t { - TripleView { - subject, - predicate, - object, - } => Triple { - subject: subject.into(), - predicate: predicate.into(), - object: object.into(), - }, + Triple { + subject: t.subject.into(), + predicate: t.predicate.into(), + object: t.object.into(), } } } @@ -247,7 +240,7 @@ impl From for Term { } #[inline] -fn fmt_quoted_str(string: &String, f: &mut fmt::Formatter<'_>) -> fmt::Result { +fn fmt_quoted_str(string: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_char('"')?; for c in string.chars() { match c { diff --git a/src/rules.rs b/src/rules.rs index 134f0a6..0cf55e7 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -50,7 +50,7 @@ pub fn match_type_rule_subject( ) -> TripleMask { match subject { Subject::NamedNode(n) => { - return mask | match_type_rule_named_node(true, &n, mask, rules, type_map); + return mask | match_type_rule_named_node(true, n, mask, rules, type_map); } Subject::BlankNode(_) => return mask, } @@ -64,21 +64,19 @@ pub fn match_type_rule_object( ) -> TripleMask { match object { Term::NamedNode(n) => { - return mask | match_type_rule_named_node(false, &n, mask, rules, type_map); + return mask | match_type_rule_named_node(false, n, mask, rules, type_map); } _ => return mask, } } pub fn match_predicate_rule(predicate: &NamedNode, mask: TripleMask, rules: &Rules) -> TripleMask { - match predicate { - NamedNode { iri: n } => { - if rules.replace_value_of_predicate.contains(n) { - return mask | TripleMask::OBJECT; - } else { - return mask; - } - } + let NamedNode { iri: i } = predicate; + + if rules.replace_value_of_predicate.contains(i) { + return mask | TripleMask::OBJECT; + } else { + return mask; } } diff --git a/tools/lint-rust.sh b/tools/lint-rust.sh index d81569f..848c4d8 100755 --- a/tools/lint-rust.sh +++ b/tools/lint-rust.sh @@ -12,9 +12,8 @@ cargo --version cargo clippy --version print_info "Run Rust Clippy linter." -print_warning "Currently warnings are not errors!" -cargo clippy --no-deps -- -A clippy::needless_return "$@" || +cargo clippy --no-deps -- -D warnings -A clippy::needless_return "$@" || { git diff --name-status || true die "Rust clippy failed."