From 0863e1af7663eb86ed812df44164802ce2c5b63c Mon Sep 17 00:00:00 2001 From: Cyril Matthey-Doret Date: Wed, 19 Jun 2024 16:14:02 +0200 Subject: [PATCH] refactor: simpler buffer handling with dynamic dispatch (#17) * refactor(io): drop boilerplate enums * refactor(io): use dynamic dispatch for io types * fix: drop io mod --- src/converter.rs | 11 ++-- src/formats.rs | 23 +++++--- src/io.rs | 145 ----------------------------------------------- src/main.rs | 11 ++-- 4 files changed, 27 insertions(+), 163 deletions(-) delete mode 100644 src/io.rs diff --git a/src/converter.rs b/src/converter.rs index c24ee88..8d58b00 100644 --- a/src/converter.rs +++ b/src/converter.rs @@ -20,8 +20,9 @@ //! This module contains the `RdfIO` trait which is used to parse and serialize RDF graphs. //! Each RDF serialization format should implement this trait. //! +use std::io::{BufRead, Write}; +use std::boxed::Box; -use crate::io::{Input, Output}; use sophia::api::prelude::TripleParser; use sophia::api::serializer::TripleSerializer; use sophia::api::source::TripleSource; @@ -29,9 +30,9 @@ use sophia::inmem::graph::FastGraph; /// The `RdfIO` trait is used to parse and serialize RDF graphs. -pub trait RdfIO<'a, P: TripleParser, F: TripleSerializer> { +pub trait RdfIO<'a, P: TripleParser>, F: TripleSerializer> { /// Parse an RDF graph from an input source to an in-memory graph. - fn parse(&self, input: Input) -> Result { + fn parse(&self, input: Box) -> Result { let mut graph = FastGraph::new(); match self.parser().parse(input).add_to_graph(&mut graph) { Ok(_) => Ok(graph), @@ -40,7 +41,7 @@ pub trait RdfIO<'a, P: TripleParser, F: TripleSerializer> { } /// Serialize an in-memory RDF graph to an output source. - fn serialize(&self, writer: Output, graph: FastGraph) -> Result<(), String> { + fn serialize(&self, writer: Box, graph: FastGraph) -> Result<(), String> { let mut formatter = self.serializer(writer); match formatter.serialize_graph(&graph) { Ok(_) => Ok(()), @@ -52,5 +53,5 @@ pub trait RdfIO<'a, P: TripleParser, F: TripleSerializer> { fn parser(&self) -> P; /// Create a new serializer for this format. - fn serializer(&self, writer: Output) -> F; + fn serializer(&self, writer: Box) -> F; } diff --git a/src/formats.rs b/src/formats.rs index 3d156f7..418b701 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -18,9 +18,10 @@ //! # Implementation of concrete RDF formats //! //! This module implements `RdfIO` trait for each RDF serialization format. +use std::io::{BufRead, Write}; + use crate::cli::GraphFormat; use crate::converter::RdfIO; -use crate::io::{Input, Output}; use sophia::inmem::graph::FastGraph; use sophia::turtle::parser::nt::NTriplesParser; use sophia::turtle::parser::turtle::TurtleParser; @@ -40,7 +41,7 @@ pub struct RdfParser { } impl RdfParser { - pub fn new(input: Input, format: GraphFormat) -> Result { + pub fn new(input: Box, format: GraphFormat) -> Result { let graph = match format { GraphFormat::NTriples => NTriples.parse(input), GraphFormat::Turtle => Turtle.parse(input), @@ -55,7 +56,11 @@ impl RdfParser { pub struct RdfSerializer; impl RdfSerializer { - pub fn serialize(dest: Output, format: GraphFormat, graph: FastGraph) -> Result<(), String> { + pub fn serialize( + dest: Box, + format: GraphFormat, + graph: FastGraph, + ) -> Result<(), String> { match format { GraphFormat::NTriples => NTriples.serialize(dest, graph), GraphFormat::Turtle => Turtle.serialize(dest, graph), @@ -63,32 +68,32 @@ impl RdfSerializer { } } } -impl<'a> RdfIO<'a, NTriplesParser, NtSerializer> for NTriples { +impl<'a> RdfIO<'a, NTriplesParser, NtSerializer>> for NTriples { fn parser(&self) -> NTriplesParser { NTriplesParser {} } - fn serializer(&self, writer: Output) -> NtSerializer { + fn serializer(&self, writer: Box) -> NtSerializer> { NtSerializer::new(writer) } } -impl<'a> RdfIO<'a, TurtleParser, TurtleSerializer> for Turtle { +impl<'a> RdfIO<'a, TurtleParser, TurtleSerializer>> for Turtle { fn parser(&self) -> TurtleParser { TurtleParser { base: None } } - fn serializer(&self, writer: Output) -> TurtleSerializer { + fn serializer(&self, writer: Box) -> TurtleSerializer> { TurtleSerializer::new(writer) } } -impl<'a> RdfIO<'a, RdfXmlParser, RdfXmlSerializer> for RdfXml { +impl<'a> RdfIO<'a, RdfXmlParser, RdfXmlSerializer>> for RdfXml { fn parser(&self) -> RdfXmlParser { RdfXmlParser { base: None } } - fn serializer(&self, writer: Output) -> RdfXmlSerializer { + fn serializer(&self, writer: Box) -> RdfXmlSerializer> { RdfXmlSerializer::new(writer) } } diff --git a/src/io.rs b/src/io.rs deleted file mode 100644 index f4c6cbe..0000000 --- a/src/io.rs +++ /dev/null @@ -1,145 +0,0 @@ -// rdfpipe-rs -// Copyright (C) 2023 - Swiss Data Science Center (SDSC) -// A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -// Eidgenössische Technische Hochschule Zürich (ETHZ). -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . -//! # Input and Output helpers -//! -//! This module contains the `Input` and `Output` helper structs. -//! These structs simplify the handling of various sources and sinks. -use std::fs::File; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdin, Stdout, Write}; - -pub enum Input { - Stdin(BufReader), - File(BufReader), -} - -pub enum Output { - Stdout(BufWriter), - File(BufWriter), -} - -impl Output { - pub fn new(path: Option) -> Self { - match path.as_deref() { - Some(path) => { - let file = File::create(path).expect("Can not create file"); - Self::File(BufWriter::new(file)) - } - None => Self::Stdout(BufWriter::new(stdout())), - } - } -} - -impl Write for Output { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - match self { - Output::Stdout(b) => b.write(buf), - Output::File(b) => b.write(buf), - } - } - - fn flush(&mut self) -> std::io::Result<()> { - match self { - Output::Stdout(b) => b.flush(), - Output::File(b) => b.flush(), - } - } -} - -impl Input { - pub fn new(path: Option) -> Self { - match path.as_deref() { - Some("-") | None => Self::Stdin(BufReader::new(stdin())), - Some(path) => { - let file = File::open(path).expect("Can not open file"); - Self::File(BufReader::new(file)) - } - } - } -} - -impl Read for Input { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - match self { - Input::Stdin(b) => b.read(buf), - Input::File(b) => b.read(buf), - } - } -} - -impl BufRead for Input { - fn fill_buf(&mut self) -> std::io::Result<&[u8]> { - match self { - Input::Stdin(b) => b.fill_buf(), - Input::File(b) => b.fill_buf(), - } - } - - fn consume(&mut self, amt: usize) { - match self { - Input::Stdin(b) => b.consume(amt), - Input::File(b) => b.consume(amt), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_input_new_stdin() { - let input = Input::new(None); - match input { - Input::Stdin(_) => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_input_new_file() { - let input = Input::new(Some("README.md".to_string())); - match input { - Input::File(_) => assert!(true), - _ => assert!(false), - } - } - - #[test] - #[should_panic(expected = "Can not open file")] - fn test_input_new_file_panic() { - let _input = Input::new(Some("nonexistent.txt".to_string())); - } - - #[test] - fn test_output_new_stdout() { - let output = Output::new(None); - match output { - Output::Stdout(_) => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_output_new_file() { - let output = Output::new(Some("test.txt".to_string())); - match output { - Output::File(_) => assert!(true), - _ => assert!(false), - } - } -} diff --git a/src/main.rs b/src/main.rs index d282242..a9193c1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,13 +54,13 @@ mod cli; mod converter; mod formats; -mod io; use crate::cli::{Args, GraphFormat}; -use crate::io::{Input, Output}; use clap::Parser; use formats::{RdfParser, RdfSerializer}; use std::error::Error; +use std::fs::File; +use std::io::{BufRead, BufReader, BufWriter, stdout}; use std::path::Path; /// Infer RDF serialization format from file extension @@ -87,8 +87,11 @@ fn main() -> Result<(), Box> { }; let output_format = args.output_format; - let input = Input::new(args.input_file); - let output = Output::new(None); + let input: Box = match args.input_file.unwrap().as_ref() { + "-" => Box::new(BufReader::new(std::io::stdin())), + path => Box::new(BufReader::new(File::open(path)?)), + }; + let output = Box::new(BufWriter::new(stdout())); let parser = RdfParser::new(input, input_format)?; if !args.no_out { RdfSerializer::serialize(output, output_format, parser.graph)?;