Skip to content

Commit

Permalink
feat: support additional rdf formats (#18)
Browse files Browse the repository at this point in the history
* feat(io): support common rdf formats

* chore(deps): add oxrdfio

* chore: bump to 0.2.2

* chore: bump deps
  • Loading branch information
cmdoret authored Sep 25, 2024
1 parent bbd433b commit 47c0f63
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 19 deletions.
49 changes: 42 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion fuzon/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fuzon"
version = "0.2.1"
version = "0.2.2"
edition = "2021"

[lib]
Expand All @@ -12,6 +12,7 @@ clap = { version = "4.5.16", features = ["derive"] }
crossterm = "0.28.1"
lazy_static = "1.5.0"
oxrdf = "0.1.7"
oxrdfio = "0.1.0"
oxttl = "0.1.0-rc.1"
ratatui = "0.28.1"
reqwest = { version = "0.12.0", features = ["blocking", "native-tls-vendored"] }
Expand Down
27 changes: 18 additions & 9 deletions fuzon/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::io::{BufRead, BufReader};

use anyhow::Result;
use lazy_static::lazy_static;
use oxttl::TurtleParser;
use oxrdfio::{RdfFormat, RdfParser};
use reqwest::blocking::Client;
use reqwest::Url;

Expand Down Expand Up @@ -54,7 +54,7 @@ impl TermMatcher {
.map(|t| t.0)
.collect()
}
pub fn from_readers(readers: Vec<impl BufRead>) -> Self {
pub fn from_readers(readers: Vec<(impl BufRead, RdfFormat)>) -> Self {
let terms = gather_terms(readers).collect();
TermMatcher { terms }
}
Expand All @@ -78,18 +78,26 @@ impl fmt::Display for Term {
}
}

pub fn get_source(path: &str) -> Result<Box<dyn BufRead>> {
/// Get an rdf reader along with its format from a path
pub fn get_source(path: &str) -> Result<(Box<dyn BufRead>, RdfFormat)> {
let file_ext = path.split('.').last().unwrap();
let ext = match file_ext {
"owl" => "xml",
"rdf" => "xml",
_ => file_ext,
};
let format = RdfFormat::from_extension(ext).expect("Unkown file extension");
if let Ok(url) = Url::parse(path) {
// Handle URL
let client = Client::new();
let response = client.get(url).send()?.error_for_status()?;
let reader = BufReader::new(response);
Ok(Box::new(reader)) // Return boxed reader for URL
Ok((Box::new(reader), format)) // Return boxed reader for URL
} else {
// Handle file path
let file = File::open(path)?;
let reader = BufReader::new(file);
Ok(Box::new(reader)) // Return boxed reader for file
Ok((Box::new(reader), format)) // Return boxed reader for file
}
}
/// Returns the input term vector sorted by match score (best first),
Expand All @@ -111,12 +119,13 @@ pub fn rank_terms<'a>(query: &str, terms: Vec<&'a Term>) -> Vec<(&'a Term, f64)>
return ranked;
}

// Load URI-label pairs from all source.
pub fn gather_terms(readers: Vec<impl BufRead>) -> impl Iterator<Item = Term> {

// Load URI-label pairs from all sources.
pub fn gather_terms(readers: Vec<(impl BufRead, RdfFormat)>) -> impl Iterator<Item = Term> {
// NOTE: May want to use bulk loader for better performances
let mut terms = Vec::new();
for reader in readers {
let parser = TurtleParser::new().for_reader(reader);
for (reader, format) in readers {
let parser = RdfParser::from_format(format).for_reader(reader);
let mut out = parser
.map(|t| t.expect("Error parsing RDF"))
.filter(|t| ANNOTATIONS.contains(t.predicate.as_str()))
Expand Down
4 changes: 2 additions & 2 deletions pyfuzon/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pyfuzon"
version = "0.2.1"
version = "0.2.2"
edition = "2021"

[lib]
Expand All @@ -11,7 +11,7 @@ crate-type = ["cdylib"]
anyhow = "1.0.86"
clap = { version = "4.5.16", features = ["derive"] }
crossterm = "0.28.1"
fuzon = { version = "0.2.0", path = "../fuzon" }
fuzon = { version = "0.2.2", path = "../fuzon" }
lazy_static = "1.5.0"
oxrdf = "0.1.7"
oxttl = "0.1.0-rc.1"
Expand Down

0 comments on commit 47c0f63

Please sign in to comment.