Skip to content

Commit

Permalink
feat: add benchmark script
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdoret committed Oct 26, 2023
1 parent 4287473 commit f1f6561
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 0 deletions.
19 changes: 19 additions & 0 deletions scripts/run_bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Compare runtime of rdfpipe vs rdfpipe-rs
# hyperfine is the only dependency (besides rdfpipe and rdfpipe-rs)
#
# Usage: run_bench.sh DATASET
#   DATASET  path to a (large) ntriples RDF dataset
set -euo pipefail

# File path to a (large) ntriples RDF dataset.
# Fail with a usage message instead of a bare "unbound variable" error.
DATASET="${1:?usage: $0 DATASET (path to an ntriples RDF file)}"

# Without this check a wrong path would not abort the benchmark: head would
# fail, but the pipeline status comes from the converter, which would then be
# timed on empty input.
if [[ ! -r "${DATASET}" ]]; then
  echo "error: cannot read dataset: ${DATASET}" >&2
  exit 1
fi

# Exported so that the shell spawned by hyperfine expands the path itself,
# inside double quotes. Splicing the raw path into the command string would
# break on paths containing spaces or shell metacharacters.
export DATASET

RDFPIPE_PY="rdfpipe"
RDFPIPE_RS="./target/release/rdfpipe-rs"

# Run both implementations with different number of triples
# ({N} thousand input lines, for each output format {FMT});
# timings are saved in timings.csv
hyperfine \
  --warmup 1 \
  -L N 1,2,3,4,5,10,15,20,50 \
  -L FMT ttl,xml \
  --export-csv timings.csv \
  "head -n {N}000 \"\$DATASET\" | ${RDFPIPE_PY} -i nt -o {FMT} - > /dev/null" \
  "head -n {N}000 \"\$DATASET\" | ${RDFPIPE_RS} -i nt -o {FMT} - > /dev/null"
32 changes: 32 additions & 0 deletions scripts/viz_bench.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Visualization of timings for rdfpipe vs rdfpipe-rs
# tidyverse>=1.1.3 is the only dependency
#
# Reads the hyperfine CSV export `timings.csv` from the working directory and
# plots the log10 of the mean runtime against the number of input lines, with
# one panel per output format and one line per tool.

library(tidyverse)

timings <- read_csv("timings.csv")

# Matches rdfpipe-rs only where it is invoked, i.e. as the executable right
# after the pipe. A plain substring match on the whole command would also hit
# the dataset path (e.g. a file stored inside an `rdfpipe-rs` checkout) and
# label every row as rdfpipe-rs.
rs_invocation <- "\\|\\s*\\S*rdfpipe-rs(\\s|$)"

timings <- timings %>%
  rename(
    tool = command,
    thousand_lines = parameter_N,
    fmt = parameter_FMT
  ) %>%
  # Collapse the full benchmark command line into a short tool label.
  mutate(tool = case_when(
    str_detect(tool, rs_invocation) ~ "rdfpipe-rs",
    TRUE ~ "rdfpipe"
  )) %>%
  select(tool, mean, fmt, stddev, thousand_lines) %>%
  arrange(thousand_lines, tool)

ggplot(timings, aes(x = thousand_lines, y = log10(mean), color = tool)) +
  geom_line() +
  xlab("Thousands of lines parsed") +
  ylab("Log10 time (seconds)") +
  theme_bw(base_size = 18) +
  coord_fixed(ratio = 10) +
  # One panel per output format, with human-readable conversion labels.
  facet_grid(~fmt, labeller = labeller(
    fmt = c(
      "ttl" = "ntriples -> turtle",
      "xml" = "ntriples -> xml"
    )
  ))

0 comments on commit f1f6561

Please sign in to comment.