Skip to content

Commit

Permalink
Add support for reading from TAR archives to flatdata-rs (heremaps#182)
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Ocker <[email protected]>
  • Loading branch information
fermeise committed Sep 29, 2021
1 parent 3edaa5d commit 2e46df6
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 5 deletions.
1 change: 1 addition & 0 deletions flatdata-rs/lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ edition = "2018"
[dependencies]
diff = "0.1.11"
memmap2 = "0.2.0"
tar = "0.4"
walkdir = "2.2.9"
2 changes: 1 addition & 1 deletion flatdata-rs/lib/src/filestorage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl MemoryMappedFileStorage {
/// # Examples
///
/// ```rust,no_run
/// use flatdata::{FileResourceStorage, Vector};
/// use flatdata::{FileResourceStorage, Vector};
/// use flatdata::test::{X, XBuilder};
///
/// let storage = FileResourceStorage::new("/root/to/my/archive");
Expand Down
2 changes: 2 additions & 0 deletions flatdata-rs/lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ mod multivector;
mod rawdata;
mod storage;
mod structs;
mod tarstorage;
mod vector;

#[doc(hidden)]
Expand All @@ -181,5 +182,6 @@ pub use crate::{
create_archive, create_external_vector, create_multi_vector, ResourceStorage, StorageHandle,
},
structs::*,
tarstorage::TarArchiveResourceStorage,
vector::*,
};
128 changes: 128 additions & 0 deletions flatdata-rs/lib/src/tarstorage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
use crate::storage::{ResourceStorage, StorageHandle, Stream};

use memmap2::Mmap;
use std::{
collections::HashMap,
fs::File,
io,
ops::Range,
path::{Path, PathBuf},
slice,
sync::Arc,
};

/// Internal storage of file entries in tar archive.
///
/// Holds a memory mapping of the complete tar file together with an index
/// from entry path to the byte range that entry's data occupies in the mapping.
#[derive(Debug)]
struct MemoryMappedTarArchiveStorage {
// Memory mapping of the whole tar archive file.
archive_map: Mmap,
// Byte range of each entry's data inside `archive_map`, keyed by the entry's
// path (with a leading "." component stripped when present).
file_ranges: HashMap<PathBuf, Range<usize>>,
}

impl MemoryMappedTarArchiveStorage {
    /// Indexes the tar archive at `tar_path` and memory-maps the archive file.
    ///
    /// Every entry of the archive is recorded as a byte range into the mapping,
    /// keyed by the entry's path with a leading `.` component stripped if present.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, read as a tar archive or
    /// memory-mapped, or if an entry's offset/size cannot be represented as a
    /// `usize` range (`io::ErrorKind::InvalidData`).
    pub fn new(tar_path: &Path) -> Result<Self, io::Error> {
        /// Converts a tar header quantity to `usize` without silent truncation.
        fn to_usize(value: u64) -> Result<usize, io::Error> {
            <usize as std::convert::TryFrom<u64>>::try_from(value).map_err(|_| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    "tar entry offset or size does not fit into usize",
                )
            })
        }

        // Open the file once and use the same handle for indexing and for mapping,
        // so both are guaranteed to refer to the same file even if the path is
        // replaced concurrently.
        let file = File::open(tar_path)?;
        let mut archive = tar::Archive::new(&file);

        let file_ranges = archive
            .entries()?
            .map(|entry| -> Result<(PathBuf, Range<usize>), io::Error> {
                let entry = entry?;
                let entry_path = entry.path()?;
                // Archives created with `tar -C dir .` prefix every entry with "./".
                let key = entry_path
                    .strip_prefix(".")
                    .unwrap_or(&*entry_path)
                    .to_path_buf();

                let offset = to_usize(entry.raw_file_position())?;
                let size = to_usize(entry.size())?;
                let end = offset.checked_add(size).ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        "tar entry range overflows usize",
                    )
                })?;

                Ok((key, offset..end))
            })
            .collect::<Result<HashMap<PathBuf, Range<usize>>, io::Error>>()?;

        // SAFETY: the mapping is only ever read through shared slices. As with any
        // file-backed mapping, this relies on the archive file not being modified
        // or truncated by another party while it is mapped.
        let archive_map = unsafe { Mmap::map(&file)? };

        Ok(Self {
            archive_map,
            file_ranges,
        })
    }

    /// Returns the bytes of the entry stored under `path`.
    ///
    /// Returns `None` if the archive has no such entry, or if the entry's recorded
    /// range does not lie completely inside the mapped file (for example because
    /// the archive is truncated) instead of panicking on an out-of-bounds index.
    pub fn read(&self, path: &Path) -> Option<&[u8]> {
        let range = self.file_ranges.get(path)?;

        // SAFETY: pointer and length describe exactly the live mapping owned by
        // `self.archive_map`, and the returned slice is bound to `&self` by the
        // function signature, so it cannot outlive the mapping.
        // NOTE(review): this is equivalent to the safe `&self.archive_map[..]`;
        // the raw-parts view can be dropped together with the `slice` import.
        let mapped =
            unsafe { slice::from_raw_parts(self.archive_map.as_ptr(), self.archive_map.len()) };

        mapped.get(range.clone())
    }
}

/// Read-only resource storage on disk using a memory mapped tar archive.
///
/// Used to read flatdata archives from a tar archive on disk. Creating output
/// streams is not supported and always fails with an error.
///
/// # Examples
///
/// ```rust,no_run
/// use flatdata::{TarArchiveResourceStorage, Vector};
/// use flatdata::test::X;
///
/// let storage = TarArchiveResourceStorage::new("/root/to/my/archive.tar")
/// .expect("failed to read tar archive");
/// let archive = X::open(storage).expect("failed to open");
/// // read data
/// archive.data();
/// ```
#[derive(Debug)]
pub struct TarArchiveResourceStorage {
// Memory mapped archive, shared between this handle and all handles created by `subdir`.
storage: Arc<MemoryMappedTarArchiveStorage>,
// Path inside the archive that resource names are joined onto; empty for the root handle.
sub_path: PathBuf,
}

impl TarArchiveResourceStorage {
    /// Opens the tar archive located at `tar_path` and returns a storage handle
    /// rooted at the top level of the archive.
    ///
    /// # Errors
    ///
    /// Forwards any I/O error raised while indexing or memory mapping the archive.
    pub fn new<P: Into<PathBuf>>(tar_path: P) -> Result<Arc<Self>, io::Error> {
        let archive_path: PathBuf = tar_path.into();
        let mapped = MemoryMappedTarArchiveStorage::new(&archive_path)?;

        let root = Self {
            storage: Arc::new(mapped),
            // The root handle resolves resource names relative to the archive root.
            sub_path: PathBuf::new(),
        };
        Ok(Arc::new(root))
    }
}

impl ResourceStorage for TarArchiveResourceStorage {
    /// Returns a handle that shares this archive mapping but resolves resource
    /// names relative to `dir` below the current sub path.
    fn subdir(&self, dir: &str) -> StorageHandle {
        Arc::new(Self {
            storage: Arc::clone(&self.storage),
            sub_path: self.sub_path.join(dir),
        })
    }

    /// Tells whether the archive contains an entry for `resource_name` below the
    /// current sub path.
    fn exists(&self, resource_name: &str) -> bool {
        let resource_path = self.sub_path.join(resource_name);
        self.storage.read(&resource_path).is_some()
    }

    /// Returns the bytes of `resource_name`, or a `NotFound` error carrying the
    /// resolved path when the archive has no such entry.
    fn read_resource(&self, resource_name: &str) -> Result<&[u8], io::Error> {
        let resource_path = self.sub_path.join(resource_name);
        self.storage.read(&resource_path).ok_or_else(|| {
            // Fall back to the plain resource name if the joined path is not valid UTF-8.
            let shown = resource_path.to_str().unwrap_or(resource_name);
            io::Error::new(io::ErrorKind::NotFound, String::from(shown))
        })
    }

    /// Always fails: this storage is read-only.
    fn create_output_stream(&self, _resource_name: &str) -> Result<Box<dyn Stream>, io::Error> {
        Err(io::Error::new(
            io::ErrorKind::Other,
            "Writing to tar archives is not supported",
        ))
    }
}
Binary file not shown.
22 changes: 18 additions & 4 deletions flatdata-rs/tests/coappearances/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ use std::{env, fs, io::Read, path, str};

pub mod coappearances;

#[test]
fn read_and_validate_coappearances() -> Result<(), std::str::Utf8Error> {
let storage =
flatdata::FileResourceStorage::new(path::PathBuf::from("assets/karenina.archive"));
fn read_and_validate_coappearances(
storage: flatdata::StorageHandle,
) -> Result<(), std::str::Utf8Error> {
let g = coappearances::Graph::open(storage).expect("invalid archive");
println!("{:?}", g);

Expand Down Expand Up @@ -127,6 +126,21 @@ fn read_and_validate_coappearances() -> Result<(), std::str::Utf8Error> {
Ok(())
}

/// Runs the shared coappearances validation against `FileResourceStorage`.
#[test]
fn read_and_validate_coappearances_from_file_storage() -> Result<(), std::str::Utf8Error> {
    let archive_path = path::PathBuf::from("assets/karenina.archive");
    read_and_validate_coappearances(flatdata::FileResourceStorage::new(archive_path))
}

/// Runs the shared coappearances validation against `TarArchiveResourceStorage`.
#[test]
fn read_and_validate_coappearances_from_tar_archive_storage() -> Result<(), std::str::Utf8Error> {
    let tar_path = path::PathBuf::from("assets/karenina.tar");
    let storage =
        flatdata::TarArchiveResourceStorage::new(tar_path).expect("failed to read tar archive");
    read_and_validate_coappearances(storage)
}

fn check_files(name_a: &path::Path, name_b: &path::Path) {
let mut fa = fs::File::open(name_a).unwrap();
let mut buf_a = Vec::new();
Expand Down

0 comments on commit 2e46df6

Please sign in to comment.