more io operations
nleroy917 committed Jan 22, 2024
1 parent 74660d4 commit a5047e0
Showing 5 changed files with 51 additions and 15 deletions.
8 changes: 4 additions & 4 deletions genimtools/src/ailist/mod.rs
@@ -37,7 +37,7 @@ impl AIList {
 
             *intervals = results.3;
 
-            if intervals.len() == 0 {
+            if intervals.is_empty() {
                 break;
             } else {
                 header_list.push(starts.len());
@@ -53,7 +53,7 @@ impl AIList {
     }
 
     fn decompose(
-        intervals: &mut Vec<Interval>,
+        intervals: &mut [Interval],
         minimum_coverage_length: usize,
     ) -> (Vec<u32>, Vec<u32>, Vec<u32>, Vec<Interval>) {
         // look at the next minL*2 intervals
@@ -119,7 +119,7 @@ impl AIList {
                 })
             }
         }
-        return results_list;
+        results_list
     }
 
     pub fn query(&self, interval: &Interval) -> Vec<Interval> {
@@ -142,7 +142,7 @@ impl AIList {
             &self.max_ends[self.header_list[i]..],
         ));
 
-        return results_list;
+        results_list
     }
 
     pub fn print(&self) {
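
The `decompose` change above swaps `&mut Vec<Interval>` for the more general `&mut [Interval]`. A standalone sketch of why the slice form is more flexible (illustrative types only, not from this repo):

// Illustrative only: a slice parameter accepts both a whole Vec and a sub-slice.
fn shift_starts(intervals: &mut [(u32, u32)], by: u32) {
    for iv in intervals.iter_mut() {
        iv.0 += by; // move each interval's start coordinate
    }
}

fn main() {
    let mut ivs = vec![(1u32, 5u32), (10, 20)];
    shift_starts(&mut ivs, 2);      // a &mut Vec coerces to &mut [..]
    shift_starts(&mut ivs[..1], 3); // a sub-slice works too
    println!("{:?}", ivs);          // [(6, 7), (12, 22)]
}
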
12 changes: 4 additions & 8 deletions genimtools/src/common/models/tokenized_regionset.rs
@@ -7,6 +7,7 @@ use crate::common::consts::{PAD_CHR, PAD_END, PAD_START};
 use crate::common::models::region::Region;
 use crate::common::models::tokenized_region::TokenizedRegion;
 use crate::common::models::universe::Universe;
+use crate::io::write_tokens_to_gtok;
 
 pub struct TokenizedRegionSet<'a> {
     pub regions: Vec<Region>,
@@ -72,14 +73,9 @@ impl<'a> TokenizedRegionSet<'a> {
     /// Write a TokenizedRegionSet to a .gtok file
     /// * `path` - A PathBuf to write the .gtok file to
     ///
-    pub fn to_gtok_file(&self, path: &PathBuf) -> Result<(), Box<dyn Error>> {
-        let mut file = File::create(path)?;
-        for region in self.regions.iter() {
-            let id = self.universe.convert_region_to_id(region);
-            let line = format!("{}\n", id);
-            file.write_all(line.as_bytes())?;
-        }
-
+    pub fn to_gtok_file(&self, path: &str) -> Result<(), Box<dyn Error>> {
+        let tokens = self.to_region_ids();
+        write_tokens_to_gtok(path, &tokens)?;
         Ok(())
     }

39 changes: 39 additions & 0 deletions genimtools/src/io/mod.rs
@@ -0,0 +1,39 @@
+use std::fs::File;
+use std::io::{Write, Read, BufReader, BufWriter};
+
+///
+/// Writes a vector of tokens to a file in the `.gtok` format.
+/// # Arguments
+/// - filename: the file to save the tokens to
+/// - tokens: tokens to save
+///
+pub fn write_tokens_to_gtok(filename: &str, tokens: &[u32]) -> std::io::Result<()> {
+    let file = File::create(filename)?;
+    let mut writer = BufWriter::new(file);
+
+    for &token in tokens {
+        writer.write_all(&token.to_le_bytes())?;
+    }
+
+    Ok(())
+}
+
+///
+/// Read in a vector of tokens from a file in the `.gtok` format.
+/// # Arguments
+/// - filename: filename to read the tokens from
+///
+/// # Returns
+/// - vector of tokens in u32 format
+pub fn read_tokens_from_gtok(filename: &str) -> std::io::Result<Vec<u32>> {
+    let file = File::open(filename)?;
+    let mut reader = BufReader::new(file);
+    let mut tokens = Vec::new();
+    let mut buffer = [0; 4];
+
+    while let Ok(()) = reader.read_exact(&mut buffer) {
+        tokens.push(u32::from_le_bytes(buffer));
+    }
+
+    Ok(tokens)
+}
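
A minimal round-trip sketch of the two new helpers (assuming the crate is importable as `genimtools` and using a hypothetical output filename); each token is stored as 4 little-endian bytes, so the file should be tokens.len() * 4 bytes:

// Hypothetical usage of the new .gtok helpers.
use genimtools::io::{read_tokens_from_gtok, write_tokens_to_gtok};

fn main() -> std::io::Result<()> {
    let tokens: Vec<u32> = vec![42, 101, 4096];
    write_tokens_to_gtok("example.gtok", &tokens)?;

    let recovered = read_tokens_from_gtok("example.gtok")?;
    assert_eq!(tokens, recovered); // lossless round trip
    Ok(())
}
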
3 changes: 2 additions & 1 deletion genimtools/src/lib.rs
@@ -9,4 +9,5 @@ pub mod common;
 pub mod tokenizers;
 pub mod uniwig;
 pub mod vocab;
-pub mod tools;
+pub mod tools;
+pub mod io;
4 changes: 2 additions & 2 deletions genimtools/src/tools/cli.rs
@@ -34,7 +34,7 @@ pub fn make_tools_cli() -> Command {
 
 pub mod handlers {
 
-    use std::path::{Path, PathBuf};
+    use std::path::Path;
 
     use crate::{tokenizers::{self, Tokenizer}, common::models::RegionSet};
 
@@ -90,7 +90,7 @@ pub mod handlers {
         match data {
             Ok(data) => {
                 let result = tokenizer.tokenize_region_set(&data).expect("Data couldn't be tokenized.");
-                let _ = result.to_gtok_file(&PathBuf::from(new_file));
+                let _ = result.to_gtok_file(new_file.to_str().unwrap());
             },
             Err(e) => panic!("There was an error reading the data file: {}", e)
         }
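
The handler now converts the `Path` to `&str` before calling `to_gtok_file`. A standalone illustration of that std conversion (note that `Path::to_str` returns `None` for non-UTF-8 paths, so the `unwrap()` above can panic on exotic filenames):

use std::path::Path;

fn main() {
    let new_file = Path::new("tokens.gtok");
    // Path::to_str() yields Some(&str) only when the path is valid UTF-8.
    match new_file.to_str() {
        Some(s) => println!("writing to {}", s),
        None => eprintln!("path was not valid UTF-8"),
    }
}
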
