Skip to content

Commit

Permalink
add poa feature
Browse files Browse the repository at this point in the history
  • Loading branch information
cauliyang committed Jan 17, 2024
1 parent 5d24a17 commit f82d84f
Show file tree
Hide file tree
Showing 5 changed files with 1,141 additions and 4 deletions.
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,16 @@ codegen-units = 1

[dependencies]
anyhow = { version = "1.0" }
clap = { version = "4.4.12", features = ["wrap_help", "derive", "cargo"] }

bio = "1.5.0"
clap = { version = "4.4.17", features = ["wrap_help", "derive", "cargo"] }
clap-verbosity-flag = "2.1.1"
clap_complete = "4.4"
colored = "2"
env_logger = "0.10"
human-panic = "1.2.2"
human-panic = "1.2.3"
indicatif = "0.17"
log = "0.4"

noodles-bam = "0.52.0"
noodles-bgzf = "0.26"
noodles-csi = "0.29"
Expand All @@ -56,7 +57,7 @@ noodles-sam = "0.49"
petgraph = { version = "0.6", features = ["serde-1"] }
plotters = { version = "0.3" }

polars = { version = "0.36", features = ["lazy"] }
polars = { version = "0.36", features = ["lazy", "nightly"] }
rayon = { version = "1.8" }
regex = { version = "1.10" }
serde = { version = "1.0" }
Expand Down
123 changes: 123 additions & 0 deletions src/anno.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use anyhow::Result;
use clap::{arg, Args, ValueHint};
use log::{error, info, warn};
use std::path::{Path, PathBuf};

use polars::df;
use polars::prelude::*;
use std::io::{self, BufRead}; // Add this line

use bio::alignment::pairwise::Scoring;
use bio::alignment::poa::*;
use std::fs::File;

#[derive(Args, Debug)]
pub struct AnnoArgs {
#[arg(value_hint = ValueHint::AnyPath)]
pub vcf: PathBuf,
// #[arg(value_hint = ValueHint::AnyPath)]
// fasta: PathBuf,
// #[arg(value_hint = ValueHint::AnyPath)]
// bam: PathBuf,
}

pub fn read_vcf<P: AsRef<Path>>(vcf_path: P) -> Result<()> {
let mut n = 0;

let df = CsvReader::from_path(vcf_path.as_ref())?
.with_separator(b'\t')
.has_header(false)
.with_comment_prefix(Some("#"))
.finish()?;

for (idx, row) in df.iter_chunks().enumerate() {
if idx == 0 {
for col in row.iter() {
println!("{:?}", col);
}
}
}

// print every row

// reader.records(&header).for_each(|r| match r {
// Ok(record) => {
// n += 1;
// }
// Err(e) => {
// error!("error: {:?}", e);
// }
// });

Ok(())
}

pub fn read_bam<P: AsRef<Path>>(bam_path: P) {
todo!()
}

pub fn read_fasta<P: AsRef<Path>>(fasta_path: P) {
todo!()
}
// The output is wrapped in a Result to allow matching on errors.
// Returns an Iterator to the Reader of the lines of the file.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
P: AsRef<Path>,
{
let file = File::open(filename)?;
Ok(io::BufReader::new(file).lines())
}

pub fn run_poa<P: AsRef<Path>>(file: P) -> Result<()> {
if let Ok(lines) = read_lines(file) {
let r = lines.collect::<Result<Vec<String>, _>>()?;
let flattened: Vec<&str> = r.iter().map(|line| line.as_str()).collect();
let consensus = poa(&flattened);
println!("{}", consensus);
}
Ok(())
}

pub fn poa(seqs: &[&str]) -> String {
let scoring = Scoring::new(-2, 0, |a: u8, b: u8| if a == b { 1i32 } else { -1i32 });

let mut seq_iter = seqs.iter().map(|s| s.as_bytes());
let mut aligner = Aligner::new(scoring, seq_iter.next().unwrap());

for seq in seq_iter {
aligner.global(seq).add_to_graph();
}

String::from_utf8(aligner.consensus()).unwrap()
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_poa() {
let seqs = vec![
"ATATTGTGTAAGGCACAATTAACA",
"ATATTGCAAGGCACAATTCAACA",
"ATATTGCAAGGCACACAACA",
"ATGTGCAAGAGCACATAAC",
];

let test_seq = "ATATTGCAAGGCACAATTCAACA";
let consensus = poa(&seqs);
assert_eq!(consensus, test_seq);
}

#[test]
fn test_extend() {
let seqs = vec![
"ATATTGTGTAAGGCACAATTAACA",
"CAATTAACATTTTTTTTTTTTTTTTT",
"ATGTGCAAGAGCACATAAC",
];

println!("{}", poa(&seqs));
}
}
9 changes: 9 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod fq2fa;
mod index;
mod rsoft;

mod anno;
mod graph;

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -90,6 +91,9 @@ enum Commands {

/// Graph Analysis
Graph(graph::GraphArgs),

/// Annotation scannls
AnnoScan(anno::AnnoArgs),
}

fn print_completions<G: Generator>(gen: G, cmd: &mut Command) {
Expand Down Expand Up @@ -164,6 +168,11 @@ fn main() {
graph::analyze(args).unwrap();
}

Some(Commands::AnnoScan(args)) => {
info!("'anno' {args:?} ");
anno::run_poa(&args.vcf).unwrap();
}

// If no subcommand was used, it's a normal top level command
None => info!("No subcommand was used"),
}
Expand Down
Loading

0 comments on commit f82d84f

Please sign in to comment.