-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from nleroy917/main
New updates
- Loading branch information
Showing
13 changed files
with
306 additions
and
160 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
[package] | ||
name = "gc_count" | ||
name = "rust_gc_count_py" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from typing import List | ||
|
||
class ChecksumResult:
    """Checksum record for a single sequence.

    Attributes:
        id: Sequence identifier.
        length: Sequence length.
        sha512: SHA-512 based digest of the sequence.
        md5: MD5 digest of the sequence.
    """

    def __init__(self, id: str, length: int, sha512: str, md5: str):
        self.id = id
        self.length = length
        self.sha512 = sha512
        self.md5 = md5

    def __repr__(self):
        return f"ChecksumResult(id={self.id}, length={self.length}, sha512={self.sha512}, md5={self.md5})"

    def __str__(self):
        # Same text as __repr__; delegate so the format lives in one place.
        return self.__repr__()
|
||
def checksum(file: str, verbose: bool) -> List[ChecksumResult]:
    """
    Calculate the sequence lengths and checksums from a FASTA file.

    Produces a list of ChecksumResult objects, each containing the following:

    - Sequence ID as it appears in the FASTA file
    - Sequence length
    - Refget ga4gh identifier (SQ.sha512t24u)
    - MD5 checksum, hex encoded

    :param file: The file to checksum
    :param verbose: Whether to print out the progress
    """
|
||
def write_gc_count_to_file(
    input: str,
    output: str,
    compression_level: int,
    window_size: int,
    omit_tail: bool,
    chrom_sizes_path: str,
    write_chrom_sizes: bool,
    verbose: bool,
) -> None:
    """
    Calculate the GC content and write it to a file.

    The file will be a tab-separated file with the following columns:

    - Chromosome name
    - Start position
    - End position
    - GC content

    :param input: The input file to calculate the GC content from
    :param output: The output file to write the GC content to
    :param compression_level: The compression level to use for the output file
    :param window_size: The window size to use for calculating the GC content
    :param omit_tail: Whether to omit the tail of the sequence
    :param chrom_sizes_path: The path to the chromosome sizes file
    :param write_chrom_sizes: Whether to write the chromosome sizes to the output file
    :param verbose: Whether to print out the progress
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
use pyo3::prelude::*; | ||
use std::path::PathBuf; | ||
|
||
#[pyfunction] | ||
pub fn write_gc_count_to_file( | ||
input: String, | ||
output: String, | ||
compression_level: u32, | ||
window_size: i32, | ||
omit_tail: bool, | ||
chrom_sizes_path: String, | ||
write_chrom_sizes: bool, | ||
verbose: bool, | ||
) { | ||
|
||
let input = PathBuf::from(input); | ||
let output = PathBuf::from(output); | ||
let chrom_sizes_path = PathBuf::from(chrom_sizes_path); | ||
|
||
rust_gc_count::gc_count::write_gc_to_file(input, output, compression_level, window_size, omit_tail, chrom_sizes_path, write_chrom_sizes, verbose) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,16 @@ | ||
use pyo3::prelude::*; | ||
|
||
pub mod gc_count_utils; | ||
pub mod checksumseq; | ||
pub mod models; | ||
|
||
use crate::checksumseq::checksum; | ||
use crate::gc_count_utils::write_gc_count_to_file; | ||
|
||
/// A Python module implemented in Rust. | ||
#[pymodule] | ||
fn gc_count(_py: Python, m: &PyModule) -> PyResult<()> { | ||
m.add_function(wrap_pyfunction!(checksum, m)?)?; | ||
m.add_function(wrap_pyfunction!(write_gc_count_to_file, m)?)?; | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.