Skip to content

Commit

Permalink
Merge pull request #2 from nleroy917/main
Browse files Browse the repository at this point in the history
New updates
  • Loading branch information
andrewyatz authored Feb 22, 2024
2 parents 7b49d9d + 5142cfb commit 79152e1
Show file tree
Hide file tree
Showing 13 changed files with 306 additions and 160 deletions.
31 changes: 16 additions & 15 deletions bindings/.github/workflows/CI.yml → .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,8 @@
name: CI

on:
push:
branches:
- main
- master
tags:
- '*'
pull_request:
release:
types: [created]
workflow_dispatch:

permissions:
Expand All @@ -23,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
target: [x86_64, x86, aarch64, armv7, s390x, ppc64le]
target: [x86_64, x86, aarch64]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
Expand All @@ -36,11 +31,12 @@ jobs:
args: --release --out dist --find-interpreter
sccache: 'true'
manylinux: auto
working-directory: ./bindings
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: ./bindings/dist

windows:
runs-on: windows-latest
Expand All @@ -59,11 +55,12 @@ jobs:
target: ${{ matrix.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: ./bindings
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: ./bindings/dist

macos:
runs-on: macos-latest
Expand All @@ -81,11 +78,12 @@ jobs:
target: ${{ matrix.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: ./bindings
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: ./bindings/dist

sdist:
runs-on: ubuntu-latest
Expand All @@ -96,25 +94,28 @@ jobs:
with:
command: sdist
args: --out dist
working-directory: ./bindings
- name: Upload sdist
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: ./bindings/dist

release:
name: Release
runs-on: ubuntu-latest
environment: release
permissions:
id-token: write
if: "startsWith(github.ref, 'refs/tags/')"
needs: [linux, windows, macos, sdist]
steps:
- uses: actions/download-artifact@v3
with:
name: wheels
path: ./bindings/dist
- name: Publish to PyPI
uses: PyO3/maturin-action@v1
env:
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
with:
command: upload
args: --non-interactive --skip-existing *
args: --non-interactive --skip-existing ./bindings/dist/*
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ Copyright [2023] EMBL-European Bioinformatics Institute

This product includes software developed at:
- EMBL-European Bioinformatics Institute
- University of Virginia, Center for Public Health Genomics
20 changes: 10 additions & 10 deletions bindings/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion bindings/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "gc_count"
name = "rust_gc_count_py"
version = "0.1.0"
edition = "2021"

Expand Down
57 changes: 57 additions & 0 deletions bindings/gc_count.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import List, Optional

class ChecksumResult:
    """Checksum record for a single FASTA sequence.

    Stores the four values handed to the constructor: the sequence id, the
    sequence length, the sha512-based digest string and the md5 digest string.

    NOTE(review): the compiled class in ``bindings/src/models.rs`` defines
    ``__repr__`` as ``<ChecksumResult for {id}>``; the method bodies in this
    stub are illustrative only -- confirm which text is intended.
    """

    def __init__(self, id: str, length: int, sha512: str, md5: str) -> None:
        self.id = id
        self.length = length
        self.sha512 = sha512
        self.md5 = md5

    def __repr__(self) -> str:
        return f"ChecksumResult(id={self.id}, length={self.length}, sha512={self.sha512}, md5={self.md5})"

    # The string form is the same text as the repr, so reuse the one method
    # instead of repeating the format string.
    __str__ = __repr__

def checksum(file: str, verbose: Optional[bool]) -> List[ChecksumResult]:
    """
    Calculate the sequence lengths and checksums from a fasta file.

    Returns a list of ChecksumResult objects, one per sequence, each
    containing the following:

    - Sequence ID as it appears in the FASTA file
    - Sequence length
    - Refget ga4gh identifier (SQ.sha512t24u)
    - MD5 checksum hex encoded

    :param file: The file to checksum
    :param verbose: Whether to print out the progress. The native function
        declares this argument as ``Option<bool>``, so ``None`` is accepted
        as well as a bool.
    """

def write_gc_count_to_file(
    input: str,
    output: str,
    compression_level: int,
    window_size: int,
    omit_tail: bool,
    chrom_sizes_path: str,
    write_chrom_sizes: bool,
    verbose: bool,
) -> None:
    """
    Compute GC content for the input and write the result to a file.

    The output is tab-separated, with these columns:

    - Chromosome name
    - Start position
    - End position
    - GC content

    :param input: Input file the GC content is calculated from
    :param output: Output file the GC content is written to
    :param compression_level: Compression level used for the output file
    :param window_size: Window size used when calculating the GC content
    :param omit_tail: Whether the tail of the sequence is omitted
    :param chrom_sizes_path: Path to the chromosome sizes file
    :param write_chrom_sizes: Whether the chromosome sizes are written to the output file
    :param verbose: Whether progress is printed
    """
11 changes: 3 additions & 8 deletions bindings/src/checksumseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,9 @@ pub fn checksum(file: String, verbose: Option<bool>) -> Vec<PyChecksumResult> {

while let Some(record) = reader.next() {
let record = record.unwrap();
let (id, length, sha512, md5) = process_sequence(record, verbose);

results.push(PyChecksumResult {
id,
length,
sha512,
md5
});
let result = process_sequence(record, verbose);

results.push(PyChecksumResult::from(result));
}

results
Expand Down
21 changes: 21 additions & 0 deletions bindings/src/gc_count_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use pyo3::prelude::*;
use std::path::PathBuf;

// Python-facing wrapper around `rust_gc_count::gc_count::write_gc_to_file`.
//
// The three path arguments arrive as `String`s and are turned into `PathBuf`s
// right at the call site; every other argument is forwarded unchanged and in
// the same order. PyO3 exposes `///` docs as the Python docstring, so plain
// comments are used here to leave the function's `__doc__` untouched.
#[pyfunction]
pub fn write_gc_count_to_file(
    input: String,
    output: String,
    compression_level: u32,
    window_size: i32,
    omit_tail: bool,
    chrom_sizes_path: String,
    write_chrom_sizes: bool,
    verbose: bool,
) {
    rust_gc_count::gc_count::write_gc_to_file(
        PathBuf::from(input),
        PathBuf::from(output),
        compression_level,
        window_size,
        omit_tail,
        PathBuf::from(chrom_sizes_path),
        write_chrom_sizes,
        verbose,
    )
}
3 changes: 3 additions & 0 deletions bindings/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
use pyo3::prelude::*;

pub mod gc_count_utils;
pub mod checksumseq;
pub mod models;

use crate::checksumseq::checksum;
use crate::gc_count_utils::write_gc_count_to_file;

/// A Python module implemented in Rust.
#[pymodule]
fn gc_count(_py: Python, module: &PyModule) -> PyResult<()> {
    // Register each exported function in turn: wrap it for this module,
    // then attach it. Any failure is propagated to the caller via `?`.
    let checksum_fn = wrap_pyfunction!(checksum, module)?;
    module.add_function(checksum_fn)?;

    let write_gc_fn = wrap_pyfunction!(write_gc_count_to_file, module)?;
    module.add_function(write_gc_fn)?;

    Ok(())
}
19 changes: 19 additions & 0 deletions bindings/src/models.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use pyo3::prelude::*;
use rust_gc_count::checksum::ChecksumResult;

#[pyclass]
#[pyo3(name="ChecksumResult")]
Expand All @@ -11,4 +12,22 @@ pub struct PyChecksumResult {
pub sha512: String,
#[pyo3(get,set)]
pub md5: String
}

#[pymethods]
impl PyChecksumResult {
    // Text returned for Python's `repr()`: only the sequence id is included.
    fn __repr__(&self) -> String {
        let id = &self.id;
        format!("<ChecksumResult for {}>", id)
    }
}

impl From<ChecksumResult> for PyChecksumResult {
fn from(value: ChecksumResult) -> Self {
PyChecksumResult {
id: value.id,
length: value.length,
sha512: value.sha512,
md5: value.md5
}
}
}
19 changes: 11 additions & 8 deletions src/checksumseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,13 @@ fn main() {
while let Some(record) = reader.next() {
let record = record.expect("Error reading record");
let result = process_sequence(record, args.verbose);
let line = format!(
"{0:#}\t{1:#}\tSQ.{2:#}\t{3:#}\n",
result.0, result.1, result.2, result.3
);

let id = result.id;
let length = result.length;
let sha512 = result.sha512;
let md5 = result.md5;

let line = format!("{0:#}\t{1:#}\tSQ.{2:#}\t{3:#}\n", id, length, sha512, md5);
writer
.write_all(line.as_bytes())
.expect("Could not write to file");
Expand All @@ -89,9 +92,9 @@ acgT\n
while let Some(record) = reader.next() {
let record = record.expect("Error reading record");
let result = process_sequence(record, false);
assert_eq!(result.0, "id");
assert_eq!(result.1, 4);
assert_eq!(result.2, "aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2");
assert_eq!(result.3, "f1f8f4bf413b16ad135722aa4591043e");
assert_eq!(result.id, "id");
assert_eq!(result.length, 4);
assert_eq!(result.sha512, "aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2");
assert_eq!(result.md5, "f1f8f4bf413b16ad135722aa4591043e");
}
}
Loading

0 comments on commit 79152e1

Please sign in to comment.