Skip to content

Commit

Permalink
Merge pull request #48 from databio/dev
Browse files Browse the repository at this point in the history
Release `v0.1.0` - Fender stratocaster
  • Loading branch information
nleroy917 authored Dec 3, 2024
2 parents 4162f72 + e3cac35 commit 3c0e799
Show file tree
Hide file tree
Showing 130 changed files with 7,930 additions and 36 deletions.
22 changes: 11 additions & 11 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
working-directory: ./bindings
working-directory: ./bindings/python
target: ${{ matrix.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
Expand All @@ -36,7 +36,7 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: wheels
path: ./bindings/dist
path: ./bindings/python/dist

windows:
runs-on: windows-latest
Expand All @@ -55,12 +55,12 @@ jobs:
target: ${{ matrix.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: ./bindings
working-directory: ./bindings/python
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: ./bindings/dist
path: ./bindings/python/dist

macos:
runs-on: macos-latest
Expand All @@ -78,12 +78,12 @@ jobs:
target: ${{ matrix.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: ./bindings
working-directory: ./bindings/python
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: ./bindings/dist
path: ./bindings/python/dist

sdist:
runs-on: ubuntu-latest
Expand All @@ -94,12 +94,12 @@ jobs:
with:
command: sdist
args: --out dist
working-directory: ./bindings
working-directory: ./bindings/python
- name: Upload sdist
uses: actions/upload-artifact@v3
with:
name: wheels
path: ./bindings/dist
path: ./bindings/python/dist

release:
name: Release
Expand All @@ -113,13 +113,13 @@ jobs:
- uses: actions/download-artifact@v3
with:
name: wheels
path: ./bindings/dist
path: ./bindings/python/dist
- name: List contents
run: |
echo "Contents of dist/"
ls -l ./bindings/dist/
ls -l ./bindings/python/dist/
- name: Publish to PyPI
uses: PyO3/maturin-action@v1
with:
command: upload
args: --non-interactive --skip-existing ./bindings/dist/*
args: --non-interactive --skip-existing ./bindings/python/dist/*
42 changes: 42 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Runs `R CMD check` on the R bindings (bindings/r) for every push to, and
# pull request against, master.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

name: R-CMD-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}
    name: ${{ matrix.config.os }} (R-${{ matrix.config.r }} rust-${{ matrix.config.rust-version }})
    strategy:
      # Let every OS / R combination finish even when one of them fails.
      fail-fast: false
      matrix:
        config:
          # - {os: windows-latest, r: 'release', rust-version: 'stable-msvc', rust-target: 'x86_64-pc-windows-gnu'}
          - {os: macOS-latest, r: 'release', rust-version: 'stable'}
          - {os: ubuntu-latest, r: 'release', rust-version: 'stable'}
          - {os: ubuntu-latest, r: 'devel', rust-version: 'stable'}
    env:
      # Environment values are strings; quoted so no YAML loader retypes it.
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: 'true'
    steps:
      # v2 of actions/checkout is an outdated major version; v4 is current.
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ matrix.config.rust-version }}
          # `rust-target` is only defined by the commented-out Windows entry,
          # so this expands to an empty value for the active configurations.
          targets: ${{ matrix.config.rust-target }}
      - uses: r-lib/actions/setup-pandoc@v2
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          use-public-rspm: true
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: rcmdcheck
          working-directory: ${{ github.workspace }}/bindings/r
      - uses: r-lib/actions/check-r-package@v2
        with:
          working-directory: ${{ github.workspace }}/bindings/r
12 changes: 10 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ Cargo.lock
*.pdb

.venv

/.idea/genimtools.iml
/.idea/modules.xml
/.idea/.gitignore
/.idea/vcs.xml
# this is for "act"
bin/
bin/
/.idea/gtars.iml
/gtars/tests/data/test1.bw

.DS_Store
.Rhistory
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"rust-analyzer.linkedProjects": [
"./gtars/Cargo.toml",
"./bindings/Cargo.toml",
"./bindings/python/Cargo.toml",
"./bindings/r/src/rust/Cargo.toml",
]
}
File renamed without changes.
4 changes: 2 additions & 2 deletions bindings/Cargo.toml → bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gtars-py"
version = "0.0.15"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -10,7 +10,7 @@ crate-type = ["cdylib"]

[dependencies]
anyhow = "1.0.82"
gtars = { path = "../gtars" }
gtars = { path = "../../gtars" }
pyo3 = { version = "0.21", features=["anyhow", "extension-module"] }
numpy = "0.21"
# pyo3-tch = { git = "https://github.com/LaurentMazare/tch-rs" }
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 18 additions & 0 deletions bindings/python/src/igd/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use pyo3::prelude::*;

use gtars::igd::search::igd_search;

#[pyclass(name="IGD")]
pub struct IGD;

#[pymethods]
impl IGD {

#[classmethod]
pub fn search(database_path: String, query_file_path: String) {

igd_search(&database_path, &query_file_path).unwrap();


}
}
1 change: 1 addition & 0 deletions bindings/src/lib.rs → bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod ailist;
mod models;
mod tokenizers;
mod utils;
mod igd;

pub const VERSION: &str = env!("CARGO_PKG_VERSION");

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,17 @@ pub fn read_tokens_from_gtok(filename: &str) -> PyResult<Vec<u32>> {
Ok(tokens)
}

/// Load the tokens stored in `filename` through `gtars::io::read_tokens_from_gtok`
/// and hand them back with every token rendered as its string form.
///
/// Any error reported by the reader is propagated to the caller.
#[pyfunction]
pub fn read_tokens_from_gtok_as_strings(filename: &str) -> PyResult<Vec<String>> {
    let raw_tokens = gtars::io::read_tokens_from_gtok(filename)?;
    Ok(raw_tokens.iter().map(ToString::to_string).collect())
}

/// Populate the `utils` Python module with the gtok read/write helpers
/// defined in this file.
#[pymodule]
pub fn utils(_py: Python, module: &Bound<'_, PyModule>) -> PyResult<()> {
    // Writer first, then the two readers (numeric and string flavoured).
    module.add_wrapped(wrap_pyfunction!(write_tokens_to_gtok))?;
    module.add_wrapped(wrap_pyfunction!(read_tokens_from_gtok))?;
    module.add_wrapped(wrap_pyfunction!(read_tokens_from_gtok_as_strings))?;

    Ok(())
}
Binary file added bindings/r/.RData
Binary file not shown.
2 changes: 2 additions & 0 deletions bindings/r/.Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^src/\.cargo$
^LICENSE\.md$
13 changes: 13 additions & 0 deletions bindings/r/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Package: gtars
Title: Performance critical genomic interval analysis using Rust, in R
Version: 0.0.0.9000
Authors@R:
person("Nathan", "LeRoy", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7354-7213"))
Description: Performance-critical tools to manipulate, analyze, and process genomic interval data. Primarily focused on building tools for geniml - our genomic machine learning python package.
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Config/rextendr/version: 0.3.1.9001
SystemRequirements: Cargo (Rust's package manager), rustc
2 changes: 2 additions & 0 deletions bindings/r/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2024
COPYRIGHT HOLDER: gtars authors
21 changes: 21 additions & 0 deletions bindings/r/LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# MIT License

Copyright (c) 2024 gtars authors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
7 changes: 7 additions & 0 deletions bindings/r/NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(r_igd_create)
export(r_igd_search)
export(read_tokens_from_gtok)
export(write_tokens_to_gtok)
useDynLib(gtars, .registration = TRUE)
38 changes: 38 additions & 0 deletions bindings/r/R/extendr-wrappers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by extendr: Do not edit by hand

# nolint start

#
# This file was created with the following call:
# .Call("wrap__make_gtars_wrappers", use_symbols = TRUE, package_name = "gtars")

#' @usage NULL
#' @useDynLib gtars, .registration = TRUE
NULL

# Calls the native routine `wrap____init__` and hides its return value.
# NOTE(review): presumably extendr's package initialisation hook - confirm.
`__init__` <- function() invisible(.Call(wrap____init__))

#' Read tokens from a gtok file
#' @export
#' @param filename A string representing the path to the gtok file.
read_tokens_from_gtok <- function(filename) .Call(wrap__r_read_tokens_from_gtok, filename)

#' Write tokens to a gtok file
#'
#' Forwards both arguments to the native routine and returns its result
#' invisibly.
#' @export
#' @param filename A string representing the path to the gtok file.
#' @param tokens The tokens to write.
write_tokens_to_gtok <- function(filename, tokens) {
  native_result <- .Call(wrap__r_write_tokens_to_gtok, filename, tokens)
  invisible(native_result)
}

#' Create an IGD database from a directory of bed files
#'
#' Thin binding that passes its three arguments straight to the native
#' routine and returns whatever that routine returns.
#' @param output_path String path where the IGD database will be saved
#' @param filelist String path to either a text file containing paths to bed files, or a directory containing bed files
#' @param db_name String name for the database (will be used in output filenames)
rextendr_igd_create <- function(output_path, filelist, db_name) {
  .Call(wrap__rextendr_igd_create, output_path, filelist, db_name)
}

#' Search an IGD database with a bed file
#'
#' Thin binding that passes both paths straight to the native routine and
#' returns whatever that routine returns.
#' @param database_path A string representing the path to the database igd file.
#' @param query_path A string representing the path to the query bed file.
rextendr_igd_search <- function(database_path, query_path) {
  .Call(wrap__rextendr_igd_search, database_path, query_path)
}


# nolint end
73 changes: 73 additions & 0 deletions bindings/r/R/igd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#' @useDynLib gtars, .registration = TRUE
NULL

#' @title Create IGD Database
#'
#' @description Creates an IGD (Indexed Genomic Data) database from a collection of BED files.
#'
#' @param output_path Character string specifying the directory where the IGD database will be saved
#' @param filelist Character string specifying either:
#'   - Path to a text file containing paths to BED files (one per line)
#'   - Path to a directory containing BED files
#'   - "-" or "stdin" to read paths from standard input
#' @param db_name Character string specifying the name for the database (will be used in output filenames).
#'   Defaults to "igd_database"
#'
#' @return NULL invisibly on success. An error is raised when any argument is
#'   not a single character string.
#'
#' @examples
#' \dontrun{
#' # Create database with default name
#' r_igd_create("path/to/output", "path/to/bed/files")
#'
#' # Create database with a custom name
#' r_igd_create("path/to/output", "path/to/bed/files", db_name = "my_database")
#' }
#'
#' @export
r_igd_create <- function(output_path, filelist, db_name = "igd_database") {
  # Input validation: every argument must be exactly one character string so
  # that a malformed value is rejected here rather than inside the native call.
  if (!is.character(output_path) || length(output_path) != 1) {
    stop("output_path must be a single character string")
  }
  if (!is.character(filelist) || length(filelist) != 1) {
    stop("filelist must be a single character string")
  }
  if (!is.character(db_name) || length(db_name) != 1) {
    stop("db_name must be a single character string")
  }

  # Call Rust function; its return value is intentionally not propagated.
  .Call(wrap__rextendr_igd_create, output_path, filelist, db_name)

  invisible(NULL)
}


#' @title Search IGD Database
#'
#' @description Searches an IGD database for region overlaps with an input BED file
#'
#' @param database_path path to .igd database
#' @param query_path path to .bed file
#'
#' @return dataframe of overlap hits, returned invisibly. The first
#'   tab-separated line produced by the search supplies the column names and
#'   each following line becomes one row. When the search yields only that
#'   first line the data frame has the named columns and zero rows; when it
#'   yields nothing at all an empty data frame is returned.
#'
#' @examples
#' \dontrun{
#' hits <- r_igd_search("path/to/database.igd", "path/to/query.bed")
#' print(hits)
#' }
#'
#' @export
r_igd_search <- function(database_path, query_path) {

  # Input validation
  if (!is.character(database_path) || length(database_path) != 1) {
    stop("database_path must be a single character string")
  }
  if (!is.character(query_path) || length(query_path) != 1) {
    stop("query_path must be a single character string")
  }

  # Call Rust function; the result is used as a character vector of
  # tab-separated lines.
  chr_vector <- .Call(wrap__rextendr_igd_search, database_path, query_path)

  # Nothing came back at all: there is no header line to name columns with.
  if (length(chr_vector) == 0) {
    return(invisible(data.frame()))
  }

  split_result <- strsplit(chr_vector, split = "\t", fixed = TRUE)
  header <- split_result[[1]]
  rows <- split_result[-1]

  if (length(rows) == 0) {
    # Header only: unlist() of an empty list is NULL, which matrix() rejects,
    # so build the zero-row frame explicitly with one column per header field.
    df <- data.frame(matrix(character(0), nrow = 0, ncol = length(header)))
  } else {
    df <- data.frame(matrix(unlist(rows), nrow = length(rows), byrow = TRUE))
  }
  colnames(df) <- header

  invisible(df)
}
Loading

0 comments on commit 3c0e799

Please sign in to comment.