Skip to content

Commit

Permalink
small cleaning and python support
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Sep 11, 2024
1 parent 6ca5022 commit 67fd512
Show file tree
Hide file tree
Showing 15 changed files with 255 additions and 105 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ["3.8", "3.12"]
python-version: ["3.8", "3.13"]
os: [ubuntu-latest]

steps:
Expand Down
44 changes: 42 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/geofetch/README.html)

`geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates [standard PEPs](https://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/). You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command.
**geofetch** is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates metadata tables in [standard PEP format](https://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/). You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command.

## Key geofetch features:

Expand All @@ -20,4 +20,44 @@
- Can search GEO to find relevant data
- Can be used either as a command-line tool or from within Python using an API

For more information, see [complete documentation at geofetch.databio.org](http://geofetch.databio.org) (source in the [/docs](/docs) folder).
## Docs

---

**Documentation**: <a href="https://pep.databio.org/geofetch/" target="_blank">https://pep.databio.org/geofetch/</a>

**Source Code**: <a href="https://github.com/pepkit/geofetch/" target="_blank">https://github.com/pepkit/geofetch/</a>

---


## Installation
To install `geofetch` use this command:
```
pip install geofetch
```
or install the latest version from the GitHub repository:
```
pip install git+https://github.com/pepkit/geofetch.git
```


## How to cite:
https://doi.org/10.1093/bioinformatics/btad069
```bibtex
@article{10.1093/bioinformatics/btad069,
author = {Khoroshevskyi, Oleksandr and LeRoy, Nathan and Reuter, Vincent P and Sheffield, Nathan C},
title = "{GEOfetch: a command-line tool for downloading data and standardized metadata from GEO and SRA}",
journal = {Bioinformatics},
volume = {39},
number = {3},
pages = {btad069},
year = {2023},
month = {03},
abstract = "{The Gene Expression Omnibus has become an important source of biological data for secondary analysis. However, there is no simple, programmatic way to download data and metadata from Gene Expression Omnibus (GEO) in a standardized annotation format. To address this, we present GEOfetch—a command-line tool that downloads and organizes data and metadata from GEO and SRA. GEOfetch formats the downloaded metadata as a Portable Encapsulated Project, providing universal format for the reanalysis of public data. GEOfetch is available on Bioconda and the Python Package Index (PyPI).}",
issn = {1367-4811},
doi = {10.1093/bioinformatics/btad069},
url = {https://doi.org/10.1093/bioinformatics/btad069},
eprint = {https://academic.oup.com/bioinformatics/article-pdf/39/3/btad069/49407404/btad069.pdf},
}
```
141 changes: 141 additions & 0 deletions docs/img/geofetch_logo.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 3 additions & 4 deletions geofetch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
""" Package-level data """

import logmuse
import coloredlogs
import logmuse

from geofetch.geofetch import Geofetcher
from geofetch.finder import Finder
from geofetch._version import __version__

from geofetch.finder import Finder
from geofetch.geofetch import Geofetcher

__author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"]
__all__ = ["Finder", "Geofetcher", "__version__"]
Expand Down
1 change: 1 addition & 0 deletions geofetch/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys

from geofetch.geofetch import main

if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion geofetch/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.12.6"
__version__ = "0.12.7"
2 changes: 2 additions & 0 deletions geofetch/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
import os

import logmuse

from geofetch._version import __version__


Expand Down
25 changes: 13 additions & 12 deletions geofetch/finder.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
import logging
import os
import re
from datetime import datetime, timedelta

import coloredlogs
import requests
import xmltodict

from .const import (
RETMAX,
ETOOLS_GEO_GSE_BASE,
ETOOLS_ENDING,
TODAY_DATE,
DATE_FILTER,
ETOOLS_ENDING,
ETOOLS_GEO_GSE_BASE,
RETMAX,
THREE_MONTH_FILTER,
TODAY_DATE,
)
import requests
import xmltodict
import re
import os
import logging
import coloredlogs
from datetime import datetime
from datetime import timedelta

__author__ = "Oleksandr Khoroshevskyi"

Expand Down
66 changes: 33 additions & 33 deletions geofetch/geofetch.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,65 @@
import copy
import csv
import logging
import os
import re
import sys
import time
from typing import Dict, List, NoReturn, Tuple, Union

import logmuse
import pandas as pd
import peppy
import requests
import xmltodict
import yaml
import time
import logging

from rich.progress import track
import re
import logmuse
from ubiquerg import expandpath
from typing import List, Union, Dict, Tuple, NoReturn
import peppy
import pandas as pd

from geofetch.cli import _parse_cmdl
from geofetch.const import (
GSE_PATTERN,
SAMPLE_SUPP_METADATA_FILE,
CONFIG_PROCESSED_TEMPLATE_NAME,
CONFIG_RAW_TEMPLATE_NAME,
CONFIG_SRA_TEMPLATE,
EXP_SUPP_METADATA_FILE,
NEW_GENOME_COL_NAME,
EXPERIMENT_PATTERN,
FILE_RAW_NAME_SAMPLE_PATTERN,
FILE_RAW_NAME_SUBSAMPLE_PATTERN,
CONFIG_RAW_TEMPLATE_NAME,
CONFIG_SRA_TEMPLATE,
CONFIG_PROCESSED_TEMPLATE_NAME,
GSE_PATTERN,
NCBI_EFETCH,
NCBI_ESEARCH,
NEW_GENOME_COL_NAME,
NUM_RETRIES,
PROJECT_PATTERN,
SAMPLE_SUPP_METADATA_FILE,
SER_SUPP_FILE_PATTERN,
SUPP_FILE_PATTERN,
PROJECT_PATTERN,
NCBI_EFETCH,
NCBI_ESEARCH,
EXPERIMENT_PATTERN,
)
from geofetch.utils import (
Accession,
build_prefetch_command,
parse_accessions,
parse_SOFT_line,
convert_size,
clean_soft_files,
run_subprocess,
_check_file_existance,
_create_dot_yaml,
_dict_to_list_converter,
_filter_gsm,
_get_list_of_keys,
_get_value,
_read_tar_filelist,
_check_file_existance,
_separate_list_of_files,
_update_columns,
_sanitize_name,
_sanitize_config_string,
_create_dot_yaml,
_which,
_dict_to_list_converter,
_standardize_colnames,
_sanitize_name,
_separate_file_url,
_filter_gsm,
_separate_list_of_files,
_standardize_colnames,
_unify_list_keys,
_update_columns,
_which,
build_prefetch_command,
clean_soft_files,
convert_size,
gse_content_to_dict,
is_prefetch_callable,
parse_accessions,
parse_SOFT_line,
run_subprocess,
)

_LOGGER = logging.getLogger(__name__)
Expand Down
7 changes: 4 additions & 3 deletions geofetch/sraconvert.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#!/usr/bin/env python

from argparse import ArgumentParser
import os
import pypiper
import logmuse
import sys
from argparse import ArgumentParser

import logmuse
import pypiper

__version__ = "0.1.0"

Expand Down
9 changes: 5 additions & 4 deletions geofetch/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
""" Independently-importable utilities to circumvent true scripts. """

import csv
import logging
import os
import re
import subprocess
import sys
import re
import requests
from io import StringIO
import csv
from typing import Union, List, NoReturn, Dict
from typing import Dict, List, NoReturn, Union

import requests

_LOGGER = logging.getLogger(__name__)

Expand Down
40 changes: 0 additions & 40 deletions mkdocs.yml

This file was deleted.

2 changes: 1 addition & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ ubiquerg>=0.6.2
requests>=2.28.1
xmltodict>=0.13.0
pandas>=1.5.3
peppy>=0.40.0
peppy>=0.40.6
rich>=12.5.1
coloredlogs>=15.0.1
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#! /usr/bin/env python

import os
from setuptools import setup
import sys

from setuptools import setup

PACKAGE = "geofetch"
REQDIR = "requirements"

Expand Down Expand Up @@ -49,6 +50,8 @@ def read_reqs(reqs_name):
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering :: Bio-Informatics",
],
keywords="project, bioinformatics, sequencing, ngs, workflow, GUI",
Expand Down
7 changes: 4 additions & 3 deletions tests/test_geofetch.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
import shutil

import peppy
import pytest

import geofetch
from geofetch import Geofetcher, utils
from geofetch.utils import parse_accessions
import os
import pytest
import shutil

INPUT_ACC_FILE = "tests/test_files/input_acc.txt"
GSE_FILES = "tests/test_files/soft_files"
Expand Down

0 comments on commit 67fd512

Please sign in to comment.