-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #754 from kbase/dev_build_busco
add busco tool
- Loading branch information
Showing
6 changed files
with
168 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
name: Build & Push BUSCO Image to GHCR

# The image is rebuilt whenever the BUSCO version file or one of the two
# workflow files involved in the build changes.
on:
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    paths:
      - 'src/loaders/compute_tools/busco/versions.yaml'
      - '.github/workflows/build-push-busco-image.yml'
      - '.github/workflows/build-push-tool-images.yml'

  push:
    branches: [main, master, develop]
    paths:
      - 'src/loaders/compute_tools/busco/versions.yaml'
      - '.github/workflows/build-push-busco-image.yml'
      - '.github/workflows/build-push-tool-images.yml'

jobs:
  # Delegates the actual work to the reusable workflow in this repository,
  # passing it the tool name and the location of the tool's version file.
  trigger-build-push:
    uses: ./.github/workflows/build-push-tool-images.yml
    with:
      tool_name: busco
      version_file: 'src/loaders/compute_tools/busco/versions.yaml'
    secrets: inherit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
FROM continuumio/miniconda3:24.5.0-0

# NOTE: If the tool version changes, ensure the metadata information saved after running the tool in the
# _run_busco_single method is updated
ARG BUSCO_VER=5.7.1
# Name of the conda environment that holds BUSCO and its Python dependencies.
ENV CONDA_ENV=busco-$BUSCO_VER

# Add Bioconda and Conda-Forge channels.
# `conda config --add` prepends, so conda-forge (added last) ends up with the highest priority.
RUN conda config --add channels bioconda \
    && conda config --add channels conda-forge

# Install BUSCO
# Certain dependencies (e.g., dendropy, sepp) are only compatible with Python versions up to 3.9.
ARG PYTHON_VER=3.9
# Suggestions from BUSCO team to use mamba for speeding up the installation process:
# https://busco.ezlab.org/busco_userguide.html#installation-with-conda
# The environment is created, populated and cleaned within one layer so that the conda package
# cache is not persisted in the image. `-y` makes every step explicitly non-interactive.
RUN conda create -y -n "$CONDA_ENV" python="$PYTHON_VER" \
    && conda install -y -n "$CONDA_ENV" pandas=2.2.2 jsonlines=4.0.0 mamba=1.5.8 pyyaml=6.0.1 \
    && conda run -n "$CONDA_ENV" mamba install -c bioconda -c conda-forge -y busco="$BUSCO_VER" \
    && conda clean --all --yes

# Activate the environment
RUN echo "source activate $CONDA_ENV" >> ~/.bashrc

# Set up directories (COPY creates /app and /app/collections when they do not exist)
COPY ./ /app/collections
# `rm -rf` keeps the build working when .git is not part of the build context
# (e.g. excluded through .dockerignore).
# NOTE(review): the world-writable permissions are kept from the original; presumably the
# container is run under arbitrary user ids - confirm before tightening.
RUN rm -rf /app/collections/.git \
    && chmod -R 777 /app/collections

# NOTE(review): PY_SCRIPT is presumably read by entrypoint.sh to locate the tool script - confirm.
ENV PYTHONPATH=/app/collections \
    PY_SCRIPT=/app/collections/src/loaders/compute_tools/busco/busco.py
WORKDIR /app

ENTRYPOINT ["/app/collections/src/loaders/compute_tools/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
""" | ||
Run BUSCO tool on a set of fna files. | ||
This tool serves a distinct purpose separate from collection tools; instead, it is suited for CDM work. | ||
Therefore, the parser program is not compatible with data generated by this tool. | ||
""" | ||
import os | ||
import time | ||
from pathlib import Path | ||
|
||
from src.loaders.common.loader_common_names import TOOL_METADATA | ||
from src.loaders.compute_tools.tool_common import ToolRunner, run_command, create_tool_metadata | ||
from src.loaders.compute_tools.tool_version import extract_latest_reference_db_version | ||
|
||
|
||
def _run_busco_single(
        tool_safe_data_id: str,
        data_id: str,
        source_file: Path,
        output_dir: Path,
        threads_per_tool_run: int,
        debug: bool) -> None:
    """
    Run BUSCO on one input file and record the run in a tool metadata file.

    Nothing is executed when the metadata file (TOOL_METADATA) already exists in output_dir.

    tool_safe_data_id - not used by this function.
    data_id - identifier of the data unit; handed to BUSCO as the output name (-o).
    source_file - the input file handed to BUSCO (-i).
    output_dir - handed to BUSCO as --out_path and to create_tool_metadata.
    threads_per_tool_run - handed to BUSCO as the value of -c.
    debug - when true, output_dir is passed as the second argument of run_command,
        otherwise None is passed.
    """
    start = time.time()
    print(f'Start executing BUSCO for {data_id}')

    # This function creates the metadata file only after BUSCO has been run,
    # so an existing file is treated as "already processed".
    if (output_dir / TOOL_METADATA).exists():
        print(f"Skipping {source_file} as it has already been processed.")
        return

    # versions.yaml is looked up in the directory that contains this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    ref_db_version = extract_latest_reference_db_version(
        os.path.join(module_dir, 'versions.yaml'))

    # Please refer to https://docs.google.com/document/d/15yV-S41Iqe20F-I2MRLWdzJwVdr8QKUfZPw7oq8WvB0/edit#heading=h.elgudks5mtxu
    # for more information on the BUSCO command options we are using here.
    busco_options = (
        ('-i', str(source_file)),
        ('-o', data_id),
        ('--out_path', str(output_dir)),
        ('--datasets_version', ref_db_version),
        ('--download_path', '/reference_data'),
        ('-c', str(threads_per_tool_run)),
        ('--auto-lineage-prok',),
        ('-m', 'genome'),
        ('-f',),
        ('--augustus',),
    )
    command = ['busco']
    for option in busco_options:
        command.extend(option)

    debug_target = output_dir if debug else None
    run_command(command, debug_target)

    run_time = time.time() - start
    print(
        f'Used {round(run_time / 60, 2)} minutes to execute BUSCO for {data_id}')

    # Save run info to a metadata file in the output directory for parsing later
    create_tool_metadata(
        output_dir,
        tool_name="busco",
        version="5.7.1",
        command=command,
        run_time=round(run_time, 2),
        batch_size=1,
        additional_metadata={
            'source_file': str(source_file),
            'data_id': data_id,
            "reference_db": {
                "version": ref_db_version,
            },
        })
|
||
|
||
def main():
    """Create a ToolRunner for "busco" and let it execute _run_busco_single with unzip=True."""
    ToolRunner("busco").parallel_single_execution(_run_busco_single, unzip=True)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# This tool is not one of the collection tools: it serves a separate purpose and is suited for CDM work.
# Therefore, the parser program is not compatible with data generated by this tool.

versions:
  - version: 0.1.0
    date: 2024-08-22
    notes: |
      - initial BUSCO implementation
    # Passed to BUSCO as --datasets_version by busco.py.
    reference_db_version: odb10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters