Skip to content

Commit

Permalink
Merge pull request #688 from kbase/dev_add_eggnog_container
Browse files Browse the repository at this point in the history
pass program threads to tools
  • Loading branch information
Tianhao-Gu authored Mar 16, 2024
2 parents 3696130 + 8e4a516 commit 5293b64
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 6 deletions.
4 changes: 2 additions & 2 deletions src/loaders/compute_tools/eggnog/eggnog.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
from src.loaders.compute_tools.tool_common import ToolRunner, run_command

INPUT_TYPE = 'proteins'
THREADS = 8


def _run_eggnog_single(
tool_safe_data_id: str,
data_id: str,
source_file: Path,
output_dir: Path,
program_threads: int,
debug: bool) -> None:

metadata_file = output_dir / EGGNOG_METADATA
Expand All @@ -33,7 +33,7 @@ def _run_eggnog_single(
'-o', output_dir / source_file.name, # Output prefix.
# Save result file to collectiondata directory. Expecting 'emapper.annotations', 'emapper.hits' and 'emapper.seed_orthologs' files.
'--itype', f'{INPUT_TYPE}',
'--cpu', f'{THREADS}',
'--cpu', f'{program_threads}',
'--excel',
'--sensmode', 'fast',
'--dmnd_iterate', 'no',
Expand Down
6 changes: 6 additions & 0 deletions src/loaders/compute_tools/eggnog/versions.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
versions:
- version: 0.1.0
date: 2024-03-13
reference_db_version: 5.0.2

- version: 0.1.1
date: 2024-03-15
notes: |
- add ability to specify thread number for execution
reference_db_version: 5.0.2
2 changes: 2 additions & 0 deletions src/loaders/compute_tools/mash/mash.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def _run_mash_single(
data_id: str,
source_file: Path,
output_dir: Path,
program_threads: int,
debug: bool,
kmer_size: int = KMER_SIZE,
sketch_size: int = SKETCH_SIZE) -> None:
Expand All @@ -25,6 +26,7 @@ def _run_mash_single(
# Save result file to source file directory. The suffix '.msh' will be appended.
'-k', f'{kmer_size}',
'-s', f'{sketch_size}',
'-p', f'{program_threads}',
source_file]

run_command(command, output_dir if debug else None)
Expand Down
6 changes: 5 additions & 1 deletion src/loaders/compute_tools/mash/versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@ versions:
- version: 0.1.0
date: 2023-07-18
- version: 0.1.1
date: 2023-07-19
date: 2023-07-19
- version: 0.1.2
date: 2024-03-15
notes: |
- add ability to specify thread number for execution
12 changes: 11 additions & 1 deletion src/loaders/compute_tools/microtrait/microtrait.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,13 @@ def _process_trait_counts(
return heatmap_row, cells_meta, traits_meta


def _run_microtrait(tool_safe_data_id: str, data_id: str, fna_file: Path, genome_dir: Path, debug: bool):
def _run_microtrait(
tool_safe_data_id: str,
data_id: str,
fna_file: Path,
genome_dir: Path,
program_threads: int,
debug: bool):
# run microtrait.extract_traits on the genome file
# https://github.com/ukaraoz/microtrait

Expand All @@ -198,6 +204,10 @@ def _run_microtrait(tool_safe_data_id: str, data_id: str, fna_file: Path, genome
# object returned by the
# extract_traits function.

# program_threads is not used in this function, but it is kept for consistency with other tools (e.g., eggnog, mash),
# since the extract_traits function doesn't take the number of threads as an argument
# https://github.com/ukaraoz/microtrait/blob/master/R/extract_traits.R#L22-L26

# Load the R script as an R function
r_script = """
library(microtrait)
Expand Down
5 changes: 3 additions & 2 deletions src/loaders/compute_tools/tool_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def _get_data_ids(self):
data_ids = all_data_ids
return list(set(data_ids))

def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Path, bool], None], unzip=False):
def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Path, int, bool], None], unzip=False):
"""
Run a tool by a single data file, storing the results in a single batch directory with
the individual runs stored in directories by the data ID.
Expand Down Expand Up @@ -298,6 +298,7 @@ def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Pat
meta.get(loader_common_names.META_UNCOMPRESSED_FILE,
meta[loader_common_names.META_SOURCE_FILE]),
output_dir,
self._program_threads,
self._debug))

try:
Expand Down Expand Up @@ -378,7 +379,7 @@ def _execute(
self,
threads: int,
tool_callable: Callable[..., None],
args: List[Tuple[Dict[str, GenomeTuple], Path, int, bool]],
args: List[Tuple[Dict[str, GenomeTuple], Path, int, bool]] | List[Tuple[str, str, Path, Path, int, bool]],
start: datetime.datetime,
total: bool,
):
Expand Down

0 comments on commit 5293b64

Please sign in to comment.