From 34b6d40d0379c29fae2ad36b09545b45b77c1461 Mon Sep 17 00:00:00 2001 From: "Tianyu (Sky) Lu" Date: Fri, 12 Jan 2024 18:35:05 +0800 Subject: [PATCH] bug fix Synced with lutianyu2001/TIR-Learner at 2024-01-12 04:35 (CST) --- bin/TIR-Learner3.0/TIR-Learner3.0.py | 3 ++- bin/TIR-Learner3.0/bin/main.py | 17 ++++++++++++----- bin/TIR-Learner3.0/bin/prog_const.py | 5 +++-- bin/TIR-Learner3.0/bin/run_GRF.py | 10 ++++++---- bin/TIR-Learner3.0/bin/run_TIRvish.py | 2 ++ 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/bin/TIR-Learner3.0/TIR-Learner3.0.py b/bin/TIR-Learner3.0/TIR-Learner3.0.py index a66e5b4..e05b93a 100644 --- a/bin/TIR-Learner3.0/TIR-Learner3.0.py +++ b/bin/TIR-Learner3.0/TIR-Learner3.0.py @@ -64,7 +64,8 @@ GRF_path = parsed_args.grf_path.replace('"', "") gt_path = parsed_args.gt_path.replace('"', "") additional_args = prog_const.process_additional_args(parsed_args.additional_args.split(" ")) - print(f"INFO: Additional args: {additional_args} accepted.") + if len(additional_args) != 0: + print(f"INFO: Additional args: {additional_args} accepted.") # Transforming the possible relative path into absolute path genome_file = os.path.abspath(genome_file) diff --git a/bin/TIR-Learner3.0/bin/main.py b/bin/TIR-Learner3.0/bin/main.py index bbfce9d..b4bd76b 100644 --- a/bin/TIR-Learner3.0/bin/main.py +++ b/bin/TIR-Learner3.0/bin/main.py @@ -35,7 +35,7 @@ class TIRLearner: def __init__(self, genome_file_path: str, genome_name: str, species: str, TIR_length: int, cpu_cores: int, GRF_mode: str, working_dir_path: str, output_dir_path: str, checkpoint_dir_input_path: str, - flag_verbose: bool, flag_debug: bool, GRF_path: str, gt_path: str, additional_args: list): + flag_verbose: bool, flag_debug: bool, GRF_path: str, gt_path: str, additional_args: tuple): self.genome_file_path = genome_file_path self.genome_name = genome_name self.species = species @@ -96,7 +96,7 @@ def clear(self): self.working_df_dict.clear() def execute(self): - self.mount_working_dir() + temp_dir = self.mount_working_dir() self.load_checkpoint_file() self.pre_scan_fasta_file() # print(os.getcwd()) # TODO ONLY FOR DEBUG REMOVE AFTER FINISHED @@ -114,8 +114,12 @@ def execute(self): if prog_const.CHECKPOINT_OFF not in self.additional_args and not self.flag_debug: shutil.rmtree(self.checkpoint_dir_output_path) - subprocess.Popen(["unlink", self.genome_file_path]).wait() - os.rmdir(self.working_dir_path) + # subprocess.Popen(["unlink", self.genome_file_path]).wait() + # os.rmdir(self.working_dir_path) + if temp_dir is not None: + shutil.rmtree(temp_dir) + else: + shutil.rmtree(self.working_dir_path) def pre_scan_fasta_file(self): # names = [record.id for record in SeqIO.parse(self.genome_file, "fasta")] @@ -166,14 +170,17 @@ def pre_scan_fasta_file(self): def mount_working_dir(self): if self.working_dir_path is None: - self.working_dir_path = tempfile.mkdtemp() + temp_dir = tempfile.mkdtemp() + self.working_dir_path = temp_dir # self.load_genome_file() else: + temp_dir = None os.makedirs(self.working_dir_path, exist_ok=True) self.working_dir_path = os.path.join(self.working_dir_path, prog_const.sandbox_dir_name) os.makedirs(self.working_dir_path, exist_ok=True) self.working_dir_path = os.path.abspath(self.working_dir_path) os.chdir(self.working_dir_path) + return temp_dir # def load_genome_file(self): # genome_file_soft_link = os.path.join(self.execution_dir, "genome_file_soft_link.fa.lnk") diff --git a/bin/TIR-Learner3.0/bin/prog_const.py b/bin/TIR-Learner3.0/bin/prog_const.py index 0d1cb4c..ca26e63 100644 --- a/bin/TIR-Learner3.0/bin/prog_const.py +++ b/bin/TIR-Learner3.0/bin/prog_const.py @@ -30,8 +30,9 @@ mix_short_seq_process_num = 2 -def process_additional_args(additional_args: list) -> list: - processed_additional_args = list(map(additional_args_mapping_dict.get, additional_args)) +def process_additional_args(additional_args: list) -> tuple: + processed_additional_args = tuple(i for i in + tuple(map(additional_args_mapping_dict.get, additional_args)) if i is not None) if SKIP_TIRVISH in processed_additional_args and SKIP_GRF in processed_additional_args: raise SystemExit("ERROR: \"skip_tirvish\" and \"skip_grf\" cannot be specified at the same time!") return processed_additional_args diff --git a/bin/TIR-Learner3.0/bin/run_GRF.py b/bin/TIR-Learner3.0/bin/run_GRF.py index b6e83bf..150f490 100644 --- a/bin/TIR-Learner3.0/bin/run_GRF.py +++ b/bin/TIR-Learner3.0/bin/run_GRF.py @@ -181,9 +181,10 @@ def cpu_cores_allocation_GRF_boost(cpu_cores, job_bound_type="cpu_bound"): num_threads_total = cpu_cores num_processes = int(math.sqrt(num_threads_total)) - num_threads_per_process = int(num_threads_total / num_processes) - num_extra_threads = num_threads_per_process - num_processes * num_threads_per_process - num_extra_threads = 0 if num_extra_threads < 0 else num_extra_threads + num_threads_per_process = int(num_threads_total / num_processes) * 4 + # num_extra_threads = num_threads_per_process - num_processes * num_threads_per_process + # num_extra_threads = 0 if num_extra_threads < 0 else num_extra_threads + num_extra_threads = 0 return num_processes, num_threads_per_process, num_extra_threads @@ -231,7 +232,7 @@ def execute(TIRLearner_instance): # for i in os.listdir(os.getcwd()): # shutil.copyfile(os.path.join(os.getcwd(), i), os.path.join(TIRLearner_instance.output_dir, i)) - print() + print(" Step 1/2: Executing GRF\n") if GRF_mode == "mix": run_GRF_mix(records_split_file_name, filtered_genome_file_name, GRF_path, cpu_cores, TIR_length) elif GRF_mode == "boost": @@ -240,4 +241,5 @@ def execute(TIRLearner_instance): run_GRF_native(filtered_genome_file_name, GRF_path, cpu_cores, TIR_length) print() + print(" Step 2/2: Getting GRF result") return get_GRF_result_df(GRF_result_dir_name) diff --git a/bin/TIR-Learner3.0/bin/run_TIRvish.py b/bin/TIR-Learner3.0/bin/run_TIRvish.py index 6fe6285..e046a51 100644 --- a/bin/TIR-Learner3.0/bin/run_TIRvish.py +++ b/bin/TIR-Learner3.0/bin/run_TIRvish.py @@ -69,6 +69,8 @@ def execute(TIRLearner_instance): flag_verbose = TIRLearner_instance.flag_verbose gt_path = TIRLearner_instance.gt_path + print(" Step 1/2: Executing TIRvish") TIRvish_result_gff3_file_name = run_TIRvish(genome_file, genome_name, TIR_length, gt_path) + print(" Step 2/2: Getting TIRvish result") df = get_TIRvish_result_df(TIRvish_result_gff3_file_name) return get_fasta_pieces_SeqIO(genome_file, df, cpu_cores, flag_verbose)