Skip to content

Commit

Permalink
1.7.7.1 - bug fixes
Browse files Browse the repository at this point in the history
1. fix an import bug in 'from scipy import stats, log, inf' (issues #132, #315)
2. fix a ZeroDivisionError bug when the estimated coverage is 0 (issue #311)
3. reword the log message 'Disentangling failed' to 'Disentangling unsuccessful' to avoid alarming users (issue #308)
4. fix a bug in parsing options when '-F anonym' is used (issue #319)
5. pass max_multiplicity through in the no-slim case
6. minor adjustment
  • Loading branch information
JianjunJin committed Apr 3, 2024
1 parent e21f0fa commit 1949437
Show file tree
Hide file tree
Showing 14 changed files with 79 additions and 49 deletions.
11 changes: 10 additions & 1 deletion GetOrganelleLib/statistical_func.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
try:
from scipy import stats, inf, log
from scipy import stats
except ImportError:
class stats:
class norm:
def logpdf(foo1, foo2, foo3):
raise ImportError("Failed in 'from scipy import stats, inf, log'!")
inf = float("inf")
from math import log
try:
from scipy import inf, log
except ImportError:
try:
from numpy import inf, log
except ImportError:
inf = float("inf")
from math import log

from copy import deepcopy
try:
import numpy as np
Expand Down
10 changes: 10 additions & 0 deletions GetOrganelleLib/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ def get_versions():


versions = [
{
"number": "1.7.7.1",
"features": [
"1. fix a import bug of 'from scipy import stat, log, inf' issue (issue #132 #315)",
"2. fix a ZeroDivisionError bug when the estimated coverage is 0 (issue #311)",
"3. Disentangling failed -> Disentangling unsuccessful to avoid panic (issue #308)",
"4. fix a bug in parsing options when '-F anonym' is used (issue #319)",
],
"time": "2024-04-03 17:05 UTC-5"
},
{
"number": "1.7.7.0",
"features": [
Expand Down
6 changes: 3 additions & 3 deletions Utilities/disentangle_organelle_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,10 @@ def disentangle_circular_assembly(fastg_file, tab_file, prefix, weight_factor, t
except KeyError as e:
if str(e).strip("'") == options.mode:
log_handler.error(options.mode + " not found in " + str(options.tab_file) + "!")
log_handler.error("Disentangling failed!")
log_handler.error("Disentangling unsuccessful!")
else:
log_handler.exception(str(e))
log_handler.error("Disentangling failed!")
log_handler.error("Disentangling unsuccessful!")
if not options.acyclic_allowed:
log_handler.info("You might try again with '--linear' to export contig(s) "
"instead of circular genome.")
Expand All @@ -329,7 +329,7 @@ def disentangle_circular_assembly(fastg_file, tab_file, prefix, weight_factor, t
log_handler.info("Please open an issue at https://github.com/Kinggerm/GetOrganelle/issues if you find bugs!\n")
except Exception as e:
log_handler.exception(str(e))
log_handler.error("Disentangling failed!")
log_handler.error("Disentangling unsuccessful!")
if not options.acyclic_allowed:
log_handler.info("You might try again with '--linear' to export contig(s) "
"instead of circular genome.")
Expand Down
4 changes: 2 additions & 2 deletions Utilities/evaluate_assembly_using_mapping.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python
#!/usr/bin/env python
# coding:utf8

from argparse import ArgumentParser
Expand Down Expand Up @@ -29,7 +29,7 @@

try:
# python2 UnicodeDecodeError ±
reload(sys)
reload(sys) # type: ignore
sys.setdefaultencoding('utf8')
except NameError:
pass
Expand Down
2 changes: 1 addition & 1 deletion Utilities/fastg_to_gfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def main():
if type(2/1) == float:
fastg = input('Please input gfa file:').strip()
else:
fastg = raw_input('Please input gfa file:').strip()
fastg = raw_input('Please input gfa file:').strip() # type: ignore
PATH_OF_THIS_SCRIPT = os.path.split(os.path.realpath(__file__))[0]
sys.path.insert(0, os.path.join(PATH_OF_THIS_SCRIPT, ".."))
from GetOrganelleLib.assembly_parser import Assembly
Expand Down
2 changes: 1 addition & 1 deletion Utilities/get_annotated_regions_from_gb.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python
#!/usr/bin/env python

import os
import time
Expand Down
2 changes: 1 addition & 1 deletion Utilities/gfa_to_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
if type(2/1) == float:
gfa_file = input('Please input gfa file:').strip()
else:
gfa_file = raw_input('Please input gfa file:').strip()
gfa_file = raw_input('Please input gfa file:').strip() # type: ignore
if gfa_file.strip():
write_fasta(gfa_file +'.fasta', read_gfa_as_fasta(gfa_file), False)

Expand Down
2 changes: 1 addition & 1 deletion Utilities/gfa_to_fastg.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def main():
if type(2/1) == float:
gfa_file = input('Please input gfa file:').strip()
else:
gfa_file = raw_input('Please input gfa file:').strip()
gfa_file = raw_input('Please input gfa file:').strip() # type: ignore
PATH_OF_THIS_SCRIPT = os.path.split(os.path.realpath(__file__))[0]
sys.path.insert(0, os.path.join(PATH_OF_THIS_SCRIPT, ".."))
from GetOrganelleLib.assembly_parser import Assembly
Expand Down
2 changes: 1 addition & 1 deletion Utilities/join_spades_fastg_by_blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess
try:
# python2
import commands
import commands # type: ignore
except:
pass
from argparse import ArgumentParser
Expand Down
2 changes: 1 addition & 1 deletion Utilities/rm_low_coverage_duplicated_contigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from argparse import ArgumentParser
import subprocess
try:
import commands
import commands # type: ignore
except:
pass
import os
Expand Down
2 changes: 1 addition & 1 deletion Utilities/round_statistics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python
#!/usr/bin/env python

from argparse import ArgumentParser
import os
Expand Down
2 changes: 1 addition & 1 deletion Utilities/slim_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import subprocess
try:
# python2
import commands
import commands # type: ignore
except:
pass
inf = float("inf")
Expand Down
26 changes: 15 additions & 11 deletions get_organelle_from_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def _check_default_db(this_sub_organelle, extra_type=""):
log_types.append("embplant_pt")
log_handler.info("LABEL DB: " + single_line_db_versions(existing_label_db, log_types))
# working directory
log_handler.info("WORKING DIR: " + os.getcwd())
log_handler.info("WORKING_DIR=" + os.getcwd())
log_handler.info(" ".join(["\"" + arg + "\"" if " " in arg else arg for arg in sys.argv]) + "\n")

assert is_valid_path(os.path.realpath(options.output_base)), \
Expand Down Expand Up @@ -450,10 +450,13 @@ def _check_default_db(this_sub_organelle, extra_type=""):
elif sub_organelle_t in ("embplant_nr", "fungus_nr", "animal_mt"):
options.expected_max_size.append(int(raw_default_value / 10))
elif sub_organelle_t == "anonym":
ref_seqs = read_fasta(options.genes_fasta[got_t])[1]
options.expected_max_size.append(10 * sum([len(this_seq) for this_seq in ref_seqs]))
log_handler.info("Setting '--expected-max-size " + str(options.expected_max_size) +
"' for estimating the word size value for anonym type.")
if options.genes_fasta:
ref_seqs = read_fasta(options.genes_fasta[got_t])[1]
options.expected_max_size.append(10 * sum([len(this_seq) for this_seq in ref_seqs]))
log_handler.info("Setting '--expected-max-size " + str(options.expected_max_size) +
"' for estimating the word size value for anonym type.")
else:
options.expected_max_size.append(inf)
else:
temp_val_len = len(str(options.expected_max_size).split(","))
if temp_val_len != organelle_type_len:
Expand Down Expand Up @@ -619,6 +622,7 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
if no_slim:
input_graph.estimate_copy_and_depth_by_cov(mode=mode_in, log_handler=log_in, verbose=verbose_in)
target_results = input_graph.estimate_copy_and_depth_precisely(
maximum_copy_num=max_copy_in,
broken_graph_allowed=acyclic_allowed_in, return_new_graphs=True, verbose=verbose_in,
log_handler=log_in)
else:
Expand Down Expand Up @@ -841,11 +845,11 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
if verbose:
raise e
except RuntimeError as e:
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: RuntimeError: " + str(e).strip())
except TimeoutError as e:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: " + str(e).strip())
except Exception as e:
log_handler.exception("")
raise e
Expand Down Expand Up @@ -879,11 +883,11 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
if verbose:
raise e
except RuntimeError as e:
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: RuntimeError: " + str(e).strip())
except TimeoutError as e:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: " + str(e).strip())
except Exception as e:
log_handler.exception("")
raise e
Expand Down Expand Up @@ -915,11 +919,11 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
except RuntimeError as e:
if verbose:
log_handler.exception("")
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: RuntimeError: " + str(e).strip())
except TimeoutError as e:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: " + str(e).strip())
except Exception as e:
raise e
else:
Expand Down
55 changes: 31 additions & 24 deletions get_organelle_from_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def _check_default_db(this_sub_organelle, extra_type=""):
log_label_types.append("embplant_pt")
log_handler.info("LABEL DB: " + single_line_db_versions(existing_label_db, log_label_types))
# working directory
log_handler.info("WORKING DIR: " + os.getcwd())
log_handler.info("WORKING_DIR=" + os.getcwd())
log_handler.info(" ".join(["\"" + arg + "\"" if " " in arg else arg for arg in sys.argv]) + "\n")

# if options.run_spades:
Expand Down Expand Up @@ -872,11 +872,14 @@ def _check_default_db(this_sub_organelle, extra_type=""):
elif sub_organelle_t in ("embplant_nr", "fungus_nr", "animal_mt"):
options.expected_max_size.append(int(raw_default_value / 10))
elif sub_organelle_t == "anonym":
ref_seqs = read_fasta(options.genes_fasta[got_t])[1]
options.expected_max_size.append(10 * sum([len(this_seq) for this_seq in ref_seqs]))
log_handler.info(
"Setting '--expected-max-size " + ",".join([str(t_s) for t_s in options.expected_max_size]) +
"' for estimating the word size value for anonym type.")
if options.genes_fasta:
ref_seqs = read_fasta(options.genes_fasta[got_t])[1]
options.expected_max_size.append(10 * sum([len(this_seq) for this_seq in ref_seqs]))
log_handler.info(
"Setting '--expected-max-size " + ",".join([str(t_s) for t_s in options.expected_max_size]) +
"' for estimating the word size value for anonym type.")
else:
options.expected_max_size.append(inf)
else:
temp_val_len = len(str(options.expected_max_size).split(","))
if temp_val_len != organelle_type_len:
Expand Down Expand Up @@ -1243,18 +1246,22 @@ def estimate_word_size(base_cov, base_cov_deviation, read_length, target_size, m
echo_problem = True
word_cov = max(min_word_cov, word_cov)
word_cov = trans_word_cov(word_cov, base_cov, mean_error_rate / 2., read_length)
# 1. relationship between kmer coverage and base coverage, k_cov = base_cov * (read_len - k_len + 1) / read_len
estimated_word_size = int(read_length * (1 - word_cov / base_cov)) + 1
# print(estimated_word_size)
estimated_word_size = min(int(read_length * MAX_RATIO_RL_WS), max(min_word_size, estimated_word_size))
if echo_problem:
if log_handler:
log_handler.warning("Guessing that you are using too few data for assembling " + organelle_type + "!")
log_handler.warning("GetOrganelle is still trying ...")
else:
sys.stdout.write("Guessing that you are using too few data for assembling " + organelle_type + "!\n")
sys.stdout.write("GetOrganelle is still trying ...\n")
return int(round(estimated_word_size, 0))
if base_cov == 0:
log_handler.error("Word size estimation failed due to improper seed or too few input data!")
sys.exit()
else:
# 1. relationship between kmer coverage and base coverage, k_cov = base_cov * (read_len - k_len + 1) / read_len
estimated_word_size = int(read_length * (1 - word_cov / base_cov)) + 1
# print(estimated_word_size)
estimated_word_size = min(int(read_length * MAX_RATIO_RL_WS), max(min_word_size, estimated_word_size))
if echo_problem:
if log_handler:
log_handler.warning("Guessing that you are using too few data for assembling " + organelle_type + "!")
log_handler.warning("GetOrganelle is still trying ...")
else:
sys.stdout.write("Guessing that you are using too few data for assembling " + organelle_type + "!\n")
sys.stdout.write("GetOrganelle is still trying ...\n")
return int(round(estimated_word_size, 0))


def calculate_word_size_according_to_ratio(word_size_ratio, mean_read_len, log_handler):
Expand Down Expand Up @@ -3619,11 +3626,11 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
except RuntimeError as e:
if verbose:
log_handler.exception("")
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: RuntimeError: " + str(e).strip())
except TimeoutError:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: " + str(e).strip())
except Exception as e:
log_handler.exception("")
sys.exit()
Expand Down Expand Up @@ -3663,11 +3670,11 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
except RuntimeError as e:
if verbose:
log_handler.exception("")
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: RuntimeError: " + str(e).strip())
except TimeoutError:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: " + str(e).strip())
except Exception as e:
log_handler.exception("")
sys.exit()
Expand Down Expand Up @@ -3708,11 +3715,11 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
except RuntimeError as e:
if verbose:
log_handler.exception("")
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: RuntimeError: " + str(e).strip())
except TimeoutError:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
log_handler.info("Disentangling unsuccessful: " + str(e).strip())
except Exception as e:
raise e
else:
Expand Down

0 comments on commit 1949437

Please sign in to comment.