Skip to content

Commit

Permalink
v1.5.1b
Browse files Browse the repository at this point in the history
  • Loading branch information
Kinggerm committed Feb 20, 2019
1 parent 2512d0b commit 5d729ad
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
33 changes: 15 additions & 18 deletions Library/seq_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,46 +347,43 @@ def fq_seq_simple_generator(fq_dir_list, go_to_line=1, split_pattern=None, min_s
count += 1


def chop_seqs(seq_generator_or_list, word_size, mesh_size=1):
    """Chop sequences into a set of words of length word_size.

    For every sequence, each word of length word_size (sampled every
    mesh_size positions) is added together with the word at the mirrored
    position of the complementary strand, so both strands are represented.

    :param seq_generator_or_list: iterable of sequence strings
    :param word_size: length of each word (int >= 1)
    :param mesh_size: step between successive word start positions
        (int >= 1); 1 keeps every word, larger values subsample
    :return: set of words (strings) from both strands
    """
    return_words = set()
    for seed in seq_generator_or_list:
        this_seq_len = len(seed)
        # sequences shorter than a word contribute nothing
        if this_seq_len >= word_size:
            cpt_seed = complementary_seq(seed)
            temp_length = this_seq_len - word_size
            for i in range(0, this_seq_len - word_size + 1, mesh_size):
                return_words.add(seed[i:i + word_size])
                # mirrored slice of the complementary strand; aligns with the
                # forward word assuming complementary_seq returns the
                # reverse complement — TODO(review) confirm against its def
                return_words.add(cpt_seed[temp_length - i:this_seq_len - i])
    return return_words


def chop_seqs_as_empty_dict(seq_generator_or_list, word_size, mesh_size=1):
    """Chop sequences into words, returned as a dict with all values 0.

    Same word extraction as chop_seqs (both strands, start positions
    stepped by mesh_size), but the words are keys of a dict whose values
    are initialized to 0, e.g. for later use as counters.

    :param seq_generator_or_list: iterable of sequence strings
    :param word_size: length of each word (int >= 1)
    :param mesh_size: step between successive word start positions
        (int >= 1); 1 keeps every word, larger values subsample
    :return: dict mapping each word (string) to 0
    """
    return_words = dict()
    for seed in seq_generator_or_list:
        this_seq_len = len(seed)
        # sequences shorter than a word contribute nothing
        if this_seq_len >= word_size:
            cpt_seed = complementary_seq(seed)
            temp_length = this_seq_len - word_size
            for i in range(0, this_seq_len - word_size + 1, mesh_size):
                return_words[seed[i:i + word_size]] = 0
                # mirrored slice of the complementary strand; aligns with the
                # forward word assuming complementary_seq returns the
                # reverse complement — TODO(review) confirm against its def
                return_words[cpt_seed[temp_length - i:this_seq_len - i]] = 0
    return return_words


def chop_seq_list(seq_generator_or_list, word_size, mesh_size=1):
    """Chop segmented sequences into a set of words of length word_size.

    Like chop_seqs, but each item of seq_generator_or_list is itself an
    iterable of sequence parts (e.g. a sequence split at ambiguous bases);
    every part is chopped independently on both strands.

    :param seq_generator_or_list: iterable of iterables of sequence strings
    :param word_size: length of each word (int >= 1)
    :param mesh_size: step between successive word start positions
        (int >= 1); 1 keeps every word, larger values subsample
    :return: set of words (strings) from both strands of all parts
    """
    return_words = set()
    for seed in seq_generator_or_list:
        for seq_part in seed:
            this_seq_len = len(seq_part)
            # parts shorter than a word contribute nothing
            if this_seq_len >= word_size:
                cpt_seed = complementary_seq(seq_part)
                temp_length = this_seq_len - word_size
                for i in range(0, this_seq_len - word_size + 1, mesh_size):
                    return_words.add(seq_part[i:i + word_size])
                    # mirrored slice of the complementary strand; aligns with
                    # the forward word assuming complementary_seq returns the
                    # reverse complement — TODO(review) confirm
                    return_words.add(cpt_seed[temp_length - i:this_seq_len - i])
    return return_words


Expand Down
4 changes: 4 additions & 0 deletions VERSIONS.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ def get_versions():


versions = [
{"number": "1.5.1b",
"features": [
"1. get_organelle_reads.py: value of mesh size should have effect on --out-per-round (fix a bug since 1.4.2)",
]},
{"number": "1.5.1a",
"features": [
"1. get_organelle_reads.py: from math import inf is not compatible with Python2; -R default set to 1000",
Expand Down
11 changes: 6 additions & 5 deletions get_organelle_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,14 @@ def get_options(descriptions, version):
"Default: 4E8 (-F plant_cp), 8E7 (-F plant_nr/fungus_mt), 4E7 (-F animal_mt), "
"2E9 (-F plant_mt)")
group_extending.add_option("-J", dest="jump_step", type=int, default=3,
help="The wide of steps of checking words in reads during extending process "
help="The length of step for checking words in reads during extending process "
"(integer >= 1). When you have reads of high quality, the larger the number is, "
"the faster the extension will be, "
"the more risk of missing reads in low coverage area. "
"Choose 1 to choose the slowest but safest extension strategy. Default: %default")
group_extending.add_option("-M", dest="mesh_size", type=int, default=2,
help="(Beta parameter) "
"The wide of steps of building words from seeds during extending process "
"The length of step for building words from seeds during extending process "
"(integer >= 1). When you have reads of high quality, the larger the number is, "
"the faster the extension will be, "
"the more risk of missing reads in low coverage area. "
Expand Down Expand Up @@ -1385,6 +1385,7 @@ def extending_reads(word_size, seed_file, seed_is_fq, original_fq_files, len_ind
def summarise_round(acc_words, acc_contig_id_this_round, pre_aw, r_count, acc_num_words, unique_id):
len_aw = len(acc_words)
len_al = len(acc_contig_id_this_round)
# for check words limit; memory control
acc_num_words += len_aw - pre_aw
if this_process:
inside_memory_usage = " Mem " + str(round(this_process.memory_info().rss / 1024.0 / 1024 / 1024, 3))
Expand All @@ -1404,13 +1405,13 @@ def summarise_round(acc_words, acc_contig_id_this_round, pre_aw, r_count, acc_nu
[os.path.join(round_dir, "Round." + str(r_count) + '_' + str(x + 1) + '.fq') for x in
range(len(original_fq_files))],
split_pattern=low_quality_pattern, min_sub_seq=word_size),
word_size)
word_size, mesh_size)
else:
acc_words = chop_seqs(
fq_seq_simple_generator(
[os.path.join(round_dir, "Round." + str(r_count) + '_' + str(x + 1) + '.fq') for x in
range(len(original_fq_files))]),
word_size)
word_size, mesh_size)
acc_contig_id_this_round = set()
log.info("Round " + str(r_count) + ': ' + str(unique_id + 1) + '/' + str(len_indices) + " AI " + str(
len_al) + " AW " + str(len_aw) + inside_memory_usage)
Expand Down Expand Up @@ -2588,7 +2589,7 @@ def main():

# extending process
log.info("Extending ...")
accepted_ids = set()
# accepted_ids = set()
if options.auto_word_size_step:
if options.maximum_n_words <= options.soft_max_words:
log.info("Setting '--soft-max-words " + str(int(options.maximum_n_words)) + "'")
Expand Down

0 comments on commit 5d729ad

Please sign in to comment.