From dac760e9988ce11305068de65fcf1586e97899eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 21 Nov 2019 09:36:34 +0100 Subject: [PATCH 1/4] don't raise on error if input does not exist --- iss/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/iss/app.py b/iss/app.py index dd75100..ec095f7 100644 --- a/iss/app.py +++ b/iss/app.py @@ -130,7 +130,6 @@ def generate_reads(args): genome_list = util.count_records(f) except IOError as e: logger.error('Failed to open genome(s) file:%s' % e) - raise sys.exit(1) except AssertionError as e: logger.error('Genome(s) file seems empty: %s' % genome_file) From 0e707911a5f696cff5816e0c8179c8ae75ddc5b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 21 Nov 2019 09:42:01 +0100 Subject: [PATCH 2/4] fix for #133 --- iss/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/iss/app.py b/iss/app.py index ec095f7..f836472 100644 --- a/iss/app.py +++ b/iss/app.py @@ -182,7 +182,7 @@ def generate_reads(args): with f: fasta_file = SeqIO.parse(f, 'fasta') if args.n_genomes and not args.ncbi: - n = args.n_genomes[0][0] + n = args.n_genomes else: n = None for record in util.reservoir(fasta_file, genome_list, n): @@ -390,7 +390,6 @@ def main(): '--n_genomes', '-u', type=int, - action='append', metavar='', help='How many genomes will be used for the simulation. 
is set with \ --genomes/-g or/and --draft to take random genomes from the \ From 8479295014e8c7f7a9b5ec2b0e39a9cd5fb27ac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 21 Nov 2019 10:18:02 +0100 Subject: [PATCH 3/4] version bump --- iss/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iss/version.py b/iss/version.py index 9e0feee..5e235ea 100644 --- a/iss/version.py +++ b/iss/version.py @@ -1 +1 @@ -__version__ = '1.4.4' +__version__ = '1.4.5' From feca2b75383bedd9c31fb1fcaa2b6fdd6031c1eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 22 Nov 2019 11:35:27 +0100 Subject: [PATCH 4/4] fix abundances when using n_genomes --- iss/app.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/iss/app.py b/iss/app.py index f836472..70a022e 100644 --- a/iss/app.py +++ b/iss/app.py @@ -85,7 +85,7 @@ def generate_reads(args): if args.draft: logger.warning('--draft is in early experimental stage.') logger.warning( - '--draft disables --abundance_file and --coverage') + 'disabling --abundance_file, --coverage and --n_genomes') logger.warning('Defaulting to --abundance.') genome_files.extend(args.draft) if args.ncbi and args.n_genomes_ncbi: @@ -124,6 +124,16 @@ def generate_reads(args): genome_files, output=genome_file) + # for n_genomes we use reservoir sampling to draw random genomes + # from the concatenated genome file. We then override the file. 
+ if args.n_genomes and not args.draft and not args.ncbi: + genome_count = util.count_records(genome_file) + genome_files = [genome for genome in util.reservoir( + SeqIO.parse(genome_file, 'fasta'), + genome_count, + args.n_genomes)] + SeqIO.write(genome_files, genome_file, 'fasta') + assert os.stat(genome_file).st_size != 0 f = open(genome_file, 'r') with f: # count the number of records @@ -181,11 +191,8 @@ def generate_reads(args): f = open(genome_file, 'r') # re-opens the file with f: fasta_file = SeqIO.parse(f, 'fasta') - if args.n_genomes and not args.ncbi: - n = args.n_genomes - else: - n = None - for record in util.reservoir(fasta_file, genome_list, n): + + for record in fasta_file: # generate reads for records try: species_abundance = abundance_dic[record.id] @@ -543,4 +550,4 @@ def main(): logger = logging.getLogger(__name__) logger.debug(e) parser.print_help() - # raise # extra traceback to uncomment if all hell breaks lose + raise # extra traceback to uncomment if all hell breaks lose