diff --git a/iss/app.py b/iss/app.py index dd75100..70a022e 100644 --- a/iss/app.py +++ b/iss/app.py @@ -85,7 +85,7 @@ def generate_reads(args): if args.draft: logger.warning('--draft is in early experimental stage.') logger.warning( - '--draft disables --abundance_file and --coverage') + 'disabling --abundance_file, --coverage and --n_genomes') logger.warning('Defaulting to --abundance.') genome_files.extend(args.draft) if args.ncbi and args.n_genomes_ncbi: @@ -124,13 +124,22 @@ def generate_reads(args): genome_files, output=genome_file) + # for n_genomes we use reservoir sampling to draw random genomes + # from the concatenated genome file. We then override the file. + if args.n_genomes and not args.draft and not args.ncbi: + genome_count = util.count_records(genome_file) + genome_files = [genome for genome in util.reservoir( + SeqIO.parse(genome_file, 'fasta'), + genome_count, + args.n_genomes)] + SeqIO.write(genome_files, genome_file, 'fasta') + assert os.stat(genome_file).st_size != 0 f = open(genome_file, 'r') with f: # count the number of records genome_list = util.count_records(f) except IOError as e: logger.error('Failed to open genome(s) file:%s' % e) - raise sys.exit(1) except AssertionError as e: logger.error('Genome(s) file seems empty: %s' % genome_file) @@ -182,11 +191,8 @@ def generate_reads(args): f = open(genome_file, 'r') # re-opens the file with f: fasta_file = SeqIO.parse(f, 'fasta') - if args.n_genomes and not args.ncbi: - n = args.n_genomes[0][0] - else: - n = None - for record in util.reservoir(fasta_file, genome_list, n): + + for record in fasta_file: # generate reads for records try: species_abundance = abundance_dic[record.id] @@ -391,7 +397,6 @@ def main(): '--n_genomes', '-u', type=int, - action='append', metavar='', help='How many genomes will be used for the simulation. is set with \ --genomes/-g or/and --draft to take random genomes from the \ @@ -545,4 +550,4 @@ def main(): logger = logging.getLogger(__name__) logger.debug(e) parser.print_help() - # raise # extra traceback to uncomment if all hell breaks lose + raise # extra traceback to uncomment if all hell breaks lose diff --git a/iss/version.py b/iss/version.py index 9e0feee..5e235ea 100644 --- a/iss/version.py +++ b/iss/version.py @@ -1 +1 @@ -__version__ = '1.4.4' +__version__ = '1.4.5'