diff --git a/examples/MEMORY.md b/examples/MEMORY.md new file mode 100644 index 0000000..02ad25f --- /dev/null +++ b/examples/MEMORY.md @@ -0,0 +1,27 @@ +I ran several simulations from the command line for reps `$NSIM = 1 10 100 1000 20000`, using the command: + +```sh +/usr/bin/time -v python examples.py -T 21 -N 100 -r 0.01 -l 10 -k 10 -n $NSIM +``` + +I recorded the maximum resident size in `memory.tsv`. The data: + +```sh +reps max_memory_kbytes +1 54380 +10 62216 +100 88468 +1000 354252 +2000 657656 +``` + +Although the script calls a function that both exits and includes `del pop` and +`del rc`, the data (i.e., comparing the memory use from 1000 to 2000 reps) +indicate that total memory use increases linearly with the number of reps. + +## More +the provenance output by the script was + +```python +Namespace(chrom_length=100, gamma_alpha=0.23, gamma_beta=5.34, generations=20, logfile='-', neut_mut_rate=1e-07, nsamples=10, nselloci=10, nsims=2000, outfile=None, popsize=100, recomb_rate=0.01, sel_mut_rate=1e-07, selloci_file='sel_loci.txt', simplify_interval=500, treefile=None) +``` diff --git a/examples/examples.py b/examples/examples.py index e1e1501..376bff0 100644 --- a/examples/examples.py +++ b/examples/examples.py @@ -12,46 +12,6 @@ from ftprime import RecombCollector import msprime -REPORTING_STEP = 50 - -parser = ArgumentParser(description=description) -parser.add_argument("-T","--generations", dest="generations", type=int, - help="number of generations to run for") -parser.add_argument("-N","--popsize", dest="popsize", type=int, - help="size of the population", default=100) -parser.add_argument("-r","--recomb_rate", dest="recomb_rate", type=float, - help="recombination rate", default=1e-7) -parser.add_argument("-L","--length", dest="chrom_length", type=int, - help="number of bp in the chromosome", default=100) -parser.add_argument("-U","--neut_mut_rate", dest="neut_mut_rate", type=float, - help="neutral mutation rate", default=1e-7) -parser.add_argument("-l","--nselloci", dest="nselloci", type=int, - help="number of selected loci", default=1) -parser.add_argument("-u","--sel_mut_rate", dest="sel_mut_rate", type=float, - help="mutation rate of selected alleles", default=1e-7) -parser.add_argument("-a","--gamma_alpha", dest="gamma_alpha", type=float, - help="alpha parameter in gamma distributed selection coefficient", default=.23) -parser.add_argument("-b","--gamma_beta", dest="gamma_beta", type=float, - help="beta parameter in gamma distributed selection coefficient", default=5.34) -parser.add_argument("-k","--nsamples", dest="nsamples", type=int, - help="number of *diploid* samples, total", default=100) -parser.add_argument("-o","--outfile", dest="outfile", type=str, - help="name of output PED file (default: not output)", default=None) -parser.add_argument("--gc", "-G", dest="simplify_interval", type=int, - help="Interval between simplify steps.", default=500) -parser.add_argument("-g","--logfile", dest="logfile", type=str, - help="name of log file (or '-' for stdout)", default="-") -parser.add_argument("-s","--selloci_file", dest="selloci_file", type=str, - help="name of file to output selected locus information", default="sel_loci.txt") -parser.add_argument("--treefile","-t", type=str, dest="treefile", - help="name of output file for trees (default: not output)",default=None) - -args = parser.parse_args() - -if args.generations is None: - parser.print_help() - sys.exit() - # some simupop options involving mutation type import simuOpt simuOpt.setOptions(alleleType='mutant') @@ -74,29 +34,6 @@ def fileopt(fname,opts): fobj = open(fname,opts) return fobj -logfile = fileopt(args.logfile, "w") -selloci_file = args.selloci_file - -logfile.write("Options:\n") -logfile.write(str(args)+"\n") -logfile.write(time.strftime('%X %x %Z')+"\n") -logfile.write("----------\n") -logfile.flush() - -# locations of the loci along the chromosome? -# hard code defaults for simupop: -# >The default positions are 1, 2, 3, 4, ... on each -# >chromosome. -locus_position = list(range(0, args.chrom_length)) - -# which loci are under selection? -selected_loci = math.ceil(args.chrom_length / 2) -try: - sl = set(selected_loci) -except TypeError: - sl = set((selected_loci, )) -neutral_loci = list(set(range(1,args.chrom_length)) - sl) - ### # random selection coefficients: # modified from http://simupop.sourceforge.net/manual_svn/build/userGuide_ch5_sec9.html @@ -122,91 +59,160 @@ def __call__(self, loc, alleles): return 1. - s else: return 1. - 2.*s - -pop = sim.Population( - size=args.popsize, - loci=[args.chrom_length], - lociPos=locus_position, - infoFields=['ind_id','fitness']) - - -# set up recomb collector -# NB: we have to simulate an initial tree sequence -id_tagger = sim.IdTagger() -id_tagger.apply(pop) -first_gen = pop.indInfo("ind_id") -init_ts = msprime.simulate(2*len(first_gen), - length=max(locus_position)) -haploid_labels = [(k,p) for k in first_gen - for p in (0,1)] -node_ids = {x:j for x, j in zip(haploid_labels, init_ts.samples())} -rc = RecombCollector(ts=init_ts, node_ids=node_ids, - locus_position=locus_position) - -# initially, population is monogenic -init_geno=[sim.InitGenotype(freq=1.0)] - -pop.evolve( - initOps=[ - sim.InitSex(), - ]+init_geno, - preOps=[ - sim.PyOperator(lambda pop: rc.increment_time() or True), - sim.SNPMutator(u=args.neut_mut_rate,v=0,loci=neutral_loci), - sim.SNPMutator(u=args.sel_mut_rate,v=0,loci=selected_loci), - sim.PyMlSelector(GammaDistributedFitness(args.gamma_alpha, args.gamma_beta), - loci=selected_loci, output=">>"+selloci_file), - ], - matingScheme=sim.RandomMating( - ops=[ - id_tagger, - sim.Recombinator(rates=args.recomb_rate, output=rc.collect_recombs, - infoFields="ind_id"), - ] ), - postOps=[ - sim.Stat(numOfSegSites=sim.ALL_AVAIL, step=REPORTING_STEP, - vars=['numOfSegSites', 'numOfFixedSites']), - sim.PyEval(r"'Gen: %2d #seg/#fixed sites: %d / %d\n' % (gen, numOfSegSites, numOfFixedSites)", step=REPORTING_STEP), - sim.PyOperator(lambda pop: rc.simplify(pop.indInfo("ind_id")) or True, - step=args.simplify_interval), - ], - gen = args.generations -) - -logfile.write("Done simulating!\n") -logfile.write(time.strftime('%X %x %Z')+"\n") -logfile.write("----------\n") -logfile.flush() - -logfile.write("Collecting samples:\n") -logfile.write(" " + str(args.nsamples) + " of them") -# logfile.write(" " + "ids:" + str(pop.indInfo("ind_id"))) - -diploid_samples = random.sample(pop.indInfo("ind_id"), args.nsamples) -rc.simplify(diploid_samples) - -del pop - -logfile.write("Samples:\n") -#logfile.write(str(rc.diploid_samples)+"\n") -logfile.write("----------\n") -logfile.flush() - -ts = rc.args.tree_sequence() -del rc - -logfile.write("Loaded into tree sequence!\n") -logfile.write(time.strftime('%X %x %Z')+"\n") -logfile.write("----------\n") -logfile.flush() - -if args.treefile is not None: - ts.dump(args.treefile) - -logfile.write("Writing out samples.\n") -logfile.write(time.strftime('%X %x %Z')+"\n") -logfile.write("----------\n") -logfile.flush() - -logfile.write("All done!\n") -logfile.close() +def run(args): + logfile = fileopt(args.logfile, "w") + selloci_file = args.selloci_file + + logfile.write("Options:\n") + logfile.write(str(args)+"\n") + logfile.write(time.strftime('%X %x %Z')+"\n") + logfile.write("----------\n") + logfile.flush() + + # locations of the loci along the chromosome? + # hard code defaults for simupop: + # >The default positions are 1, 2, 3, 4, ... on each + # >chromosome. + locus_position = list(range(0, args.chrom_length)) + + # which loci are under selection? + selected_loci = math.ceil(args.chrom_length / 2) + try: + sl = set(selected_loci) + except TypeError: + sl = set((selected_loci, )) + neutral_loci = list(set(range(1,args.chrom_length)) - sl) + + pop = sim.Population( + size=args.popsize, + loci=[args.chrom_length], + lociPos=locus_position, + infoFields=['ind_id','fitness']) + + + # set up recomb collector + # NB: we have to simulate an initial tree sequence + id_tagger = sim.IdTagger() + id_tagger.apply(pop) + first_gen = pop.indInfo("ind_id") + init_ts = msprime.simulate(2*len(first_gen), + length=max(locus_position)) + haploid_labels = [(k,p) for k in first_gen + for p in (0,1)] + node_ids = {x:j for x, j in zip(haploid_labels, init_ts.samples())} + rc = RecombCollector(ts=init_ts, node_ids=node_ids, + locus_position=locus_position) + + # initially, population is monogenic + init_geno=[sim.InitGenotype(freq=1.0)] + + pop.evolve( + initOps=[ + sim.InitSex(), + ]+init_geno, + preOps=[ + sim.PyOperator(lambda pop: rc.increment_time() or True), + sim.SNPMutator(u=args.neut_mut_rate,v=0,loci=neutral_loci), + sim.SNPMutator(u=args.sel_mut_rate,v=0,loci=selected_loci), + sim.PyMlSelector(GammaDistributedFitness(args.gamma_alpha, args.gamma_beta), + loci=selected_loci, output=">>"+selloci_file), + ], + matingScheme=sim.RandomMating( + ops=[ + id_tagger, + sim.Recombinator(rates=args.recomb_rate, output=rc.collect_recombs, + infoFields="ind_id"), + ] ), + postOps=[ + sim.Stat(numOfSegSites=sim.ALL_AVAIL, step=REPORTING_STEP, + vars=['numOfSegSites', 'numOfFixedSites']), + sim.PyEval(r"'Gen: %2d #seg/#fixed sites: %d / %d\n' % (gen, numOfSegSites, numOfFixedSites)", step=REPORTING_STEP), + sim.PyOperator(lambda pop: rc.simplify(pop.indInfo("ind_id")) or True, + step=args.simplify_interval), + ], + gen = args.generations + ) + + logfile.write("Done simulating!\n") + logfile.write(time.strftime('%X %x %Z')+"\n") + logfile.write("----------\n") + logfile.flush() + + logfile.write("Collecting samples:\n") + logfile.write(" " + str(args.nsamples) + " of them") + # logfile.write(" " + "ids:" + str(pop.indInfo("ind_id"))) + + diploid_samples = random.sample(pop.indInfo("ind_id"), args.nsamples) + rc.simplify(diploid_samples) + + del pop + + logfile.write("Samples:\n") + #logfile.write(str(rc.diploid_samples)+"\n") + logfile.write("----------\n") + logfile.flush() + + ts = rc.args.tree_sequence() + del rc + + logfile.write("Loaded into tree sequence!\n") + logfile.write(time.strftime('%X %x %Z')+"\n") + logfile.write("----------\n") + logfile.flush() + + if args.treefile is not None: + ts.dump(args.treefile) + + logfile.write("Writing out samples.\n") + logfile.write(time.strftime('%X %x %Z')+"\n") + logfile.write("----------\n") + logfile.flush() + + logfile.write("All done!\n") + +if __name__ == '__main__': + + REPORTING_STEP = 50 + + parser = ArgumentParser(description=description) + parser.add_argument("-T","--generations", dest="generations", type=int, + help="number of generations to run for") + parser.add_argument("-N","--popsize", dest="popsize", type=int, + help="size of the population", default=100) + parser.add_argument("-r","--recomb_rate", dest="recomb_rate", type=float, + help="recombination rate", default=1e-7) + parser.add_argument("-L","--length", dest="chrom_length", type=int, + help="number of bp in the chromosome", default=100) + parser.add_argument("-U","--neut_mut_rate", dest="neut_mut_rate", type=float, + help="neutral mutation rate", default=1e-7) + parser.add_argument("-l","--nselloci", dest="nselloci", type=int, + help="number of selected loci", default=1) + parser.add_argument("-u","--sel_mut_rate", dest="sel_mut_rate", type=float, + help="mutation rate of selected alleles", default=1e-7) + parser.add_argument("-a","--gamma_alpha", dest="gamma_alpha", type=float, + help="alpha parameter in gamma distributed selection coefficient", default=.23) + parser.add_argument("-b","--gamma_beta", dest="gamma_beta", type=float, + help="beta parameter in gamma distributed selection coefficient", default=5.34) + parser.add_argument("-k","--nsamples", dest="nsamples", type=int, + help="number of *diploid* samples, total", default=100) + parser.add_argument("-o","--outfile", dest="outfile", type=str, + help="name of output PED file (default: not output)", default=None) + parser.add_argument("--gc", "-G", dest="simplify_interval", type=int, + help="Interval between simplify steps.", default=500) + parser.add_argument("-g","--logfile", dest="logfile", type=str, + help="name of log file (or '-' for stdout)", default="-") + parser.add_argument("-s","--selloci_file", dest="selloci_file", type=str, + help="name of file to output selected locus information", default="sel_loci.txt") + parser.add_argument("--treefile","-t", type=str, dest="treefile", + help="name of output file for trees (default: not output)",default=None) + parser.add_argument("--nsim","-n", type=int, dest="nsims", + help="repetitions",default=1) + + args = parser.parse_args() + + if args.generations is None: + parser.print_help() + sys.exit() + + for _ in range(args.nsims): + run(args) diff --git a/examples/memory.tsv b/examples/memory.tsv new file mode 100644 index 0000000..d05ce8c --- /dev/null +++ b/examples/memory.tsv @@ -0,0 +1,6 @@ +reps max_memory_kbytes +1 54380 +10 62216 +100 88468 +1000 354252 +2000 657656