-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsimupop_example.py
93 lines (74 loc) · 3.04 KB
/
simupop_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import simuPOP as sim
import math
import random
from ftprime import RecombCollector
import msprime
popsize = 10
nsamples = 2
length = 1000
nloci = 11
recomb_rate = 0.001
# 1. Must have a locus at each end of the chromosome
locus_position = [length * x/(nloci-1) for x in range(nloci)]
init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)]
# 2. Must include 'ind_id' infoField for sim.Recombinator
pop = sim.Population(
size=[popsize],
loci=[nloci],
lociPos=locus_position,
infoFields=['ind_id'])
# 3. Must tag the first generation so we can use it when initializing RecombCollector
id_tagger = sim.IdTagger()
# note: if doing this more than once in the same python session must do this to reset:
# creating a new IdTagger doesn't reset it
id_tagger.reset(startID=1)
# tag the first generation so we can pass it to rc
id_tagger.apply(pop)
# 4. Simulate population history before the start of the simulation,
# and which individuals in this history correspond to indvidiuals
# in the simuPOP simulation.
first_gen = pop.indInfo("ind_id")
init_ts = msprime.simulate(2*len(first_gen), # this is haploid simulation
length=max(locus_position))
haploid_labels = [(k,p) for k in first_gen
for p in (0,1)]
node_ids = {x:j for x, j in zip(haploid_labels, init_ts.samples())}
# 5. Initialize
rc = RecombCollector(ts=init_ts, node_ids=node_ids,
locus_position=locus_position)
recombinator = sim.Recombinator(intensity=recomb_rate,
output=rc.collect_recombs,
infoFields="ind_id")
# 6. Run the simulation, simplifying the underlying tree sequence
# every 5 generations (helps reduce memory usage for bigger sims)
simplify_interval = 5
pop.evolve(
initOps=[
sim.InitSex()
]+init_geno,
preOps=[
# 5. Must keep time up to date in the RecombCollector
sim.PyOperator(lambda pop: rc.increment_time() or True),
# Must return true or false. True keeps whole population (?)
sim.PyOperator(lambda pop: rc.simplify(pop.indInfo("ind_id")) or True, step=simplify_interval),
],
matingScheme=sim.RandomMating(
ops=[id_tagger, recombinator]),
postOps=[
sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1),
],
gen=20
)
# 7. Do one last simplify().
rc.simplify(pop.indInfo("ind_id"))
# 8. Copy location information over from simuPOP for the final generation.
locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())]
rc.add_locations(pop.indInfo("ind_id"), locations)
# 9. Choose which individuals are sampled, and write out their information.
diploid_samples = random.sample(pop.indInfo("ind_id"), nsamples)
rc.simplify(diploid_samples)
# 10. Output the tree sequence from the recorded information.
ts = rc.args.tree_sequence()
ts.dump_samples_text(open("simulated_samples.tsv","w"))
# 11. Write out the tree sequence in a file.
ts.dump("simulated.ts")