From 7725b9a2938e105599bd663d2011ff25aca41362 Mon Sep 17 00:00:00 2001 From: Jaime Ashander Date: Tue, 25 Apr 2017 23:28:47 -0700 Subject: [PATCH 1/5] Example using `vprof` for running w/ RecombCollector WITHOUT simplification/checking/computing tree sequence or other - about 60% of time is spend in `argrecorder.py` - most of this (46%) is in `argrecorder.py:add_record` and 3/4 of that in stuff that is not `msprime` (which takes up total of 10% of the 46% for argrecorder.py in the flame graph from command below) --- so mostly the logic in `argrecorder.py:merge_records` and buildin logic of OrderedDict. you can re-run (from project root after first `pip install vprof`): `vprof -c cmp tests/test_recomb_collector.py` --- tests/test_recomb_collector.py | 43 +++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index 93e06e2..fb3daa4 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -114,22 +114,37 @@ def test_simupop(make_pop, generations, popsize): locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] rc.add_diploid_samples(pop.indInfo("ind_id"), locations) - check_record_order(rc.args) - check_tables(rc.args) + # check_record_order(rc.args) + # check_tables(rc.args) - for x in rc.args: - print(rc.args[x]) - print(rc.args.node_table()) - print(rc.args.edgeset_table()) + # for x in rc.args: + # print(rc.args[x]) + # print(rc.args.node_table()) + # print(rc.args.edgeset_table()) - ts = rc.args.tree_sequence() + # ts = rc.args.tree_sequence() - print("coalescence records:") - for x in ts.records(): - print(x) + # print("coalescence records:") + # for x in ts.records(): + # print(x) - ts.simplify(samples=list(range(nsamples))) + # ts.simplify(samples=list(range(nsamples))) - print("trees:") - for x in ts.trees(): - print(x) + # print("trees:") + # for x in ts.trees(): + # print(x) + + +if __name__ == '__main__': + class MockRequest(object): + def __init__(self): + pass + mr = MockRequest() + mr.param = lambda recombinator, popsize: sim.HeteroMating([sim.RandomMating( + ops=[ + sim.IdTagger(), + recombinator + ]), + sim.CloneMating()], + subPopSize=popsize * 2) + test_simupop(make_pop(mr), generations=100, popsize=500) From d2a4803ef0c71e9abc0284a87d7c392d4b1f5b5a Mon Sep 17 00:00:00 2001 From: Jaime Ashander Date: Wed, 26 Apr 2017 10:25:47 -0700 Subject: [PATCH 2/5] Use line_profiler To re-run ```sh pip install line_profiler kernprof -v -l tests/test_arg_recorder_with_wf.py > devel/prof.txt kernprof -v -l tests/test_recomb_collector.py > devel/prof_rc.txt ``` --- devel/prof.txt | 148 +++++++++++++++++++++++++++++ devel/prof_rc.txt | 148 +++++++++++++++++++++++++++++ ftprime/argrecorder.py | 4 +- tests/test_arg_recorder_with_wf.py | 25 ++--- tests/test_recomb_collector.py | 2 +- 5 files changed, 314 insertions(+), 13 deletions(-) create mode 100644 devel/prof.txt create mode 100644 devel/prof_rc.txt diff --git a/devel/prof.txt b/devel/prof.txt new file mode 100644 index 0000000..6f8c936 --- /dev/null +++ b/devel/prof.txt @@ -0,0 +1,148 @@ +Wrote profile results to test_arg_recorder_with_wf.py.lprof +Timer unit: 1e-06 s + +Total time: 0.565759 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_individual at line 26 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 26 @profile + 27 def add_individual(self, name, time, population=msprime.NULL_POPULATION, + 28 is_sample=False): + 29 '''Add a new individual. + 30 We need to add individuals when they are *born*, + 31 rather than the first time they reproduce, to ensure + 32 that records are output in order by birth time of the parent. + 33 ''' + 34 101240 72447 0.7 12.8 if name not in self: + 35 101240 58303 0.6 10.3 self[name] = (msprime.Node(time=time, population=population, + 36 101240 297613 2.9 52.6 name=name, is_sample=is_sample), []) + 37 101240 137396 1.4 24.3 self.num_nodes = max(self.num_nodes, 1+int(name)) + +Total time: 7.5774 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_record at line 39 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 39 @profile + 40 def add_record(self, left, right, parent, children): + 41 ''' + 42 Add records corresponding to a reproduction event in which children (a + 43 tuple of IDs) inherit from parent (a single ID) on the interval + 44 [left,right). + 45 ''' + 46 # unneeded but helpful for debugging + 47 150314 120881 0.8 1.6 if parent not in self.keys(): + 48 raise ValueError("Parent " + str(parent) + + 49 "'s birth time has not been recorded with " + + 50 ".add_individual().") + 51 # time = self[parent][0] + 52 150314 79151 0.5 1.0 new_rec = msprime.Edgeset( + 53 150314 71103 0.5 0.9 parent=parent, + 54 150314 66660 0.4 0.9 children=children, + 55 150314 65861 0.4 0.9 left=left, + 56 150314 325604 2.2 4.3 right=right) + 57 150314 6848145 45.6 90.4 merge_records(new_rec, self[parent][1]) + +Total time: 4.08047 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: merge_records at line 142 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 142 @profile + 143 def merge_records(new, existing): + 144 ''' + 145 Incorporate a new record (l,r,x,c,t[x]) + 146 into a list of existing ones (a,b,x,C,t[x]) sorted on left endpoint. + 147 Keeping them in sorted order simplifies the procedure + 148 (makes it so we don't have to split the new record). + 149 ''' + 150 150314 101369 0.7 2.5 k = 0 + 151 150314 87001 0.6 2.1 cur_left = new.left + 152 # print("MR: -----") + 153 # print("adding", new) + 154 # print(" to", existing) + 155 308472 241009 0.8 5.9 while (k < len(existing)) and (cur_left < new.right): + 156 158158 124833 0.8 3.1 left = existing[k].left + 157 158158 93386 0.6 2.3 right = existing[k].right + 158 158158 90150 0.6 2.2 parent = existing[k].parent + 159 158158 96347 0.6 2.4 children = existing[k].children + 160 # print("k:",k) + 161 # print("existing:",existing[k]) + 162 # print("cur_left:",cur_left) + 163 158158 92653 0.6 2.3 if new.parent != parent: + 164 raise ValueError("Trying to merge records with different parents.") + 165 158158 85414 0.5 2.1 if right <= cur_left: + 166 # no overlap + 167 # print("no overlap") + 168 15534 8903 0.6 0.2 k += 1 + 169 15534 7404 0.5 0.2 continue + 170 142624 77301 0.5 1.9 if cur_left < left: + 171 # print("dangling left") + 172 15409 10416 0.7 0.3 existing.insert(k, msprime.Edgeset( + 173 15409 7887 0.5 0.2 left=cur_left, + 174 15409 16021 1.0 0.4 right=min(new.right, left), + 175 15409 7897 0.5 0.2 parent=parent, + 176 15409 43180 2.8 1.1 children=new.children)) + 177 15409 15864 1.0 0.4 cur_left = min(new.right, left) + 178 15409 9449 0.6 0.2 k += 1 + 179 15409 7490 0.5 0.2 continue + 180 127215 393801 3.1 9.7 combined_children = tuple(sorted(children+new.children)) + 181 127215 87066 0.7 2.1 combined_rec = msprime.Edgeset( + 182 127215 67410 0.5 1.7 left=cur_left, + 183 127215 234108 1.8 5.7 right=min(new.right, right), + 184 127215 73773 0.6 1.8 parent=new.parent, + 185 127215 312244 2.5 7.7 children=combined_children) + 186 127215 78337 0.6 1.9 if cur_left == left: + 187 # print("equal left") + 188 105571 67860 0.6 1.7 if new.right < right: + 189 # print("overlap right") + 190 21590 13336 0.6 0.3 mod_rec = msprime.Edgeset( + 191 21590 12284 0.6 0.3 left=new.right, + 192 21590 11592 0.5 0.3 right=right, + 193 21590 11575 0.5 0.3 parent=parent, + 194 21590 149418 6.9 3.7 children=children) + 195 21590 19858 0.9 0.5 existing[k] = combined_rec + 196 21590 13557 0.6 0.3 k += 1 + 197 21590 20224 0.9 0.5 existing.insert(k, mod_rec) + 198 21590 13962 0.6 0.3 k += 1 + 199 else: + 200 # print("dangling right") + 201 83981 71541 0.9 1.8 existing[k] = combined_rec + 202 83981 55980 0.7 1.4 k += 1 + 203 else: + 204 # here we know that left < cur_left < right + 205 # print("overlap left") + 206 21644 13649 0.6 0.3 mod_rec = msprime.Edgeset( + 207 21644 11864 0.5 0.3 left=left, + 208 21644 11487 0.5 0.3 right=cur_left, + 209 21644 11471 0.5 0.3 parent=parent, + 210 21644 93849 4.3 2.3 children=children) + 211 21644 20146 0.9 0.5 existing[k] = mod_rec + 212 21644 13783 0.6 0.3 k += 1 + 213 21644 20156 0.9 0.5 existing.insert(k, combined_rec) + 214 21644 13301 0.6 0.3 k += 1 + 215 21644 14807 0.7 0.4 if new.right < right: + 216 # print("overlap right") + 217 existing.insert(k, msprime.Edgeset( + 218 left=new.right, + 219 right=right, + 220 parent=parent, + 221 children=children)) + 222 k += 1 + 223 127215 110940 0.9 2.7 cur_left = min(new.right, right) + 224 # add whatever's left at the end + 225 150314 101572 0.7 2.5 if cur_left < new.right: + 226 82579 60579 0.7 1.5 existing.insert(k, msprime.Edgeset( + 227 82579 47194 0.6 1.2 left=cur_left, + 228 82579 50544 0.6 1.2 right=new.right, + 229 82579 47932 0.6 1.2 parent=new.parent, + 230 82579 428151 5.2 10.5 children=new.children)) + 231 # print("getting") + 232 # for x in existing: + 233 # print(" ", x) + 234 150314 77143 0.5 1.9 return None + diff --git a/devel/prof_rc.txt b/devel/prof_rc.txt new file mode 100644 index 0000000..6d2ac3d --- /dev/null +++ b/devel/prof_rc.txt @@ -0,0 +1,148 @@ +Wrote profile results to test_recomb_collector.py.lprof +Timer unit: 1e-06 s + +Total time: 2.59302 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_individual at line 26 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 26 @profile + 27 def add_individual(self, name, time, population=msprime.NULL_POPULATION, + 28 is_sample=False): + 29 '''Add a new individual. + 30 We need to add individuals when they are *born*, + 31 rather than the first time they reproduce, to ensure + 32 that records are output in order by birth time of the parent. + 33 ''' + 34 402005 305786 0.8 11.8 if name not in self: + 35 402005 231339 0.6 8.9 self[name] = (msprime.Node(time=time, population=population, + 36 402005 1157105 2.9 44.6 name=name, is_sample=is_sample), []) + 37 402005 898793 2.2 34.7 self.num_nodes = max(self.num_nodes, 1+int(name)) + +Total time: 28.8526 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_record at line 39 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 39 @profile + 40 def add_record(self, left, right, parent, children): + 41 ''' + 42 Add records corresponding to a reproduction event in which children (a + 43 tuple of IDs) inherit from parent (a single ID) on the interval + 44 [left,right). + 45 ''' + 46 # unneeded but helpful for debugging + 47 559608 1478529 2.6 5.1 if parent not in self.keys(): + 48 raise ValueError("Parent " + str(parent) + + 49 "'s birth time has not been recorded with " + + 50 ".add_individual().") + 51 # time = self[parent][0] + 52 559608 300326 0.5 1.0 new_rec = msprime.Edgeset( + 53 559608 254879 0.5 0.9 parent=parent, + 54 559608 242579 0.4 0.8 children=children, + 55 559608 238853 0.4 0.8 left=left, + 56 559608 1555702 2.8 5.4 right=right) + 57 559608 24781697 44.3 85.9 merge_records(new_rec, self[parent][1]) + +Total time: 14.25 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: merge_records at line 142 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 142 @profile + 143 def merge_records(new, existing): + 144 ''' + 145 Incorporate a new record (l,r,x,c,t[x]) + 146 into a list of existing ones (a,b,x,C,t[x]) sorted on left endpoint. + 147 Keeping them in sorted order simplifies the procedure + 148 (makes it so we don't have to split the new record). + 149 ''' + 150 559608 617547 1.1 4.3 k = 0 + 151 559608 348545 0.6 2.4 cur_left = new.left + 152 # print("MR: -----") + 153 # print("adding", new) + 154 # print(" to", existing) + 155 1115110 964636 0.9 6.8 while (k < len(existing)) and (cur_left < new.right): + 156 555502 493499 0.9 3.5 left = existing[k].left + 157 555502 339232 0.6 2.4 right = existing[k].right + 158 555502 349807 0.6 2.5 parent = existing[k].parent + 159 555502 360762 0.6 2.5 children = existing[k].children + 160 # print("k:",k) + 161 # print("existing:",existing[k]) + 162 # print("cur_left:",cur_left) + 163 555502 336500 0.6 2.4 if new.parent != parent: + 164 raise ValueError("Trying to merge records with different parents.") + 165 555502 327994 0.6 2.3 if right <= cur_left: + 166 # no overlap + 167 # print("no overlap") + 168 64431 38189 0.6 0.3 k += 1 + 169 64431 31893 0.5 0.2 continue + 170 491071 275269 0.6 1.9 if cur_left < left: + 171 # print("dangling left") + 172 46856 32937 0.7 0.2 existing.insert(k, msprime.Edgeset( + 173 46856 24331 0.5 0.2 left=cur_left, + 174 46856 53090 1.1 0.4 right=min(new.right, left), + 175 46856 24376 0.5 0.2 parent=parent, + 176 46856 132844 2.8 0.9 children=new.children)) + 177 46856 291283 6.2 2.0 cur_left = min(new.right, left) + 178 46856 29613 0.6 0.2 k += 1 + 179 46856 23300 0.5 0.2 continue + 180 444215 771674 1.7 5.4 combined_children = tuple(sorted(children+new.children)) + 181 444215 370745 0.8 2.6 combined_rec = msprime.Edgeset( + 182 444215 238826 0.5 1.7 left=cur_left, + 183 444215 1016910 2.3 7.1 right=min(new.right, right), + 184 444215 264162 0.6 1.9 parent=new.parent, + 185 444215 1220131 2.7 8.6 children=combined_children) + 186 444215 289937 0.7 2.0 if cur_left == left: + 187 # print("equal left") + 188 374551 262916 0.7 1.8 if new.right < right: + 189 # print("overlap right") + 190 62945 38629 0.6 0.3 mod_rec = msprime.Edgeset( + 191 62945 36505 0.6 0.3 left=new.right, + 192 62945 34104 0.5 0.2 right=right, + 193 62945 34012 0.5 0.2 parent=parent, + 194 62945 213261 3.4 1.5 children=children) + 195 62945 65925 1.0 0.5 existing[k] = combined_rec + 196 62945 41770 0.7 0.3 k += 1 + 197 62945 60624 1.0 0.4 existing.insert(k, mod_rec) + 198 62945 42126 0.7 0.3 k += 1 + 199 else: + 200 # print("dangling right") + 201 311606 291615 0.9 2.0 existing[k] = combined_rec + 202 311606 214005 0.7 1.5 k += 1 + 203 else: + 204 # here we know that left < cur_left < right + 205 # print("overlap left") + 206 69664 43775 0.6 0.3 mod_rec = msprime.Edgeset( + 207 69664 39459 0.6 0.3 left=left, + 208 69664 37684 0.5 0.3 right=cur_left, + 209 69664 37276 0.5 0.3 parent=parent, + 210 69664 183293 2.6 1.3 children=children) + 211 69664 70705 1.0 0.5 existing[k] = mod_rec + 212 69664 45699 0.7 0.3 k += 1 + 213 69664 64248 0.9 0.5 existing.insert(k, combined_rec) + 214 69664 43549 0.6 0.3 k += 1 + 215 69664 51159 0.7 0.4 if new.right < right: + 216 # print("overlap right") + 217 6531 4663 0.7 0.0 existing.insert(k, msprime.Edgeset( + 218 6531 4029 0.6 0.0 left=new.right, + 219 6531 3764 0.6 0.0 right=right, + 220 6531 3696 0.6 0.0 parent=parent, + 221 6531 15758 2.4 0.1 children=children)) + 222 6531 4291 0.7 0.0 k += 1 + 223 444215 411173 0.9 2.9 cur_left = min(new.right, right) + 224 # add whatever's left at the end + 225 559608 407324 0.7 2.9 if cur_left < new.right: + 226 309118 232210 0.8 1.6 existing.insert(k, msprime.Edgeset( + 227 309118 179186 0.6 1.3 left=cur_left, + 228 309118 192993 0.6 1.4 right=new.right, + 229 309118 182372 0.6 1.3 parent=new.parent, + 230 309118 1095978 3.5 7.7 children=new.children)) + 231 # print("getting") + 232 # for x in existing: + 233 # print(" ", x) + 234 559608 292228 0.5 2.1 return None + diff --git a/ftprime/argrecorder.py b/ftprime/argrecorder.py index b0c768f..553dccd 100644 --- a/ftprime/argrecorder.py +++ b/ftprime/argrecorder.py @@ -1,7 +1,6 @@ import msprime from collections import OrderedDict - class ARGrecorder(OrderedDict): ''' Keys are individual IDs, and values are tuples, whose first entry is a Node, @@ -24,6 +23,7 @@ def __str__(self): ret += self.edgeset_table().__str__() return ret + @profile def add_individual(self, name, time, population=msprime.NULL_POPULATION, is_sample=False): '''Add a new individual. @@ -36,6 +36,7 @@ def add_individual(self, name, time, population=msprime.NULL_POPULATION, name=name, is_sample=is_sample), []) self.num_nodes = max(self.num_nodes, 1+int(name)) + @profile def add_record(self, left, right, parent, children): ''' Add records corresponding to a reproduction event in which children (a @@ -138,6 +139,7 @@ def add_samples(self, samples, length, populations=None): children=(k,)) +@profile def merge_records(new, existing): ''' Incorporate a new record (l,r,x,c,t[x]) diff --git a/tests/test_arg_recorder_with_wf.py b/tests/test_arg_recorder_with_wf.py index cea72ba..27ecc50 100644 --- a/tests/test_arg_recorder_with_wf.py +++ b/tests/test_arg_recorder_with_wf.py @@ -28,19 +28,22 @@ def test_simulation_runs(N, gen, samples): random.seed(123) records = wf(N=N, ngens=gen, nsamples=samples, survival=0.5) - check_tables(records) + # check_tables(records) - for x in records: - print(x, records[x]) + # for x in records: + # print(x, records[x]) - print(records.edgeset_table()) - print(records.node_table()) + # print(records.edgeset_table()) + # print(records.node_table()) - ts = records.tree_sequence() + # ts = records.tree_sequence() - for t in ts.trees(): - print(t) + # for t in ts.trees(): + # print(t) - print("Mean pairwise diversity:",ts.get_pairwise_diversity()) - print("(should be zero)") - assert ts.get_pairwise_diversity() == 0.0 + # print("Mean pairwise diversity:",ts.get_pairwise_diversity()) + # print("(should be zero)") + # assert ts.get_pairwise_diversity() == 0.0 + +if __name__ == '__main__': + test_simulation_runs(1000, 200, 100) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index fb3daa4..f5590b1 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -147,4 +147,4 @@ def __init__(self): ]), sim.CloneMating()], subPopSize=popsize * 2) - test_simupop(make_pop(mr), generations=100, popsize=500) + test_simupop(make_pop(mr), generations=200, popsize=1000) From e8c4f742bc1b0f077b68224e036ac11dd61e0eb4 Mon Sep 17 00:00:00 2001 From: Jaime Ashander Date: Wed, 26 Apr 2017 11:01:49 -0700 Subject: [PATCH 3/5] Add timing for whole sim --- devel/prof_rc.txt | 183 +++++++++++++++++++-------------- tests/test_recomb_collector.py | 29 +++--- 2 files changed, 122 insertions(+), 90 deletions(-) diff --git a/devel/prof_rc.txt b/devel/prof_rc.txt index 6d2ac3d..e432d06 100644 --- a/devel/prof_rc.txt +++ b/devel/prof_rc.txt @@ -1,7 +1,7 @@ Wrote profile results to test_recomb_collector.py.lprof Timer unit: 1e-06 s -Total time: 2.59302 s +Total time: 2.21355 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: add_individual at line 26 @@ -15,12 +15,12 @@ Line # Hits Time Per Hit % Time Line Contents 31 rather than the first time they reproduce, to ensure 32 that records are output in order by birth time of the parent. 33 ''' - 34 402005 305786 0.8 11.8 if name not in self: - 35 402005 231339 0.6 8.9 self[name] = (msprime.Node(time=time, population=population, - 36 402005 1157105 2.9 44.6 name=name, is_sample=is_sample), []) - 37 402005 898793 2.2 34.7 self.num_nodes = max(self.num_nodes, 1+int(name)) + 34 402005 312421 0.8 14.1 if name not in self: + 35 402005 229132 0.6 10.4 self[name] = (msprime.Node(time=time, population=population, + 36 402005 1169454 2.9 52.8 name=name, is_sample=is_sample), []) + 37 402005 502542 1.3 22.7 self.num_nodes = max(self.num_nodes, 1+int(name)) -Total time: 28.8526 s +Total time: 29.8855 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: add_record at line 39 @@ -34,19 +34,19 @@ Line # Hits Time Per Hit % Time Line Contents 44 [left,right). 45 ''' 46 # unneeded but helpful for debugging - 47 559608 1478529 2.6 5.1 if parent not in self.keys(): + 47 559608 637110 1.1 2.1 if parent not in self.keys(): 48 raise ValueError("Parent " + str(parent) + 49 "'s birth time has not been recorded with " + 50 ".add_individual().") 51 # time = self[parent][0] - 52 559608 300326 0.5 1.0 new_rec = msprime.Edgeset( - 53 559608 254879 0.5 0.9 parent=parent, - 54 559608 242579 0.4 0.8 children=children, - 55 559608 238853 0.4 0.8 left=left, - 56 559608 1555702 2.8 5.4 right=right) - 57 559608 24781697 44.3 85.9 merge_records(new_rec, self[parent][1]) + 52 559608 314858 0.6 1.1 new_rec = msprime.Edgeset( + 53 559608 265560 0.5 0.9 parent=parent, + 54 559608 250048 0.4 0.8 children=children, + 55 559608 239496 0.4 0.8 left=left, + 56 559608 1990254 3.6 6.7 right=right) + 57 559608 26188192 46.8 87.6 merge_records(new_rec, self[parent][1]) -Total time: 14.25 s +Total time: 15.5276 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: merge_records at line 142 @@ -60,89 +60,118 @@ Line # Hits Time Per Hit % Time Line Contents 147 Keeping them in sorted order simplifies the procedure 148 (makes it so we don't have to split the new record). 149 ''' - 150 559608 617547 1.1 4.3 k = 0 - 151 559608 348545 0.6 2.4 cur_left = new.left + 150 559608 481612 0.9 3.1 k = 0 + 151 559608 339875 0.6 2.2 cur_left = new.left 152 # print("MR: -----") 153 # print("adding", new) 154 # print(" to", existing) - 155 1115110 964636 0.9 6.8 while (k < len(existing)) and (cur_left < new.right): - 156 555502 493499 0.9 3.5 left = existing[k].left - 157 555502 339232 0.6 2.4 right = existing[k].right - 158 555502 349807 0.6 2.5 parent = existing[k].parent - 159 555502 360762 0.6 2.5 children = existing[k].children + 155 1115089 945952 0.8 6.1 while (k < len(existing)) and (cur_left < new.right): + 156 555481 490223 0.9 3.2 left = existing[k].left + 157 555481 349657 0.6 2.3 right = existing[k].right + 158 555481 345339 0.6 2.2 parent = existing[k].parent + 159 555481 365655 0.7 2.4 children = existing[k].children 160 # print("k:",k) 161 # print("existing:",existing[k]) 162 # print("cur_left:",cur_left) - 163 555502 336500 0.6 2.4 if new.parent != parent: + 163 555481 338301 0.6 2.2 if new.parent != parent: 164 raise ValueError("Trying to merge records with different parents.") - 165 555502 327994 0.6 2.3 if right <= cur_left: + 165 555481 331886 0.6 2.1 if right <= cur_left: 166 # no overlap 167 # print("no overlap") - 168 64431 38189 0.6 0.3 k += 1 - 169 64431 31893 0.5 0.2 continue - 170 491071 275269 0.6 1.9 if cur_left < left: + 168 64428 37979 0.6 0.2 k += 1 + 169 64428 31858 0.5 0.2 continue + 170 491053 270569 0.6 1.7 if cur_left < left: 171 # print("dangling left") - 172 46856 32937 0.7 0.2 existing.insert(k, msprime.Edgeset( - 173 46856 24331 0.5 0.2 left=cur_left, - 174 46856 53090 1.1 0.4 right=min(new.right, left), - 175 46856 24376 0.5 0.2 parent=parent, - 176 46856 132844 2.8 0.9 children=new.children)) - 177 46856 291283 6.2 2.0 cur_left = min(new.right, left) - 178 46856 29613 0.6 0.2 k += 1 - 179 46856 23300 0.5 0.2 continue - 180 444215 771674 1.7 5.4 combined_children = tuple(sorted(children+new.children)) - 181 444215 370745 0.8 2.6 combined_rec = msprime.Edgeset( - 182 444215 238826 0.5 1.7 left=cur_left, - 183 444215 1016910 2.3 7.1 right=min(new.right, right), - 184 444215 264162 0.6 1.9 parent=new.parent, - 185 444215 1220131 2.7 8.6 children=combined_children) - 186 444215 289937 0.7 2.0 if cur_left == left: + 172 46793 33272 0.7 0.2 existing.insert(k, msprime.Edgeset( + 173 46793 24317 0.5 0.2 left=cur_left, + 174 46793 54625 1.2 0.4 right=min(new.right, left), + 175 46793 23937 0.5 0.2 parent=parent, + 176 46793 142183 3.0 0.9 children=new.children)) + 177 46793 56301 1.2 0.4 cur_left = min(new.right, left) + 178 46793 29363 0.6 0.2 k += 1 + 179 46793 23620 0.5 0.2 continue + 180 444260 1227743 2.8 7.9 combined_children = tuple(sorted(children+new.children)) + 181 444260 316802 0.7 2.0 combined_rec = msprime.Edgeset( + 182 444260 237014 0.5 1.5 left=cur_left, + 183 444260 1003453 2.3 6.5 right=min(new.right, right), + 184 444260 261655 0.6 1.7 parent=new.parent, + 185 444260 1521928 3.4 9.8 children=combined_children) + 186 444260 303782 0.7 2.0 if cur_left == left: 187 # print("equal left") - 188 374551 262916 0.7 1.8 if new.right < right: + 188 374504 265749 0.7 1.7 if new.right < right: 189 # print("overlap right") - 190 62945 38629 0.6 0.3 mod_rec = msprime.Edgeset( - 191 62945 36505 0.6 0.3 left=new.right, - 192 62945 34104 0.5 0.2 right=right, - 193 62945 34012 0.5 0.2 parent=parent, - 194 62945 213261 3.4 1.5 children=children) - 195 62945 65925 1.0 0.5 existing[k] = combined_rec - 196 62945 41770 0.7 0.3 k += 1 - 197 62945 60624 1.0 0.4 existing.insert(k, mod_rec) - 198 62945 42126 0.7 0.3 k += 1 + 190 62926 39431 0.6 0.3 mod_rec = msprime.Edgeset( + 191 62926 35946 0.6 0.2 left=new.right, + 192 62926 34162 0.5 0.2 right=right, + 193 62926 33894 0.5 0.2 parent=parent, + 194 62926 552628 8.8 3.6 children=children) + 195 62926 65417 1.0 0.4 existing[k] = combined_rec + 196 62926 41385 0.7 0.3 k += 1 + 197 62926 63402 1.0 0.4 existing.insert(k, mod_rec) + 198 62926 42739 0.7 0.3 k += 1 199 else: 200 # print("dangling right") - 201 311606 291615 0.9 2.0 existing[k] = combined_rec - 202 311606 214005 0.7 1.5 k += 1 + 201 311578 287545 0.9 1.9 existing[k] = combined_rec + 202 311578 216263 0.7 1.4 k += 1 203 else: 204 # here we know that left < cur_left < right 205 # print("overlap left") - 206 69664 43775 0.6 0.3 mod_rec = msprime.Edgeset( - 207 69664 39459 0.6 0.3 left=left, - 208 69664 37684 0.5 0.3 right=cur_left, - 209 69664 37276 0.5 0.3 parent=parent, - 210 69664 183293 2.6 1.3 children=children) - 211 69664 70705 1.0 0.5 existing[k] = mod_rec - 212 69664 45699 0.7 0.3 k += 1 - 213 69664 64248 0.9 0.5 existing.insert(k, combined_rec) - 214 69664 43549 0.6 0.3 k += 1 - 215 69664 51159 0.7 0.4 if new.right < right: + 206 69756 45277 0.6 0.3 mod_rec = msprime.Edgeset( + 207 69756 38909 0.6 0.3 left=left, + 208 69756 37352 0.5 0.2 right=cur_left, + 209 69756 36927 0.5 0.2 parent=parent, + 210 69756 313090 4.5 2.0 children=children) + 211 69756 69524 1.0 0.4 existing[k] = mod_rec + 212 69756 45909 0.7 0.3 k += 1 + 213 69756 65553 0.9 0.4 existing.insert(k, combined_rec) + 214 69756 43987 0.6 0.3 k += 1 + 215 69756 51920 0.7 0.3 if new.right < right: 216 # print("overlap right") - 217 6531 4663 0.7 0.0 existing.insert(k, msprime.Edgeset( - 218 6531 4029 0.6 0.0 left=new.right, - 219 6531 3764 0.6 0.0 right=right, - 220 6531 3696 0.6 0.0 parent=parent, - 221 6531 15758 2.4 0.1 children=children)) - 222 6531 4291 0.7 0.0 k += 1 - 223 444215 411173 0.9 2.9 cur_left = min(new.right, right) + 217 6529 4810 0.7 0.0 existing.insert(k, msprime.Edgeset( + 218 6529 3922 0.6 0.0 left=new.right, + 219 6529 3695 0.6 0.0 right=right, + 220 6529 3661 0.6 0.0 parent=parent, + 221 6529 16014 2.5 0.1 children=children)) + 222 6529 4215 0.6 0.0 k += 1 + 223 444260 420557 0.9 2.7 cur_left = min(new.right, right) 224 # add whatever's left at the end - 225 559608 407324 0.7 2.9 if cur_left < new.right: - 226 309118 232210 0.8 1.6 existing.insert(k, msprime.Edgeset( - 227 309118 179186 0.6 1.3 left=cur_left, - 228 309118 192993 0.6 1.4 right=new.right, - 229 309118 182372 0.6 1.3 parent=new.parent, - 230 309118 1095978 3.5 7.7 children=new.children)) + 225 559608 406147 0.7 2.6 if cur_left < new.right: + 226 309136 230955 0.7 1.5 existing.insert(k, msprime.Edgeset( + 227 309136 178791 0.6 1.2 left=cur_left, + 228 309136 191835 0.6 1.2 right=new.right, + 229 309136 182679 0.6 1.2 parent=new.parent, + 230 309136 1575403 5.1 10.1 children=new.children)) 231 # print("getting") 232 # for x in existing: 233 # print(" ", x) - 234 559608 292228 0.5 2.1 return None + 234 559608 289035 0.5 1.9 return None + +Total time: 45.2746 s +File: tests/test_recomb_collector.py +Function: test_simupop at line 96 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 96 @profile + 97 def test_simupop(make_pop, generations, popsize): + 98 1 7 7.0 0.0 print("Popsize: ", popsize) + 99 # replications = 1 + 100 1 1 1.0 0.0 nsamples = 2 + 101 1 1 1.0 0.0 length = 10 + 102 1 1 1.0 0.0 nloci = 5 + 103 1 4 4.0 0.0 locus_position = list(range(0, length, int(length/nloci))) + 104 1 1 1.0 0.0 recomb_rate = 0.05 + 105 + 106 1 0 0.0 0.0 rc = RecombCollector( + 107 1 0 0.0 0.0 nsamples=nsamples, generations=generations, N=popsize, + 108 1 26109 26109.0 0.1 ancestor_age=10, length=length, locus_position=locus_position) + 109 + 110 1 61 61.0 0.0 init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)] + 111 + 112 1 12 12.0 0.0 id_tagger = sim.IdTagger(begin=0) + 113 1 6 6.0 0.0 id_tagger.reset(startID=1) # must reset - creating a new one doesn't + 114 1 1 1.0 0.0 pop = make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 115 1 45247223 45247223.0 99.9 recomb_rate, rc, generations) + 116 1 707 707.0 0.0 locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] + 117 1 515 515.0 0.0 rc.add_diploid_samples(pop.indInfo("ind_id"), locations) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index f5590b1..65fee73 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -79,19 +79,21 @@ def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, return pop return _make_pop -# occasional failures marked below -# ValueError: Parent 3's birth time has not been recorded with .add_individual() -@pytest.mark.parametrize(('generations', 'popsize'), [ - (3, 5), # stochastic fail - (3, 10), # stochastic fail - (3, 20), - (5, 5), - (5, 10), - (5, 20), - (10, 5), - (10, 10), - (10, 20), -]) +# # occasional failures marked below +# # ValueError: Parent 3's birth time has not been recorded with .add_individual() +# @pytest.mark.parametrize(('generations', 'popsize'), [ +# (3, 5), # stochastic fail +# (3, 10), # stochastic fail +# (3, 20), +# (5, 5), +# (5, 10), +# (5, 20), +# (10, 5), +# (10, 10), +# (10, 20), +# ]) + +@profile def test_simupop(make_pop, generations, popsize): print("Popsize: ", popsize) # replications = 1 @@ -147,4 +149,5 @@ def __init__(self): ]), sim.CloneMating()], subPopSize=popsize * 2) + test_simupop(make_pop(mr), generations=200, popsize=1000) From 44f22e54c26b6e48733699fe18a6a5bac89e87fe Mon Sep 17 00:00:00 2001 From: Jaime Ashander Date: Wed, 26 Apr 2017 11:04:29 -0700 Subject: [PATCH 4/5] Added more profiling in wrapper --- devel/prof_rc.txt | 243 +++++++++++++++++++-------------- tests/test_recomb_collector.py | 31 +++-- 2 files changed, 158 insertions(+), 116 deletions(-) diff --git a/devel/prof_rc.txt b/devel/prof_rc.txt index e432d06..fa2e3f1 100644 --- a/devel/prof_rc.txt +++ b/devel/prof_rc.txt @@ -1,7 +1,7 @@ Wrote profile results to test_recomb_collector.py.lprof Timer unit: 1e-06 s -Total time: 2.21355 s +Total time: 2.28065 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: add_individual at line 26 @@ -15,12 +15,12 @@ Line # Hits Time Per Hit % Time Line Contents 31 rather than the first time they reproduce, to ensure 32 that records are output in order by birth time of the parent. 33 ''' - 34 402005 312421 0.8 14.1 if name not in self: - 35 402005 229132 0.6 10.4 self[name] = (msprime.Node(time=time, population=population, - 36 402005 1169454 2.9 52.8 name=name, is_sample=is_sample), []) - 37 402005 502542 1.3 22.7 self.num_nodes = max(self.num_nodes, 1+int(name)) + 34 402005 327178 0.8 14.3 if name not in self: + 35 402005 237900 0.6 10.4 self[name] = (msprime.Node(time=time, population=population, + 36 402005 1182893 2.9 51.9 name=name, is_sample=is_sample), []) + 37 402005 532681 1.3 23.4 self.num_nodes = max(self.num_nodes, 1+int(name)) -Total time: 29.8855 s +Total time: 29.7806 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: add_record at line 39 @@ -34,19 +34,19 @@ Line # Hits Time Per Hit % Time Line Contents 44 [left,right). 45 ''' 46 # unneeded but helpful for debugging - 47 559608 637110 1.1 2.1 if parent not in self.keys(): + 47 559608 1316797 2.4 4.4 if parent not in self.keys(): 48 raise ValueError("Parent " + str(parent) + 49 "'s birth time has not been recorded with " + 50 ".add_individual().") 51 # time = self[parent][0] - 52 559608 314858 0.6 1.1 new_rec = msprime.Edgeset( - 53 559608 265560 0.5 0.9 parent=parent, - 54 559608 250048 0.4 0.8 children=children, - 55 559608 239496 0.4 0.8 left=left, - 56 559608 1990254 3.6 6.7 right=right) - 57 559608 26188192 46.8 87.6 merge_records(new_rec, self[parent][1]) + 52 559608 316729 0.6 1.1 new_rec = msprime.Edgeset( + 53 559608 268002 0.5 0.9 parent=parent, + 54 559608 244506 0.4 0.8 children=children, + 55 559608 241549 0.4 0.8 left=left, + 56 559608 1547125 2.8 5.2 right=right) + 57 559608 25845876 46.2 86.8 merge_records(new_rec, self[parent][1]) -Total time: 15.5276 s +Total time: 15.6038 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: merge_records at line 142 @@ -60,118 +60,159 @@ Line # Hits Time Per Hit % Time Line Contents 147 Keeping them in sorted order simplifies the procedure 148 (makes it so we don't have to split the new record). 149 ''' - 150 559608 481612 0.9 3.1 k = 0 - 151 559608 339875 0.6 2.2 cur_left = new.left + 150 559608 1326820 2.4 8.5 k = 0 + 151 559608 357924 0.6 2.3 cur_left = new.left 152 # print("MR: -----") 153 # print("adding", new) 154 # print(" to", existing) - 155 1115089 945952 0.8 6.1 while (k < len(existing)) and (cur_left < new.right): - 156 555481 490223 0.9 3.2 left = existing[k].left - 157 555481 349657 0.6 2.3 right = existing[k].right - 158 555481 345339 0.6 2.2 parent = existing[k].parent - 159 555481 365655 0.7 2.4 children = existing[k].children + 155 1115136 963906 0.9 6.2 while (k < len(existing)) and (cur_left < new.right): + 156 555528 503313 0.9 3.2 left = existing[k].left + 157 555528 357472 0.6 2.3 right = existing[k].right + 158 555528 350369 0.6 2.2 parent = existing[k].parent + 159 555528 360336 0.6 2.3 children = existing[k].children 160 # print("k:",k) 161 # print("existing:",existing[k]) 162 # print("cur_left:",cur_left) - 163 555481 338301 0.6 2.2 if new.parent != parent: + 163 555528 336751 0.6 2.2 if new.parent != parent: 164 raise ValueError("Trying to merge records with different parents.") - 165 555481 331886 0.6 2.1 if right <= cur_left: + 165 555528 334317 0.6 2.1 if right <= cur_left: 166 # no overlap 167 # print("no overlap") - 168 64428 37979 0.6 0.2 k += 1 - 169 64428 31858 0.5 0.2 continue - 170 491053 270569 0.6 1.7 if cur_left < left: + 168 64443 37744 0.6 0.2 k += 1 + 169 64443 31531 0.5 0.2 continue + 170 491085 272696 0.6 1.7 if cur_left < left: 171 # print("dangling left") - 172 46793 33272 0.7 0.2 existing.insert(k, msprime.Edgeset( - 173 46793 24317 0.5 0.2 left=cur_left, - 174 46793 54625 1.2 0.4 right=min(new.right, left), - 175 46793 23937 0.5 0.2 parent=parent, - 176 46793 142183 3.0 0.9 children=new.children)) - 177 46793 56301 1.2 0.4 cur_left = min(new.right, left) - 178 46793 29363 0.6 0.2 k += 1 - 179 46793 23620 0.5 0.2 continue - 180 444260 1227743 2.8 7.9 combined_children = tuple(sorted(children+new.children)) - 181 444260 316802 0.7 2.0 combined_rec = msprime.Edgeset( - 182 444260 237014 0.5 1.5 left=cur_left, - 183 444260 1003453 2.3 6.5 right=min(new.right, right), - 184 444260 261655 0.6 1.7 parent=new.parent, - 185 444260 1521928 3.4 9.8 children=combined_children) - 186 444260 303782 0.7 2.0 if cur_left == left: + 172 46811 33206 0.7 0.2 existing.insert(k, msprime.Edgeset( + 173 46811 24385 0.5 0.2 left=cur_left, + 174 46811 54986 1.2 0.4 right=min(new.right, left), + 175 46811 24179 0.5 0.2 parent=parent, + 176 46811 140550 3.0 0.9 children=new.children)) + 177 46811 56437 1.2 0.4 cur_left = min(new.right, left) + 178 46811 29029 0.6 0.2 k += 1 + 179 46811 23164 0.5 0.1 continue + 180 444274 793270 1.8 5.1 combined_children = tuple(sorted(children+new.children)) + 181 444274 452577 1.0 2.9 combined_rec = msprime.Edgeset( + 182 444274 240969 0.5 1.5 left=cur_left, + 183 444274 670205 1.5 4.3 right=min(new.right, right), + 184 444274 266848 0.6 1.7 parent=new.parent, + 185 444274 1324300 3.0 8.5 children=combined_children) + 186 444274 293177 0.7 1.9 if cur_left == left: 187 # print("equal left") - 188 374504 265749 0.7 1.7 if new.right < right: + 188 374597 271651 0.7 1.7 if new.right < right: 189 # print("overlap right") - 190 62926 39431 0.6 0.3 mod_rec = msprime.Edgeset( - 191 62926 35946 0.6 0.2 left=new.right, - 192 62926 34162 0.5 0.2 right=right, - 193 62926 33894 0.5 0.2 parent=parent, - 194 62926 552628 8.8 3.6 children=children) - 195 62926 65417 1.0 0.4 existing[k] = combined_rec - 196 62926 41385 0.7 0.3 k += 1 - 197 62926 63402 1.0 0.4 existing.insert(k, mod_rec) - 198 62926 42739 0.7 0.3 k += 1 + 190 62980 38856 0.6 0.2 mod_rec = msprime.Edgeset( + 191 62980 37919 0.6 0.2 left=new.right, + 192 62980 34621 0.5 0.2 right=right, + 193 62980 34063 0.5 0.2 parent=parent, + 194 62980 174581 2.8 1.1 children=children) + 195 62980 65782 1.0 0.4 existing[k] = combined_rec + 196 62980 41135 0.7 0.3 k += 1 + 197 62980 62275 1.0 0.4 existing.insert(k, mod_rec) + 198 62980 42888 0.7 0.3 k += 1 199 else: 200 # print("dangling right") - 201 311578 287545 0.9 1.9 existing[k] = combined_rec - 202 311578 216263 0.7 1.4 k += 1 + 201 311617 293482 0.9 1.9 existing[k] = combined_rec + 202 311617 217959 0.7 1.4 k += 1 203 else: 204 # here we know that left < cur_left < right 205 # print("overlap left") - 206 69756 45277 0.6 0.3 mod_rec = msprime.Edgeset( - 207 69756 38909 0.6 0.3 left=left, - 208 69756 37352 0.5 0.2 right=cur_left, - 209 69756 36927 0.5 0.2 parent=parent, - 210 69756 313090 4.5 2.0 children=children) - 211 69756 69524 1.0 0.4 existing[k] = mod_rec - 212 69756 45909 0.7 0.3 k += 1 - 213 69756 65553 0.9 0.4 existing.insert(k, combined_rec) - 214 69756 43987 0.6 0.3 k += 1 - 215 69756 51920 0.7 0.3 if new.right < right: + 206 69677 43915 0.6 0.3 mod_rec = msprime.Edgeset( + 207 69677 39292 0.6 0.3 left=left, + 208 69677 37677 0.5 0.2 right=cur_left, + 209 69677 38197 0.5 0.2 parent=parent, + 210 69677 759606 10.9 4.9 children=children) + 211 69677 69563 1.0 0.4 existing[k] = mod_rec + 212 69677 45212 0.6 0.3 k += 1 + 213 69677 66250 1.0 0.4 existing.insert(k, combined_rec) + 214 69677 43517 0.6 0.3 k += 1 + 215 69677 53571 0.8 0.3 if new.right < right: 216 # print("overlap right") - 217 6529 4810 0.7 0.0 existing.insert(k, msprime.Edgeset( - 218 6529 3922 0.6 0.0 left=new.right, - 219 6529 3695 0.6 0.0 right=right, - 220 6529 3661 0.6 0.0 parent=parent, - 221 6529 16014 2.5 0.1 children=children)) - 222 6529 4215 0.6 0.0 k += 1 - 223 444260 420557 0.9 2.7 cur_left = min(new.right, right) + 217 6546 4822 0.7 0.0 existing.insert(k, msprime.Edgeset( + 218 6546 4187 0.6 0.0 left=new.right, + 219 6546 3787 0.6 0.0 right=right, + 220 6546 3756 0.6 0.0 parent=parent, + 221 6546 16401 2.5 0.1 children=children)) + 222 6546 4353 0.7 0.0 k += 1 + 223 444274 418285 0.9 2.7 cur_left = min(new.right, right) 224 # add whatever's left at the end - 225 559608 406147 0.7 2.6 if cur_left < new.right: - 226 309136 230955 0.7 1.5 existing.insert(k, msprime.Edgeset( - 227 309136 178791 0.6 1.2 left=cur_left, - 228 309136 191835 0.6 1.2 right=new.right, - 229 309136 182679 0.6 1.2 parent=new.parent, - 230 309136 1575403 5.1 10.1 children=new.children)) + 225 559608 414958 0.7 2.7 if cur_left < new.right: + 226 309105 234729 0.8 1.5 existing.insert(k, msprime.Edgeset( + 227 309105 180711 0.6 1.2 left=cur_left, + 228 309105 202110 0.7 1.3 right=new.right, + 229 309105 191629 0.6 1.2 parent=new.parent, + 230 309105 1463548 4.7 9.4 children=new.children)) 231 # print("getting") 232 # for x in existing: 233 # print(" ", x) - 234 559608 289035 0.5 1.9 return None + 234 559608 288005 0.5 1.8 return None -Total time: 45.2746 s +Total time: 5e-06 s File: tests/test_recomb_collector.py -Function: test_simupop at line 96 +Function: make_pop at line 48 Line # Hits Time Per Hit % Time Line Contents ============================================================== - 96 @profile - 97 def test_simupop(make_pop, generations, popsize): - 98 1 7 7.0 0.0 print("Popsize: ", popsize) - 99 # replications = 1 - 100 1 1 1.0 0.0 nsamples = 2 - 101 1 1 1.0 0.0 length = 10 - 102 1 1 1.0 0.0 nloci = 5 - 103 1 4 4.0 0.0 locus_position = list(range(0, length, int(length/nloci))) - 104 1 1 1.0 0.0 recomb_rate = 0.05 - 105 - 106 1 0 0.0 0.0 rc = RecombCollector( - 107 1 0 0.0 0.0 nsamples=nsamples, generations=generations, N=popsize, - 108 1 26109 26109.0 0.1 ancestor_age=10, length=length, locus_position=locus_position) - 109 - 110 1 61 61.0 0.0 init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)] - 111 - 112 1 12 12.0 0.0 id_tagger = sim.IdTagger(begin=0) - 113 1 6 6.0 0.0 id_tagger.reset(startID=1) # must reset - creating a new one doesn't - 114 1 1 1.0 0.0 pop = make_pop(popsize, nloci, locus_position, id_tagger, init_geno, - 115 1 45247223 45247223.0 99.9 recomb_rate, rc, generations) - 116 1 707 707.0 0.0 locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] - 117 1 515 515.0 0.0 rc.add_diploid_samples(pop.indInfo("ind_id"), locations) + 48 @profile + 49 def make_pop(request): + 50 # request.param stores a lambda function to make mating scheme + 51 # each test that uses this fixture will be run for both entries in 'params' + 52 1 2 2.0 40.0 mating_scheme_factory = request.param + 53 + 54 1 2 2.0 40.0 def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 55 recomb_rate, rc, generations): + 56 sim.setOptions(seed=111) + 57 recombinator = sim.Recombinator(intensity=recomb_rate, + 58 output=rc.collect_recombs, + 59 infoFields="ind_id") + 60 pop = sim.Population( + 61 size=[popsize], + 62 loci=[nloci], + 63 lociPos=locus_position, + 64 infoFields=['ind_id']) + 65 pop.evolve( + 66 initOps=[ + 67 sim.InitSex(), + 68 id_tagger + 69 ]+init_geno, + 70 preOps=[ + 71 sim.PyOperator(lambda pop: rc.increment_time() or True), + 72 # Must return true or false. True keeps whole population (?) + 73 ], + 74 matingScheme=mating_scheme_factory(recombinator, popsize), + 75 postOps=[ + 76 sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1) + 77 ], + 78 gen=generations + 79 ) + 80 return pop + 81 1 1 1.0 20.0 return _make_pop + +Total time: 45.4613 s +File: tests/test_recomb_collector.py +Function: test_simupop at line 97 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 97 @profile + 98 def test_simupop(make_pop, generations, popsize): + 99 1 6 6.0 0.0 print("Popsize: ", popsize) + 100 # replications = 1 + 101 1 1 1.0 0.0 nsamples = 2 + 102 1 1 1.0 0.0 length = 10 + 103 1 1 1.0 0.0 nloci = 5 + 104 1 5 5.0 0.0 locus_position = list(range(0, length, int(length/nloci))) + 105 1 1 1.0 0.0 recomb_rate = 0.05 + 106 + 107 1 1 1.0 0.0 rc = RecombCollector( + 108 1 1 1.0 0.0 nsamples=nsamples, generations=generations, N=popsize, + 109 1 24590 24590.0 0.1 ancestor_age=10, length=length, locus_position=locus_position) + 110 + 111 1 69 69.0 0.0 init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)] + 112 + 113 1 13 13.0 0.0 id_tagger = sim.IdTagger(begin=0) + 114 1 7 7.0 0.0 id_tagger.reset(startID=1) # must reset - creating a new one doesn't + 115 1 2 2.0 0.0 pop = make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 116 1 45435320 45435320.0 99.9 recomb_rate, rc, generations) + 117 1 702 702.0 0.0 locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] + 118 1 541 541.0 0.0 rc.add_diploid_samples(pop.indInfo("ind_id"), locations) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index 65fee73..96b7178 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -30,21 +30,22 @@ def check_tables(args): assert(ch < args.num_nodes) -@pytest.fixture(scope="function", params=[ - lambda recombinator, popsize: sim.RandomMating( - ops=[ - sim.IdTagger(), - recombinator - ]), - # Overlapping generations mating system - lambda recombinator, popsize: sim.HeteroMating([sim.RandomMating( - ops=[ - sim.IdTagger(), - recombinator - ]), - sim.CloneMating()], - subPopSize=popsize * 2) - ]) +# @pytest.fixture(scope="function", params=[ +# lambda recombinator, popsize: sim.RandomMating( +# ops=[ +# sim.IdTagger(), +# recombinator +# ]), +# # Overlapping generations mating system +# lambda recombinator, popsize: sim.HeteroMating([sim.RandomMating( +# ops=[ +# sim.IdTagger(), +# recombinator +# ]), +# sim.CloneMating()], +# subPopSize=popsize * 2) +# ]) +@profile def make_pop(request): # request.param stores a lambda function to make mating scheme # each test that uses this fixture will be run for both entries in 'params' From 32a6fd192afb58e9ae0b76ee34db685ee0d672e9 Mon Sep 17 00:00:00 2001 From: Jaime Ashander Date: Wed, 26 Apr 2017 11:06:50 -0700 Subject: [PATCH 5/5] fixup! Added more profiling in wrapper --- devel/prof_rc.txt | 255 ++++++++++++++++----------------- tests/test_recomb_collector.py | 5 +- 2 files changed, 124 insertions(+), 136 deletions(-) diff --git a/devel/prof_rc.txt b/devel/prof_rc.txt index fa2e3f1..75ffdad 100644 --- a/devel/prof_rc.txt +++ b/devel/prof_rc.txt @@ -1,7 +1,7 @@ Wrote profile results to test_recomb_collector.py.lprof Timer unit: 1e-06 s -Total time: 2.28065 s +Total time: 2.28796 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: add_individual at line 26 @@ -15,12 +15,12 @@ Line # Hits Time Per Hit % Time Line Contents 31 rather than the first time they reproduce, to ensure 32 that records are output in order by birth time of the parent. 33 ''' - 34 402005 327178 0.8 14.3 if name not in self: - 35 402005 237900 0.6 10.4 self[name] = (msprime.Node(time=time, population=population, - 36 402005 1182893 2.9 51.9 name=name, is_sample=is_sample), []) - 37 402005 532681 1.3 23.4 self.num_nodes = max(self.num_nodes, 1+int(name)) + 34 402005 320650 0.8 14.0 if name not in self: + 35 402005 238573 0.6 10.4 self[name] = (msprime.Node(time=time, population=population, + 36 402005 1231482 3.1 53.8 name=name, is_sample=is_sample), []) + 37 402005 497256 1.2 21.7 self.num_nodes = max(self.num_nodes, 1+int(name)) -Total time: 29.7806 s +Total time: 29.878 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: add_record at line 39 @@ -34,19 +34,19 @@ Line # Hits Time Per Hit % Time Line Contents 44 [left,right). 45 ''' 46 # unneeded but helpful for debugging - 47 559608 1316797 2.4 4.4 if parent not in self.keys(): + 47 559608 799704 1.4 2.7 if parent not in self.keys(): 48 raise ValueError("Parent " + str(parent) + 49 "'s birth time has not been recorded with " + 50 ".add_individual().") 51 # time = self[parent][0] - 52 559608 316729 0.6 1.1 new_rec = msprime.Edgeset( - 53 559608 268002 0.5 0.9 parent=parent, - 54 559608 244506 0.4 0.8 children=children, - 55 559608 241549 0.4 0.8 left=left, - 56 559608 1547125 2.8 5.2 right=right) - 57 559608 25845876 46.2 86.8 merge_records(new_rec, self[parent][1]) + 52 559608 339309 0.6 1.1 new_rec = msprime.Edgeset( + 53 559608 276814 0.5 0.9 parent=parent, + 54 559608 256989 0.5 0.9 children=children, + 55 559608 254082 0.5 0.9 left=left, + 56 559608 1489470 2.7 5.0 right=right) + 57 559608 26461636 47.3 88.6 merge_records(new_rec, self[parent][1]) -Total time: 15.6038 s +Total time: 16.3576 s File: /home/jaime/lib/ftprime/ftprime/argrecorder.py Function: merge_records at line 142 @@ -60,159 +60,150 @@ Line # Hits Time Per Hit % Time Line Contents 147 Keeping them in sorted order simplifies the procedure 148 (makes it so we don't have to split the new record). 149 ''' - 150 559608 1326820 2.4 8.5 k = 0 - 151 559608 357924 0.6 2.3 cur_left = new.left + 150 559608 1563370 2.8 9.6 k = 0 + 151 559608 343556 0.6 2.1 cur_left = new.left 152 # print("MR: -----") 153 # print("adding", new) 154 # print(" to", existing) - 155 1115136 963906 0.9 6.2 while (k < len(existing)) and (cur_left < new.right): - 156 555528 503313 0.9 3.2 left = existing[k].left - 157 555528 357472 0.6 2.3 right = existing[k].right - 158 555528 350369 0.6 2.2 parent = existing[k].parent - 159 555528 360336 0.6 2.3 children = existing[k].children + 155 1115266 963964 0.9 5.9 while (k < len(existing)) and (cur_left < new.right): + 156 555658 508057 0.9 3.1 left = existing[k].left + 157 555658 355931 0.6 2.2 right = existing[k].right + 158 555658 350275 0.6 2.1 parent = existing[k].parent + 159 555658 366891 0.7 2.2 children = existing[k].children 160 # print("k:",k) 161 # print("existing:",existing[k]) 162 # print("cur_left:",cur_left) - 163 555528 336751 0.6 2.2 if new.parent != parent: + 163 555658 336903 0.6 2.1 if new.parent != parent: 164 raise ValueError("Trying to merge records with different parents.") - 165 555528 334317 0.6 2.1 if right <= cur_left: + 165 555658 330761 0.6 2.0 if right <= cur_left: 166 # no overlap 167 # print("no overlap") - 168 64443 37744 0.6 0.2 k += 1 - 169 64443 31531 0.5 0.2 continue - 170 491085 272696 0.6 1.7 if cur_left < left: + 168 64560 38082 0.6 0.2 k += 1 + 169 64560 32076 0.5 0.2 continue + 170 491098 271582 0.6 1.7 if cur_left < left: 171 # print("dangling left") - 172 46811 33206 0.7 0.2 existing.insert(k, msprime.Edgeset( - 173 46811 24385 0.5 0.2 left=cur_left, - 174 46811 54986 1.2 0.4 right=min(new.right, left), - 175 46811 24179 0.5 0.2 parent=parent, - 176 46811 140550 3.0 0.9 children=new.children)) - 177 46811 56437 1.2 0.4 cur_left = min(new.right, left) - 178 46811 29029 0.6 0.2 k += 1 - 179 46811 23164 0.5 0.1 continue - 180 444274 793270 1.8 5.1 combined_children = tuple(sorted(children+new.children)) - 181 444274 452577 1.0 2.9 combined_rec = msprime.Edgeset( - 182 444274 240969 0.5 1.5 left=cur_left, - 183 444274 670205 1.5 4.3 right=min(new.right, right), - 184 444274 266848 0.6 1.7 parent=new.parent, - 185 444274 1324300 3.0 8.5 children=combined_children) - 186 444274 293177 0.7 1.9 if cur_left == left: + 172 46958 33681 0.7 0.2 existing.insert(k, msprime.Edgeset( + 173 46958 24061 0.5 0.1 left=cur_left, + 174 46958 53415 1.1 0.3 right=min(new.right, left), + 175 46958 24232 0.5 0.1 parent=parent, + 176 46958 143949 3.1 0.9 children=new.children)) + 177 46958 51934 1.1 0.3 cur_left = min(new.right, left) + 178 46958 29963 0.6 0.2 k += 1 + 179 46958 23954 0.5 0.1 continue + 180 444140 1513825 3.4 9.3 combined_children = tuple(sorted(children+new.children)) + 181 444140 601062 1.4 3.7 combined_rec = msprime.Edgeset( + 182 444140 237600 0.5 1.5 left=cur_left, + 183 444140 894437 2.0 5.5 right=min(new.right, right), + 184 444140 262270 0.6 1.6 parent=new.parent, + 185 444140 1173999 2.6 7.2 children=combined_children) + 186 444140 290592 0.7 1.8 if cur_left == left: 187 # print("equal left") - 188 374597 271651 0.7 1.7 if new.right < right: + 188 374591 267743 0.7 1.6 if new.right < right: 189 # print("overlap right") - 190 62980 38856 0.6 0.2 mod_rec = msprime.Edgeset( - 191 62980 37919 0.6 0.2 left=new.right, - 192 62980 34621 0.5 0.2 right=right, - 193 62980 34063 0.5 0.2 parent=parent, - 194 62980 174581 2.8 1.1 children=children) - 195 62980 65782 1.0 0.4 existing[k] = combined_rec - 196 62980 41135 0.7 0.3 k += 1 - 197 62980 62275 1.0 0.4 existing.insert(k, mod_rec) - 198 62980 42888 0.7 0.3 k += 1 + 190 62946 39608 0.6 0.2 mod_rec = msprime.Edgeset( + 191 62946 36558 0.6 0.2 left=new.right, + 192 62946 36056 0.6 0.2 right=right, + 193 62946 34178 0.5 0.2 parent=parent, + 194 62946 191912 3.0 1.2 children=children) + 195 62946 65663 1.0 0.4 existing[k] = combined_rec + 196 62946 41032 0.7 0.3 k += 1 + 197 62946 61760 1.0 0.4 existing.insert(k, mod_rec) + 198 62946 42943 0.7 0.3 k += 1 199 else: 200 # print("dangling right") - 201 311617 293482 0.9 1.9 existing[k] = combined_rec - 202 311617 217959 0.7 1.4 k += 1 + 201 311645 291891 0.9 1.8 existing[k] = combined_rec + 202 311645 216618 0.7 1.3 k += 1 203 else: 204 # here we know that left < cur_left < right 205 # print("overlap left") - 206 69677 43915 0.6 0.3 mod_rec = msprime.Edgeset( - 207 69677 39292 0.6 0.3 left=left, - 208 69677 37677 0.5 0.2 right=cur_left, - 209 69677 38197 0.5 0.2 parent=parent, - 210 69677 759606 10.9 4.9 children=children) - 211 69677 69563 1.0 0.4 existing[k] = mod_rec - 212 69677 45212 0.6 0.3 k += 1 - 213 69677 66250 1.0 0.4 existing.insert(k, combined_rec) - 214 69677 43517 0.6 0.3 k += 1 - 215 69677 53571 0.8 0.3 if new.right < right: + 206 69549 45708 0.7 0.3 mod_rec = msprime.Edgeset( + 207 69549 38658 0.6 0.2 left=left, + 208 69549 37008 0.5 0.2 right=cur_left, + 209 69549 37283 0.5 0.2 parent=parent, + 210 69549 193916 2.8 1.2 children=children) + 211 69549 69996 1.0 0.4 existing[k] = mod_rec + 212 69549 45173 0.6 0.3 k += 1 + 213 69549 65443 0.9 0.4 existing.insert(k, combined_rec) + 214 69549 44130 0.6 0.3 k += 1 + 215 69549 52116 0.7 0.3 if new.right < right: 216 # print("overlap right") - 217 6546 4822 0.7 0.0 existing.insert(k, msprime.Edgeset( - 218 6546 4187 0.6 0.0 left=new.right, - 219 6546 3787 0.6 0.0 right=right, - 220 6546 3756 0.6 0.0 parent=parent, - 221 6546 16401 2.5 0.1 children=children)) - 222 6546 4353 0.7 0.0 k += 1 - 223 444274 418285 0.9 2.7 cur_left = min(new.right, right) + 217 6539 4796 0.7 0.0 existing.insert(k, msprime.Edgeset( + 218 6539 3996 0.6 0.0 left=new.right, + 219 6539 3774 0.6 0.0 right=right, + 220 6539 3700 0.6 0.0 parent=parent, + 221 6539 16880 2.6 0.1 children=children)) + 222 6539 4174 0.6 0.0 k += 1 + 223 444140 421187 0.9 2.6 cur_left = min(new.right, right) 224 # add whatever's left at the end - 225 559608 414958 0.7 2.7 if cur_left < new.right: - 226 309105 234729 0.8 1.5 existing.insert(k, msprime.Edgeset( - 227 309105 180711 0.6 1.2 left=cur_left, - 228 309105 202110 0.7 1.3 right=new.right, - 229 309105 191629 0.6 1.2 parent=new.parent, - 230 309105 1463548 4.7 9.4 children=new.children)) + 225 559608 412823 0.7 2.5 if cur_left < new.right: + 226 309093 239751 0.8 1.5 existing.insert(k, msprime.Edgeset( + 227 309093 178473 0.6 1.1 left=cur_left, + 228 309093 192582 0.6 1.2 right=new.right, + 229 309093 182228 0.6 1.1 parent=new.parent, + 230 309093 1630994 5.3 10.0 children=new.children)) 231 # print("getting") 232 # for x in existing: 233 # print(" ", x) - 234 559608 288005 0.5 1.8 return None + 234 559608 286449 0.5 1.8 return None -Total time: 5e-06 s +Total time: 45.6963 s File: tests/test_recomb_collector.py -Function: make_pop at line 48 +Function: _make_pop at line 53 Line # Hits Time Per Hit % Time Line Contents ============================================================== - 48 @profile - 49 def make_pop(request): - 50 # request.param stores a lambda function to make mating scheme - 51 # each test that uses this fixture will be run for both entries in 'params' - 52 1 2 2.0 40.0 mating_scheme_factory = request.param - 53 - 54 1 2 2.0 40.0 def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 53 @profile + 54 def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, 55 recomb_rate, rc, generations): - 56 sim.setOptions(seed=111) - 57 recombinator = sim.Recombinator(intensity=recomb_rate, - 58 output=rc.collect_recombs, - 59 infoFields="ind_id") - 60 pop = sim.Population( - 61 size=[popsize], - 62 loci=[nloci], - 63 lociPos=locus_position, - 64 infoFields=['ind_id']) - 65 pop.evolve( + 56 1 5 5.0 0.0 sim.setOptions(seed=111) + 57 1 1 1.0 0.0 recombinator = sim.Recombinator(intensity=recomb_rate, + 58 1 1 1.0 0.0 output=rc.collect_recombs, + 59 1 54 54.0 0.0 infoFields="ind_id") + 60 1 1 1.0 0.0 pop = sim.Population( + 61 1 1 1.0 0.0 size=[popsize], + 62 1 0 0.0 0.0 loci=[nloci], + 63 1 1 1.0 0.0 lociPos=locus_position, + 64 1 72 72.0 0.0 infoFields=['ind_id']) + 65 1 2 2.0 0.0 pop.evolve( 66 initOps=[ - 67 sim.InitSex(), - 68 id_tagger - 69 ]+init_geno, + 67 1 6 6.0 0.0 sim.InitSex(), + 68 1 1 1.0 0.0 id_tagger + 69 1 1 1.0 0.0 ]+init_geno, 70 preOps=[ - 71 sim.PyOperator(lambda pop: rc.increment_time() or True), + 71 1 13 13.0 0.0 sim.PyOperator(lambda pop: rc.increment_time() or True), 72 # Must return true or false. True keeps whole population (?) 73 ], - 74 matingScheme=mating_scheme_factory(recombinator, popsize), - 75 postOps=[ - 76 sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1) - 77 ], - 78 gen=generations - 79 ) - 80 return pop - 81 1 1 1.0 20.0 return _make_pop + 74 1 146 146.0 0.0 matingScheme=mating_scheme_factory(recombinator, popsize), + 75 1 45696018 45696018.0 100.0 gen=generations + 76 ) + 77 1 1 1.0 0.0 return pop -Total time: 45.4613 s +Total time: 45.7214 s File: tests/test_recomb_collector.py -Function: test_simupop at line 97 +Function: test_simupop at line 94 Line # Hits Time Per Hit % Time Line Contents ============================================================== - 97 @profile - 98 def test_simupop(make_pop, generations, popsize): - 99 1 6 6.0 0.0 print("Popsize: ", popsize) - 100 # replications = 1 - 101 1 1 1.0 0.0 nsamples = 2 - 102 1 1 1.0 0.0 length = 10 - 103 1 1 1.0 0.0 nloci = 5 - 104 1 5 5.0 0.0 locus_position = list(range(0, length, int(length/nloci))) - 105 1 1 1.0 0.0 recomb_rate = 0.05 - 106 - 107 1 1 1.0 0.0 rc = RecombCollector( - 108 1 1 1.0 0.0 nsamples=nsamples, generations=generations, N=popsize, - 109 1 24590 24590.0 0.1 ancestor_age=10, length=length, locus_position=locus_position) - 110 - 111 1 69 69.0 0.0 init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)] - 112 - 113 1 13 13.0 0.0 id_tagger = sim.IdTagger(begin=0) - 114 1 7 7.0 0.0 id_tagger.reset(startID=1) # must reset - creating a new one doesn't - 115 1 2 2.0 0.0 pop = make_pop(popsize, nloci, locus_position, id_tagger, init_geno, - 116 1 45435320 45435320.0 99.9 recomb_rate, rc, generations) - 117 1 702 702.0 0.0 locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] - 118 1 541 541.0 0.0 rc.add_diploid_samples(pop.indInfo("ind_id"), locations) + 94 @profile + 95 def test_simupop(make_pop, generations, popsize): + 96 1 7 7.0 0.0 print("Popsize: ", popsize) + 97 # replications = 1 + 98 1 1 1.0 0.0 nsamples = 2 + 99 1 1 1.0 0.0 length = 10 + 100 1 1 1.0 0.0 nloci = 5 + 101 1 5 5.0 0.0 locus_position = list(range(0, length, int(length/nloci))) + 102 1 1 1.0 0.0 recomb_rate = 0.05 + 103 + 104 1 1 1.0 0.0 rc = RecombCollector( + 105 1 1 1.0 0.0 nsamples=nsamples, generations=generations, N=popsize, + 106 1 23683 23683.0 0.1 ancestor_age=10, length=length, locus_position=locus_position) + 107 + 108 1 59 59.0 0.0 init_geno = [sim.InitGenotype(freq=[0.9, 0.1], loci=sim.ALL_AVAIL)] + 109 + 110 1 12 12.0 0.0 id_tagger = sim.IdTagger(begin=0) + 111 1 5 5.0 0.0 id_tagger.reset(startID=1) # must reset - creating a new one doesn't + 112 1 1 1.0 0.0 pop = make_pop(popsize, nloci, locus_position, id_tagger, init_geno, + 113 1 45696352 45696352.0 99.9 recomb_rate, rc, generations) + 114 1 702 702.0 0.0 locations = [pop.subPopIndPair(x)[0] for x in range(pop.popSize())] + 115 1 581 581.0 0.0 rc.add_diploid_samples(pop.indInfo("ind_id"), locations) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index 96b7178..9f9439d 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -45,12 +45,12 @@ def check_tables(args): # sim.CloneMating()], # subPopSize=popsize * 2) # ]) -@profile def make_pop(request): # request.param stores a lambda function to make mating scheme # each test that uses this fixture will be run for both entries in 'params' mating_scheme_factory = request.param + @profile def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, recomb_rate, rc, generations): sim.setOptions(seed=111) @@ -72,9 +72,6 @@ def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno, # Must return true or false. True keeps whole population (?) ], matingScheme=mating_scheme_factory(recombinator, popsize), - postOps=[ - sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1) - ], gen=generations ) return pop