diff --git a/devel/prof.txt b/devel/prof.txt new file mode 100644 index 0000000..6f8c936 --- /dev/null +++ b/devel/prof.txt @@ -0,0 +1,148 @@ +Wrote profile results to test_arg_recorder_with_wf.py.lprof +Timer unit: 1e-06 s + +Total time: 0.565759 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_individual at line 26 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 26 @profile + 27 def add_individual(self, name, time, population=msprime.NULL_POPULATION, + 28 is_sample=False): + 29 '''Add a new individual. + 30 We need to add individuals when they are *born*, + 31 rather than the first time they reproduce, to ensure + 32 that records are output in order by birth time of the parent. + 33 ''' + 34 101240 72447 0.7 12.8 if name not in self: + 35 101240 58303 0.6 10.3 self[name] = (msprime.Node(time=time, population=population, + 36 101240 297613 2.9 52.6 name=name, is_sample=is_sample), []) + 37 101240 137396 1.4 24.3 self.num_nodes = max(self.num_nodes, 1+int(name)) + +Total time: 7.5774 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_record at line 39 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 39 @profile + 40 def add_record(self, left, right, parent, children): + 41 ''' + 42 Add records corresponding to a reproduction event in which children (a + 43 tuple of IDs) inherit from parent (a single ID) on the interval + 44 [left,right). + 45 ''' + 46 # unneeded but helpful for debugging + 47 150314 120881 0.8 1.6 if parent not in self.keys(): + 48 raise ValueError("Parent " + str(parent) + + 49 "'s birth time has not been recorded with " + + 50 ".add_individual().") + 51 # time = self[parent][0] + 52 150314 79151 0.5 1.0 new_rec = msprime.Edgeset( + 53 150314 71103 0.5 0.9 parent=parent, + 54 150314 66660 0.4 0.9 children=children, + 55 150314 65861 0.4 0.9 left=left, + 56 150314 325604 2.2 4.3 right=right) + 57 150314 6848145 45.6 90.4 merge_records(new_rec, self[parent][1]) + +Total time: 4.08047 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: merge_records at line 142 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 142 @profile + 143 def merge_records(new, existing): + 144 ''' + 145 Incorporate a new record (l,r,x,c,t[x]) + 146 into a list of existing ones (a,b,x,C,t[x]) sorted on left endpoint. + 147 Keeping them in sorted order simplifies the procedure + 148 (makes it so we don't have to split the new record). + 149 ''' + 150 150314 101369 0.7 2.5 k = 0 + 151 150314 87001 0.6 2.1 cur_left = new.left + 152 # print("MR: -----") + 153 # print("adding", new) + 154 # print(" to", existing) + 155 308472 241009 0.8 5.9 while (k < len(existing)) and (cur_left < new.right): + 156 158158 124833 0.8 3.1 left = existing[k].left + 157 158158 93386 0.6 2.3 right = existing[k].right + 158 158158 90150 0.6 2.2 parent = existing[k].parent + 159 158158 96347 0.6 2.4 children = existing[k].children + 160 # print("k:",k) + 161 # print("existing:",existing[k]) + 162 # print("cur_left:",cur_left) + 163 158158 92653 0.6 2.3 if new.parent != parent: + 164 raise ValueError("Trying to merge records with different parents.") + 165 158158 85414 0.5 2.1 if right <= cur_left: + 166 # no overlap + 167 # print("no overlap") + 168 15534 8903 0.6 0.2 k += 1 + 169 15534 7404 0.5 0.2 continue + 170 142624 77301 0.5 1.9 if cur_left < left: + 171 # print("dangling left") + 172 15409 10416 0.7 0.3 existing.insert(k, msprime.Edgeset( + 173 15409 7887 0.5 0.2 left=cur_left, + 174 15409 16021 1.0 0.4 right=min(new.right, left), + 175 15409 7897 0.5 0.2 parent=parent, + 176 15409 43180 2.8 1.1 children=new.children)) + 177 15409 15864 1.0 0.4 cur_left = min(new.right, left) + 178 15409 9449 0.6 0.2 k += 1 + 179 15409 7490 0.5 0.2 continue + 180 127215 393801 3.1 9.7 combined_children = tuple(sorted(children+new.children)) + 181 127215 87066 0.7 2.1 combined_rec = msprime.Edgeset( + 182 127215 67410 0.5 1.7 left=cur_left, + 183 127215 234108 1.8 5.7 right=min(new.right, right), + 184 127215 73773 0.6 1.8 parent=new.parent, + 185 127215 312244 2.5 7.7 children=combined_children) + 186 127215 78337 0.6 1.9 if cur_left == left: + 187 # print("equal left") + 188 105571 67860 0.6 1.7 if new.right < right: + 189 # print("overlap right") + 190 21590 13336 0.6 0.3 mod_rec = msprime.Edgeset( + 191 21590 12284 0.6 0.3 left=new.right, + 192 21590 11592 0.5 0.3 right=right, + 193 21590 11575 0.5 0.3 parent=parent, + 194 21590 149418 6.9 3.7 children=children) + 195 21590 19858 0.9 0.5 existing[k] = combined_rec + 196 21590 13557 0.6 0.3 k += 1 + 197 21590 20224 0.9 0.5 existing.insert(k, mod_rec) + 198 21590 13962 0.6 0.3 k += 1 + 199 else: + 200 # print("dangling right") + 201 83981 71541 0.9 1.8 existing[k] = combined_rec + 202 83981 55980 0.7 1.4 k += 1 + 203 else: + 204 # here we know that left < cur_left < right + 205 # print("overlap left") + 206 21644 13649 0.6 0.3 mod_rec = msprime.Edgeset( + 207 21644 11864 0.5 0.3 left=left, + 208 21644 11487 0.5 0.3 right=cur_left, + 209 21644 11471 0.5 0.3 parent=parent, + 210 21644 93849 4.3 2.3 children=children) + 211 21644 20146 0.9 0.5 existing[k] = mod_rec + 212 21644 13783 0.6 0.3 k += 1 + 213 21644 20156 0.9 0.5 existing.insert(k, combined_rec) + 214 21644 13301 0.6 0.3 k += 1 + 215 21644 14807 0.7 0.4 if new.right < right: + 216 # print("overlap right") + 217 existing.insert(k, msprime.Edgeset( + 218 left=new.right, + 219 right=right, + 220 parent=parent, + 221 children=children)) + 222 k += 1 + 223 127215 110940 0.9 2.7 cur_left = min(new.right, right) + 224 # add whatever's left at the end + 225 150314 101572 0.7 2.5 if cur_left < new.right: + 226 82579 60579 0.7 1.5 existing.insert(k, msprime.Edgeset( + 227 82579 47194 0.6 1.2 left=cur_left, + 228 82579 50544 0.6 1.2 right=new.right, + 229 82579 47932 0.6 1.2 parent=new.parent, + 230 82579 428151 5.2 10.5 children=new.children)) + 231 # print("getting") + 232 # for x in existing: + 233 # print(" ", x) + 234 150314 77143 0.5 1.9 return None + diff --git a/devel/prof_rc.txt b/devel/prof_rc.txt new file mode 100644 index 0000000..6d2ac3d --- /dev/null +++ b/devel/prof_rc.txt @@ -0,0 +1,148 @@ +Wrote profile results to test_recomb_collector.py.lprof +Timer unit: 1e-06 s + +Total time: 2.59302 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_individual at line 26 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 26 @profile + 27 def add_individual(self, name, time, population=msprime.NULL_POPULATION, + 28 is_sample=False): + 29 '''Add a new individual. + 30 We need to add individuals when they are *born*, + 31 rather than the first time they reproduce, to ensure + 32 that records are output in order by birth time of the parent. + 33 ''' + 34 402005 305786 0.8 11.8 if name not in self: + 35 402005 231339 0.6 8.9 self[name] = (msprime.Node(time=time, population=population, + 36 402005 1157105 2.9 44.6 name=name, is_sample=is_sample), []) + 37 402005 898793 2.2 34.7 self.num_nodes = max(self.num_nodes, 1+int(name)) + +Total time: 28.8526 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: add_record at line 39 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 39 @profile + 40 def add_record(self, left, right, parent, children): + 41 ''' + 42 Add records corresponding to a reproduction event in which children (a + 43 tuple of IDs) inherit from parent (a single ID) on the interval + 44 [left,right). + 45 ''' + 46 # unneeded but helpful for debugging + 47 559608 1478529 2.6 5.1 if parent not in self.keys(): + 48 raise ValueError("Parent " + str(parent) + + 49 "'s birth time has not been recorded with " + + 50 ".add_individual().") + 51 # time = self[parent][0] + 52 559608 300326 0.5 1.0 new_rec = msprime.Edgeset( + 53 559608 254879 0.5 0.9 parent=parent, + 54 559608 242579 0.4 0.8 children=children, + 55 559608 238853 0.4 0.8 left=left, + 56 559608 1555702 2.8 5.4 right=right) + 57 559608 24781697 44.3 85.9 merge_records(new_rec, self[parent][1]) + +Total time: 14.25 s +File: /home/jaime/lib/ftprime/ftprime/argrecorder.py +Function: merge_records at line 142 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 142 @profile + 143 def merge_records(new, existing): + 144 ''' + 145 Incorporate a new record (l,r,x,c,t[x]) + 146 into a list of existing ones (a,b,x,C,t[x]) sorted on left endpoint. + 147 Keeping them in sorted order simplifies the procedure + 148 (makes it so we don't have to split the new record). + 149 ''' + 150 559608 617547 1.1 4.3 k = 0 + 151 559608 348545 0.6 2.4 cur_left = new.left + 152 # print("MR: -----") + 153 # print("adding", new) + 154 # print(" to", existing) + 155 1115110 964636 0.9 6.8 while (k < len(existing)) and (cur_left < new.right): + 156 555502 493499 0.9 3.5 left = existing[k].left + 157 555502 339232 0.6 2.4 right = existing[k].right + 158 555502 349807 0.6 2.5 parent = existing[k].parent + 159 555502 360762 0.6 2.5 children = existing[k].children + 160 # print("k:",k) + 161 # print("existing:",existing[k]) + 162 # print("cur_left:",cur_left) + 163 555502 336500 0.6 2.4 if new.parent != parent: + 164 raise ValueError("Trying to merge records with different parents.") + 165 555502 327994 0.6 2.3 if right <= cur_left: + 166 # no overlap + 167 # print("no overlap") + 168 64431 38189 0.6 0.3 k += 1 + 169 64431 31893 0.5 0.2 continue + 170 491071 275269 0.6 1.9 if cur_left < left: + 171 # print("dangling left") + 172 46856 32937 0.7 0.2 existing.insert(k, msprime.Edgeset( + 173 46856 24331 0.5 0.2 left=cur_left, + 174 46856 53090 1.1 0.4 right=min(new.right, left), + 175 46856 24376 0.5 0.2 parent=parent, + 176 46856 132844 2.8 0.9 children=new.children)) + 177 46856 291283 6.2 2.0 cur_left = min(new.right, left) + 178 46856 29613 0.6 0.2 k += 1 + 179 46856 23300 0.5 0.2 continue + 180 444215 771674 1.7 5.4 combined_children = tuple(sorted(children+new.children)) + 181 444215 370745 0.8 2.6 combined_rec = msprime.Edgeset( + 182 444215 238826 0.5 1.7 left=cur_left, + 183 444215 1016910 2.3 7.1 right=min(new.right, right), + 184 444215 264162 0.6 1.9 parent=new.parent, + 185 444215 1220131 2.7 8.6 children=combined_children) + 186 444215 289937 0.7 2.0 if cur_left == left: + 187 # print("equal left") + 188 374551 262916 0.7 1.8 if new.right < right: + 189 # print("overlap right") + 190 62945 38629 0.6 0.3 mod_rec = msprime.Edgeset( + 191 62945 36505 0.6 0.3 left=new.right, + 192 62945 34104 0.5 0.2 right=right, + 193 62945 34012 0.5 0.2 parent=parent, + 194 62945 213261 3.4 1.5 children=children) + 195 62945 65925 1.0 0.5 existing[k] = combined_rec + 196 62945 41770 0.7 0.3 k += 1 + 197 62945 60624 1.0 0.4 existing.insert(k, mod_rec) + 198 62945 42126 0.7 0.3 k += 1 + 199 else: + 200 # print("dangling right") + 201 311606 291615 0.9 2.0 existing[k] = combined_rec + 202 311606 214005 0.7 1.5 k += 1 + 203 else: + 204 # here we know that left < cur_left < right + 205 # print("overlap left") + 206 69664 43775 0.6 0.3 mod_rec = msprime.Edgeset( + 207 69664 39459 0.6 0.3 left=left, + 208 69664 37684 0.5 0.3 right=cur_left, + 209 69664 37276 0.5 0.3 parent=parent, + 210 69664 183293 2.6 1.3 children=children) + 211 69664 70705 1.0 0.5 existing[k] = mod_rec + 212 69664 45699 0.7 0.3 k += 1 + 213 69664 64248 0.9 0.5 existing.insert(k, combined_rec) + 214 69664 43549 0.6 0.3 k += 1 + 215 69664 51159 0.7 0.4 if new.right < right: + 216 # print("overlap right") + 217 6531 4663 0.7 0.0 existing.insert(k, msprime.Edgeset( + 218 6531 4029 0.6 0.0 left=new.right, + 219 6531 3764 0.6 0.0 right=right, + 220 6531 3696 0.6 0.0 parent=parent, + 221 6531 15758 2.4 0.1 children=children)) + 222 6531 4291 0.7 0.0 k += 1 + 223 444215 411173 0.9 2.9 cur_left = min(new.right, right) + 224 # add whatever's left at the end + 225 559608 407324 0.7 2.9 if cur_left < new.right: + 226 309118 232210 0.8 1.6 existing.insert(k, msprime.Edgeset( + 227 309118 179186 0.6 1.3 left=cur_left, + 228 309118 192993 0.6 1.4 right=new.right, + 229 309118 182372 0.6 1.3 parent=new.parent, + 230 309118 1095978 3.5 7.7 children=new.children)) + 231 # print("getting") + 232 # for x in existing: + 233 # print(" ", x) + 234 559608 292228 0.5 2.1 return None + diff --git a/ftprime/argrecorder.py b/ftprime/argrecorder.py index b0c768f..553dccd 100644 --- a/ftprime/argrecorder.py +++ b/ftprime/argrecorder.py @@ -1,7 +1,6 @@ import msprime from collections import OrderedDict - class ARGrecorder(OrderedDict): ''' Keys are individual IDs, and values are tuples, whose first entry is a Node, @@ -24,6 +23,7 @@ def __str__(self): ret += self.edgeset_table().__str__() return ret + @profile def add_individual(self, name, time, population=msprime.NULL_POPULATION, is_sample=False): '''Add a new individual. @@ -36,6 +36,7 @@ def add_individual(self, name, time, population=msprime.NULL_POPULATION, name=name, is_sample=is_sample), []) self.num_nodes = max(self.num_nodes, 1+int(name)) + @profile def add_record(self, left, right, parent, children): ''' Add records corresponding to a reproduction event in which children (a @@ -138,6 +139,7 @@ def add_samples(self, samples, length, populations=None): children=(k,)) +@profile def merge_records(new, existing): ''' Incorporate a new record (l,r,x,c,t[x]) diff --git a/tests/test_arg_recorder_with_wf.py b/tests/test_arg_recorder_with_wf.py index cea72ba..27ecc50 100644 --- a/tests/test_arg_recorder_with_wf.py +++ b/tests/test_arg_recorder_with_wf.py @@ -28,19 +28,22 @@ def test_simulation_runs(N, gen, samples): random.seed(123) records = wf(N=N, ngens=gen, nsamples=samples, survival=0.5) - check_tables(records) + # check_tables(records) - for x in records: - print(x, records[x]) + # for x in records: + # print(x, records[x]) - print(records.edgeset_table()) - print(records.node_table()) + # print(records.edgeset_table()) + # print(records.node_table()) - ts = records.tree_sequence() + # ts = records.tree_sequence() - for t in ts.trees(): - print(t) + # for t in ts.trees(): + # print(t) - print("Mean pairwise diversity:",ts.get_pairwise_diversity()) - print("(should be zero)") - assert ts.get_pairwise_diversity() == 0.0 + # print("Mean pairwise diversity:",ts.get_pairwise_diversity()) + # print("(should be zero)") + # assert ts.get_pairwise_diversity() == 0.0 + +if __name__ == '__main__': + test_simulation_runs(1000, 200, 100) diff --git a/tests/test_recomb_collector.py b/tests/test_recomb_collector.py index fb3daa4..f5590b1 100644 --- a/tests/test_recomb_collector.py +++ b/tests/test_recomb_collector.py @@ -147,4 +147,4 @@ def __init__(self): ]), sim.CloneMating()], subPopSize=popsize * 2) - test_simupop(make_pop(mr), generations=100, popsize=500) + test_simupop(make_pop(mr), generations=200, popsize=1000)