-
Notifications
You must be signed in to change notification settings - Fork 0
/
ptnode.py
2661 lines (2231 loc) · 110 KB
/
ptnode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
###############################################################################
#
# ptnode.py - node (strand, helix, terminus) classes for ptgraph.
#
# File: ptnode.py
# Author: Alex Stivala
# Created: July 2007
#
# $Id: ptnode.py 4259 2011-07-30 06:52:46Z alexs $
#
# A PTnode is a node in the protein topology graph. It represents a
# secondary structure which is an alpha helix or beta sheet.
#
# Since these are simple (linear) secondary structures, the
# PTnode will have two
# edges, one from the previous (towards N-terminal) PTnode and one to
# the next (towards C-terminal) PTnode. So we just keep these nodes
# in a list, the ordering of which is sufficient for this purpose.
#
# Specific PTNode types (Helix, Strand, Terminus) are derived from PTNode.
#
#
###############################################################################
import sys
from math import pi,atan2,acos
from numpy.oldnumeric import array
from numpy.oldnumeric.linear_algebra import singular_value_decomposition # part of Numeric
from Bio.PDB import *
from ptsecstruct import stride_chainid_to_pdb_chainid
from geometry import *
from ptmfile import mfile_write_strand, mfile_write_helix
from ptutils import get_int_icode,biopdbresid_to_pdbresseq,char_if_not_blank
#-----------------------------------------------------------------------------
#
# Module constants
#
#-----------------------------------------------------------------------------
ALPHA = 100 # multiplier of dircos to get second point on line
EPSILON = 1e-4 # epsilon for testing closeness to 0 or pi in fit_axis
#-----------------------------------------------------------------------------
#
# Module globals
#
#-----------------------------------------------------------------------------
verbose = False
global issued_warning
issued_warning = {} # warning for (chainid,res_seq_num) not found
#-----------------------------------------------------------------------------
#
# Class definitions
#
#-----------------------------------------------------------------------------
class PTNode:
"""
A PTnode is a node in the protein topology graph. It represents a
secondary structure which is an alpha helix or beta strand.
Since these are simple (linear) secnodary structures, the
PTnode will have two
edges, one from the previous (towards N-terminal) PTnode and one to
the next (towards C-terminal) PTnode. So we just keep these nodes
in a list, the ordering of which is sufficient for this purpose.
Specific PTNode types (Helix, Strand, Terminus) are derived from this
class.
The 'rich comparison' operators (<, <=, <, etc.) are overridden
so that they are integer comparison on the start residue sequence number.
Note equality and inequality are not overriden.
"""
def __init__(self, nodeid, seqnum, start_res_seq, end_res_seq, chainid,
domainid,
pdb_residue_list, pdb_resid_dict,
fake_resids = False):
"""
Construct a PTNode with supplied nodeid and type, and empty
hydrogen bond list.
Parameters:
nodeid - unique identifier for this node (string)
seqnum - sequence number of this node, note need not be unique
start_res_seq - smallest PDB residue sequence number in the structure
end_res_seq - largest PDB residue sequence number in the structure
Note these residue sequence 'numbers' are strings, may have
insertion codes.
chainid = chain identifier for this SSE
domainid = domain identifier for domain this SSE is in
pdb_residue_list - list of all residues (for all chains) in the protein
pdb_resid_dict - dict of { {chainid,pdb_resseq) : seqindx }
where chainid and pdb_resseq make up
the PDB residue identifier, the pdb_resseq
being string resnum+icode if any e.g.
'60' or '60A', seqindx is the indiex
into sequential list of all residues
pdb_residue_list.
fake_resids - Default False. IF True, do not look up resids in
pdb_resid_dict as they do not really exist; used for
terminus nodes.
Exceptions:
raises ValueEror if end_res_seq < start_res_seq
"""
if ( not fake_resids ):
try:
start_res_indx = pdb_resid_dict[(chainid, start_res_seq)]
except KeyError:
if not issued_warning.has_key((chainid, start_res_seq)):
sys.stderr.write('WARNING: residue ' + start_res_seq +
' (chain ' +
chainid +
') not found. May be HETATM.\n')
issued_warning[(chainid,start_res_seq)] = True
while not pdb_resid_dict.has_key((chainid, start_res_seq)):
start_res_seq = str(get_int_icode(start_res_seq)[0] + 1)
start_res_indx = pdb_resid_dict[(chainid, start_res_seq) ]
try:
end_res_indx = pdb_resid_dict[(chainid, end_res_seq)]
except KeyError:
if not issued_warning.has_key((chainid, end_res_seq)):
sys.stderr.write('WARNING: residue ' + end_res_seq +
' (chain ' +
chainid +
') not found. May be HETATM.\n')
issued_warning[(chainid,end_res_seq)] = True
while not pdb_resid_dict.has_key((chainid, end_res_seq)):
end_res_seq = str(get_int_icode(end_res_seq)[0] - 1)
end_res_indx = pdb_resid_dict[(chainid, end_res_seq)]
if ( end_res_indx < start_res_indx ):
raise ValueError("end residue seqnum " + str(end_res_indx) + " < " +
"start residue seqnum " + str(start_res_indx))
self.nodeid = nodeid
self.seqnum = seqnum # assigned sequentially, maybe not unique,
# used so strands can have a sequence number
# and helices also (both start at 1)
self.start_res_seq = start_res_seq # start PDB residue seq num
self.end_res_seq = end_res_seq # end PDB residue seq num
self.chainid = chainid # id of the chain this node is in
self.pdb_residue_list = pdb_residue_list
self.pdb_resid_dict = pdb_resid_dict
self.hydrogen_bond_list = [] # list of (PTnode, r1, r2, dist) tuples
self.sideways = False # True if element to be
# drawn left-right not up-down
# use get/set_sideways()
self.reversed = False # True if strand is to be drawn in reverse
# direction. This is set based on the
# parallel/antiparaell relationshiup to
# neighbour strand (so once the first
# in a sheet is set to False, all the others
# are set based on this and following the
# bridge relationships and their 'N' or 'P' flag).
# Also used for helices, where the tableau
# information is used i.e. orienation as
# parallel or antiparallel.
# use get/set_reversed()
self.residue_list = [] # list of Bio.PDB Residue objects in this SSE.
# Built and returned by get_residue_list()
self.resname_list = [] # list of 3 letter residue names in this SSE
# built by build_resname_sequence()
self.resid_list = [] # list of string PDB residue sequence numbers
# built by build_resname_sequence()
self.residue_ordinal_dict = None
# dict { pdb_resseq : ordinal } mapping
# the PDB sequence number string to integer orinal position
# in sequence. Used to memoize this funtino so fast time it
# is called the dict is built, subsequent calls look up in
# dictionary.
# built get by get_residue_ordinal()
self.fake_resids = fake_resids
self.seq_component_id = None # intger sequence connect componented
# id used for the 'fold' color scheme:
# all strands in a sheet that are
# connected in sequence with no
# other SSEs in between have
# the same id
# similar for helices in clusters if
# used.
# use get/set_seq_component_id()
self.domain_id = domainid # domain identifier for domain this SSE is in
# The following are only used by domainfc.py not ptgraph2.py:
self.closenode_list=[]# list of (node, dist) tuples for
# PTNodes within some distance threshold
# of this node
self.endchain = False # True if first or last in chain.
# use get/set_endchain()
def __str__(self):
"""
Return String representation of the node as 'TYPE id'
"""
return "PTNode" + " " + self.nodeid
def __lt__(self, other):
"""
Compare based on chainid and start residue sequence number
"""
assert(isinstance(other, PTNode))
return (self.pdb_resid_dict[(self.chainid, self.start_res_seq)] <
self.pdb_resid_dict[(other.chainid,other.start_res_seq)])
def __le__(self, other):
"""
Compare based on chainid and start residue sequence number
"""
assert(isinstance(other, PTNode))
return (self.pdb_resid_dict[(self.chainid, self.start_res_seq)] <=
self.pdb_resid_dict[(other.chainid,other.start_res_seq)])
def __gt__(self, other):
"""
Compare based on chainid and start residue sequence number
"""
assert(isinstance(other, PTNode))
return (self.pdb_resid_dict[(self.chainid, self.start_res_seq)] >
self.pdb_resid_dict[(other.chainid,other.start_res_seq)])
def __ge__(self, other):
"""
Compare based on chainid and start residue sequence number
"""
assert(isinstance(other, PTNode))
return (self.pdb_resid_dict[(self.chainid, self.start_res_seq)] >=
self.pdb_resid_dict[(other.chainid,other.start_res_seq)])
def get_chainid(self):
"""
Return the chain identifier in this structure node
Parameters: None
Uses membe data (readonly):
chainid
Return value:
The chain identifier of this structure node
"""
return self.chainid
def get_start_res_seq(self):
"""
Return the lowest residue sequence number in this structure node
Parameters: None
Uses member data: (readonly)
start_res_seq
Return value:
The residue sequence number at the start of this structure
NB this is a pdb residue sequence nubmer so it is a string
and may have an insertion code
"""
return self.start_res_seq
def get_end_res_seq(self):
"""
Return the highest residue sequence number in this structure node
Parameters: None
Uses member data: (readonly)
end_res_seq
Return value:
The residue sequence number at the end of this structure
NB this is a pdb residue sequence number so it is a astring and
may have an insertion code.
"""
return self.end_res_seq
def get_span(self):
"""
Return the length in residues spanned by this structure node
Parameters: None
Uses member data: (readonly)
end_res_seq, start_res_seq
Return value: length in resisudes spanned by this node, ie the
number of residues in this node.
"""
return len(self.get_residue_list())
def is_in_interval(self, res_seq_num):
"""
Return True iff the supplied residue sequence number is contained
in the interval spanned by this PTNode.
Parameters:
res_seq_num - PDB resisude sequence number to test (assumed in
this chain)
Uses member data (readonly):
chainid,start_res_seq, end_res_seq - start/end res seq num and
chainid of this node
pdb_resid_dict - dict mapping chainid,resnum+icode to sequential
index
Return value:
True if res_seq_num is >= start_res_seq and <= end seq num
else False
"""
if self.fake_resids:
return False
try:
res_indx = self.pdb_resid_dict[(self.chainid, res_seq_num)]
except KeyError:
if not issued_warning.has_key((self.chainid, res_seq_num)):
sys.stderr.write('WARNING: residue ' + res_seq_num + ' (chain ' +
self.chainid + ') not found. May be HETATM.\n')
issued_warning[(self.chainid,res_seq_num)] = True
return False
return \
( res_indx >= self.pdb_resid_dict[(self.chainid,self.start_res_seq)]
and
res_indx <= self.pdb_resid_dict[(self.chainid,self.end_res_seq)] )
def add_hbond(self, other_node, resnum1, resnum2, dist):
"""
Add a hydrogen bond to the list of hydrogen bonds in this node.
The bond is from PDB resdiue number resnum1 (in this node) to
PDB residue number resnum2 in other_node with distance dist (Angstroms).
Parameters:
other_node - PTNode the bond is to from this node
resnum1 - PDB residue number, must be in this node
resnum2 - PDB residue number, must be in other_node
dist - (float) N..O distance of the bond (Angstroms)
Uses data members (write):
hydrogen_bond_list - list of (ptnode, resnum1, resnum2, dist) tuples
Return value: None
"""
assert(isinstance(other_node, PTNode))
self.hydrogen_bond_list.append((other_node, resnum1, resnum2, dist))
def get_hbond_list(self):
"""
Return list of hydrogen bonds from this node in form of list of
(ptnode, resnum1, resnum2, dist) tuples.
Parameters: None
Uses member data (readonly):
hydrogen_bond_list - list of (other_node, resnum1, resnum2, dist)
tuples
Return value:
list of (ptnode, resnum1, resnum2, dist) tuples.
The actual list (ref)
in this node, not a new copy.
"""
return self.hydrogen_bond_list
def get_residue_ordinal(self, pdb_resseq):
"""
Given a PDB residue sequence number (may have insertino code) string
for a residue in this SSE, return its ordinal position int the
sequence of residues in this SSE (starting at 1).
Parameters:
pdb_resseq - string PDB residue sequence number e.g. '60' or '60A'
Return value:
integer >=1 ordinal number of the resiude in sequnce of residues
for this SSE (from N to C terminal end)
Uses data members (read/write):
residue_ordinal_dict - dict { pdb_resseq : ordinal } mapping
the PDB sequence number string to integer orinal position
in sequence. Used to memoize this funtino so fast time it
is called the dict is built, subsequent calls look up in
dictionary.
"""
if self.residue_ordinal_dict:
return self.residue_ordinal_dict[pdb_resseq]
self.residue_ordinal_dict = {}
ordinal = 1
for residue in self.get_residue_list():
res_seq = biopdbresid_to_pdbresseq(residue.get_id())
self.residue_ordinal_dict[res_seq] = ordinal
ordinal += 1
return self.residue_ordinal_dict[pdb_resseq]
def get_residue_list(self):
"""
Return the list of Bio.PDB Residue objects for the residues in this
PTNode (strand or helix).
Also store list in residue_list data member, and return stored
value if already there.
Parameters:
None
Uses data members:
residue_list (read/write).
pdb_residue_list, pdb_resid_dict (readonly)
start_res_seq,end_res_seq,chainid (readonly)
"""
if self.residue_list: # memoization: return if already computed
return self.residue_list
start_indx = self.pdb_resid_dict[(self.chainid,self.start_res_seq)]
end_indx = self.pdb_resid_dict[(self.chainid,self.end_res_seq)]
self.residue_list = self.pdb_residue_list[start_indx : end_indx + 1]
return self.residue_list
def set_sideways(self, sideways):
"""
Label this strand with the sideways flag. See comments in __init__
Parameters: sideways - True/False sideways flag
Return value: None
Uses data members (write): sideways
"""
self.sideways = sideways
def get_sideways(self):
"""
Get the value of the sideways flag. See comments in __init__
Parameters: None.
Return value: True/False value of sideways flag
Uses data members (readnly): sideways
"""
return self.sideways
def set_reversed(self, reversed):
"""
Label this node with the reversed flag. See comments in __init__
Parameters: reversed - True/False reversed flag
Return value: None
Uses data members (write): reversed
"""
self.reversed = reversed
def get_reversed(self):
"""
Get the value of the reversed flag. See comments in __init__
Parameters: None.
Return value: True/False value of reversed flag
Uses data members (readnly): reversed
"""
return self.reversed
def is_resnum_nearer_top(self, resnum):
"""
Return True if the supplied residue sequence number is nearer
the 'top' than the 'bottom' of this strand or helix (SSE).
I.e. if it is nearer
to the C- than the N- terminal end of the strand when the
SSE is not reversed (i.e. drawn pointing 'up'). Or, if the
SSE is reversed (drawn pointing 'down'), if it is nearer he
N terminal than the C terminal end of the SSE.
Parameters:
resnum - residue sequence number, must be in this strand
Uses data members (readonly)
start_res_seq, end_res_seq, reversed
Return value:
True iff resnum is nearer C term and strand is not reversed OR
resnum is nearer N terman and strand is reversed
"""
assert(self.is_in_interval(resnum))
midpoint_resnum = ( self.get_residue_ordinal(self.start_res_seq) +
(self.get_residue_ordinal(self.end_res_seq)
- self.get_residue_ordinal(self.start_res_seq))
/ 2 )
if resnum > midpoint_resnum:
# closer to C terminal end
return not self.reversed
else:
# closer to N terminal end
return self.reversed
def add_closenode(self, other_node, dist):
"""
Add a (node, dist) tuple
to the list of nodes that are close to this one.
Parameters:
other_node - PTNode which is below threshold distance from this node.
dist - distance to the other_node (Angstroms)
Uses member data (write):
closenode_list - the list of nodes close to this one.
Return value: None
"""
assert(isinstance(other_node, PTNode))
self.closenode_list.append((other_node, dist))
def get_closenode_list(self):
"""
Return list of (node, dist) tuples for nodes
that are below threshold distance from this one.
Parameters: None
Uses member data (readonly):
closenode_list - list of close nodes
Return value:
List of (ptnode, dist) tuples. The actual list (ref) in this
node, not a new copy.
"""
return self.closenode_list
def set_endchain(self, endchain):
"""
Label this node with the endchain flag. See comments in __init__
Parameters: reversed - True/False endchain flag
Return value: None
Uses data members (write): endchain
"""
self.endchain = endchain
def get_endchain(self):
"""
Get the value of the endchain flag. See comments in __init__
Parameters: None.
Return value: True/False value of endchain flag
Uses data members (readnly): endchain
"""
return self.endchain
def get_degree(self):
"""
Return the degree of this node (number of edges incident with it).
This is equal to the number of spatial edges
(nubmer of spatially adjacent nodes) plus the number of
sequence edges which are implicit - all nodes have two except
the first and last in a chain which have only one.
Parameters:
None
Return value:
Degree of the node.
"""
deg = len(self.closenode_list)
if self.endchain:
deg += 1
else:
deg += 2
return deg
def axis_dihedral_angle(self, SSE1, SSE2, pdb_struct):
"""
We fit an axis to each of the two SSEs, and this SSE, and
compute the dihedral angle between the two planes defined by
(1) the axis lines SSE1 and self, and (2) SSE2 and self.
3 consecutive vectors between 4 points can define a dihedral
angle. Call the points A,B,C,D in order. ABC is a plane and
BCD is a plane and we can calculate the angle between those
planes by the conventional formula (using Bio.PDB Vector
module).
Here, instead of having this vectors as actual bonds between
atoms (as in hbonds_dihedral_angle()), we are using purely the
abstraction of the SSEs as straight lines. Like
Arun Konagurthu's method in TableauCreator (email 05Oct2007)
we choose the points so that the vector between each of the two
SSE axes and self SSE axis is the shortest line between
the axes (mutual perpendicular). So we choose A and B as the
points on SSE1 axis and self axis respectively such that AB is
the shortest line between the SSE1 axis and this axis.
Similarly C and D are chosen so that CD is the shortest line
between this axis and SSE2 axis.
SSE1 self SSE2
| | |
| | | AB is shortest line between
| | v3 | SSE1 and self, defining
| C *--------->* D vector v1
| ^ |
| | | CD is shortest line between
| | | self and SSE2, defining
| | v2 | vector v3
| | |
| (|)theta | v2 is then defined by the
| | | line BC
A *--------->* B |
| v1 | | and the dihedral angle theta
| | | between planes ABC and BCD
| | | is given by:
| | |
|v2|v1 . (v2 x v3)
tan(theta) = --------------------
(v1 x v2) . (v2 x v3)
Parameters:
SSE1 - first SSE node (strand or helix) to test
SSE2 - 2nd SSE node (strand or helix) to test
pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
for this protein.
Return value:
The angle (in (-pi, pi]) between the planes formed between
the axes fitted to the two SSEs with this SSE in common.
or None if no common perpendicular can be found or one
(or more) of the required axes cannot be computed.
NOTE: this method is only for STRAND and HELIX nodes, which have
a fit_axis() method. This method is identical in operation between
these types of nodes so is shared, but fit_axis() works differently
so PTNodeHelix and PTNodeStrand each have their own implemetnation
of it.
"""
SSE1_axis = SSE1.fit_axis(pdb_struct)
SSE2_axis = SSE2.fit_axis(pdb_struct)
self_axis = self.fit_axis(pdb_struct)
if SSE1_axis == None or SSE2_axis == None or self_axis == None:
return None
(SSE1_dircos, SSE1_centroid) = SSE1_axis
(SSE2_dircos, SSE2_centroid) = SSE2_axis
(self_dircos, self_centroid) = self_axis
# Get pa and pb, endpoints of the shortest line segment between
# SSE1 axis line and self axis line
# Note using Numeric.array '*' operator here for
# element-wise multiplication
# as Bio.PDB.Vector '*' operator is vector dot product.
# (Note also must have Vector + int * array and NOT
# int * array + Vector due to Python type coercion rules).
s1self_line = LineLineIntersect(
SSE1_centroid, SSE1_centroid
+ ALPHA * SSE1_dircos.get_array(),
self_centroid, self_centroid
+ ALPHA * self_dircos.get_array() )
if s1self_line == None:
if verbose:
sys.stderr.write('no common perpendicular for axes ' +
SSE1.nodeid + ', ' + self.nodeid + '\n')
return None
else:
(pa, pb, mua, mub) = s1self_line
# and pc, pd similarly for self and SSE2
# Note using Numeric.array '*' operator here for
# element-wise multiplication
# as Bio.PDB.Vector '*' operator is vector dot product.
# (Note also must have Vector + int * array and NOT
# int * array + Vector due to Python type coercion rules).
s2self_line = LineLineIntersect(
self_centroid, self_centroid
+ ALPHA * self_dircos.get_array(),
SSE2_centroid, SSE2_centroid
+ ALPHA * SSE2_dircos.get_array() )
if s2self_line == None:
if verbose:
sys.stderr.write('no common perpendicular for axes ' +
SSE2.nodeid + ', ' + self.nodeid + '\n')
return None
else:
(pc, pd, muc, mud) = s2self_line
# angle = calc_dihedral(pa, pb, pc, pd)
v1 = pb - pa
v2 = pc - pb
v3 = pd - pc
# print 'xxx',self.nodeid,SSE1.nodeid,SSE2.nodeid,pa,pb,pc,pd
# Using Bio.PDB.Vector class; v*v is dot product, v**v is cross product
# This is the same result as calling Vector.calc_dihedral() anyway
# theta = atan2( Vector((v2.norm() * v1.get_array())) * (v2 ** v3),
# (v1 ** v2) * (v2 ** v3) )
# print 'xxxx',theta,
# theta0 = theta
# and this is the more elegant way, not using atan2() (as per Arun).
normals_dotprod = (v1 ** v2).normalized() * (v2 ** v3).normalized()
# handle roundoff errors
normals_dotprod = min(normals_dotprod, 1)
normals_dotprod = max(normals_dotprod, -1)
theta = acos(normals_dotprod)
normals_crossprod = (v1 ** v2).normalized() ** (v2 ** v3).normalized()
stp = v2 * normals_crossprod
if stp < 0:
theta = -theta
# print 'yyyy',theta
# assert(abs(theta0 - theta) < EPSILON)
if verbose:
sys.stderr.write('axis_dihedral_angle ' + self.nodeid + ' '
+ SSE1.nodeid + ' '
+ SSE2.nodeid + ' '
+ str(theta) + '\n')
return theta
def relative_angle(self, SSE1, pdb_struct):
"""
We fit an axis to this SSE and the supplied other SSE (SSE1), and
compute the relative angle (omega) between the two axes.
This is the angle use to define the Tableau
(See Kamat and Lesk 2007 Proteins 66:869-876 and
Konagurthu, Stuckey and Lesk 2008 'Structural search and retrieval using
a tableau representation of protein folding patterns' Bioinformatics
(advance access, to be published Jan 5 2008).
As per Arun Konagurthu's method in TableauCreator (email
05Oct2007) we choose the points so that the vector between
each of the two SSE axes is the shortest line between the axes
(mutual perpendicular). So we choose A and B as the points on
SSE1 axis and self axis respectively such that AB is the
shortest line between the SSE1 axis and this axis.
SSE1 self
| |
| | BC is shortest line between
| | SSE1 and self, defining
| | vector v2
A * * D
| /|\
| | v1 and v3 are vectors
| | defining the axes of SSE1
|v1 | v3 and self respectively.
| |
\|/ omega #| The relative angle omega
B *---(-)--->* C (interaxial angle) is the
|# v2 | smallest angle required to
| | reorient v1 to eclipse v3
| | (or vice versa):
| |
|v2|v1 . (v2 x v3)
tan(omega) = --------------------
(v1 x v2) . (v2 x v3)
Parameters:
SSE1 - The other SSE (helix or strand) to get relative angle to self
pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
for this protein.
Return value:
The angle (in (-pi, pi]) required to reorient self axis
to eclipse axis of SSE1 (or vice versa) 'looking along' the
shortest line (mutual perpendicular) between the two axes.
NOTE: this method is only for STRAND and HELIX nodes, which have
a fit_axis() method. This method is identical in operation between
these types of nodes so is shared, but fit_axis() works differently
so PTNodeHelix and PTNodeStrand each have their own implemetnation
of it.
"""
sse1_axis = SSE1.fit_axis(pdb_struct)
self_axis = self.fit_axis(pdb_struct)
if sse1_axis == None or self_axis == None:
return None
(SSE1_dircos, SSE1_centroid) = sse1_axis
(self_dircos, self_centroid) = self_axis
pa = SSE1_centroid + ALPHA * SSE1_dircos.get_array()
pd = self_centroid + ALPHA * self_dircos.get_array()
# Get pb and pc, endpoints of the shortest line segment between
# SSE1 axis line and self axis line
# Note using Numeric.array '*' operator here for
# element-wise multiplication
# as Bio.PDB.Vector '*' operator is vector dot product.
# (Note also must have Vector + int * array and NOT
# int * array + Vector due to Python type coercion rules).
s1self_line = LineLineIntersect(SSE1_centroid, pa,
self_centroid, pd)
if s1self_line == None:
if verbose:
sys.stderr.write('relative_angle: ' +
'no common perpendicular for axes ' +
str(self) + ',' + str(SSE1) + '\n')
return None
else:
(pb, pc, mub, muc) = s1self_line
# # DEBUG
# if verbose:
# sys.stderr.write('mutual perpendicular ' + self.nodeid + ' '
# + SSE1.nodeid +
# ': pb = ' + str(array(list(pb))) +
# '; pc = ' + str(array(list(pc))) + '\n')
# # END DEBUG
# omega = calc_dihedral(pa, pb, pc, pd)
v1 = pb - pa
v2 = pc - pb
v3 = pd - pc
# Using Bio.PDB.Vector class; v*v is dot product, v**v is cross product
# This is the same result as calling Vector.calc_dihedral() anyway
# omega = atan2( Vector((v2.norm() * v1.get_array())) * (v2 ** v3),
# (v1 ** v2) * (v2 ** v3) )
# print 'xxxx',omega,
# and this is the more elegant way, not using atan2() (as per Arun).
normals_dotprod = (v1 ** v2).normalized() * (v2 ** v3).normalized()
# handle roundoff errors
normals_dotprod = min(normals_dotprod, 1)
normals_dotprod = max(normals_dotprod, -1)
omega = acos(normals_dotprod)
normals_crossprod = (v1 ** v2).normalized() ** (v2 ** v3).normalized()
stp = v2 * normals_crossprod
if stp < 0:
omega = -omega
# print 'yyyy',omega
# DEBUG
# if verbose:
# sys.stderr.write('relative_angle ' + self.nodeid + ' '
# + SSE1.nodeid + ' '
# + str(omega) + '\n')
# END DEBUG
return omega
def build_resname_sequence(self):
"""
Build list of (3 letter) residue names in sequence for the residues
in this node (SSE). E.g. and matching
list of PDB residue sequence numbers
Parameters:
None.
Return value: None
Uses data member (write): resname_list
resid_list
"""
residue_list = self.get_residue_list()
self.resname_list = [residue.get_resname() for residue in residue_list]
# id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode)
self.resid_list = [str(residue.get_id()[1]) +
char_if_not_blank(residue.get_id()[2])
for residue in residue_list]
def set_seq_component_id(self, seqc_id):
"""
Label this node with a sequence connected component identifier
Parameters:
seqc_id - int seq connected component id
Uses data members:
seq_component_id (write)
Return value: None
"""
self.seq_component_id = seqc_id
def get_seq_component_id(self):
"""
Return the id of the seq connectec component to which
this node belongs (may be None)
Parameters: None
Uses data members (readonly): seq_component_id
Return value: Seq cc id set in this node (int) or None
"""
return self.seq_component_id
class PTNodeHelix(PTNode):
"""
The PTNodeHelix class is the type of PTNode for an alpha helix.
"""
def __init__(self, helixtype="ALPHA", *args):
"""
Construct PTNodeHelix with supplied nodeid and type.
Parameters:
helixtype - string "ALPHA" or "PI" or "310". default "ALPHA".
+Variable parameter list: straight to PTNode constructor (q.v.).
This extends PTNode by adding is_positioned,
and other than that just calls PTNode constructor.
Raises exceptions:
TypeError if helixtype argument is invalid.
"""
if helixtype not in ["ALPHA", "PI", "310"]:
raise TypeError("Invalid helixtype " + helixtype + "\n")
PTNode.__init__(self, *args)
self.helixtype = helixtype
#
# Flags used in heuristic helix placement
#
self.is_positioned = False # set to True if helix is already positioned
# (drawn). Used when old_helix_placement
# (i.e. -i option NOT supplied) is in use,
# sometimes as a special case we position
# helices before calling write_helices_svg()
# and this flag is set to mark these as
# already positioned.
# use get/set_is_positioned
self.is_interstrand = False # Set to True to mark special case
# of helix between strands on same
# vert axis in same sheet, treated
# specially when using heuristic helix
# placement to
# force interstand helices beside sheet.
# If this is set then the reversed
# flag of this helix node (in base class)
# is set to reversed flag of
# the first strand N-terminal to it.
# (see write_insterstrand_helices_svg()).
# Use get/set is_interstrand.
self.is_last_in_seq = False # Set to True if this helix is the last
# in a sequence of helices all aligned
# on an axis, AND n-terminal strand
# is on different axis.
# Used so that if
# connector into this helix is on
# another axis then we know to use
# the top (reversed) else bottom
# port instead of what what normally
# be used (reveresed is set as per
# is inter_strand (see comments above).
# Use get/set is_last_in_seq().
self.cluster_id = None # If use helix 'clustering' then this is
# the intege id of the cluster to which this
# helix belongs (ids assigned sequentially
# starting from 1).
# Use get/set cluster_id()
self.axis_centroid = None # Bio.PDB.Vector representing the
# centroid of midpoints of consecutive
# c_alpha atoms of consecutive residue triples
# in the helix. Set by fit_axis()
self.axis_direction_cosines = None # Bio.PDB.Vector representing the
# direction cosines of the axis line