-
Notifications
You must be signed in to change notification settings - Fork 258
/
flatten_obj.py
336 lines (269 loc) · 11.3 KB
/
flatten_obj.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
"""
flatten_obj.py - Flatten multi-state pymol objects into a single state.
This is particularly useful for dealing with biological assemblies, which are
loaded as multi-state objects when fetched using `fetch PDBID, type=pdb1`. It
can also be used as a quick way to combine multiple objects without causing
collisions between chain identifiers.
The command re-letters chains to avoid collisions. Older versions of PyMOL
restrict the chain id to a single character, so the script will fail for
assemblies with >62 chains. With more recent versions, this problem is solved
with multi-character chain IDs. Several options are available for how
re-lettering should occur.
Author: Spencer Bliven <[email protected]>
Date: October 30, 2015
Version: 1.0
License: Public Domain
"""
from pymol import cmd, stored
import re
try:
from collections import OrderedDict
_orderedDict = True
except ImportError:
_orderedDict = False
# Multi-letter chain IDs are supported from PyMOL 1.7.4 onwards, so a smaller
# alphabet is sufficient there. Earlier versions fall back to lower-case
# letters when needed (requires running `set ignore_case, 0`)
_long_chains = cmd.get_version()[1] >= 1.74
if _long_chains:
    _default_base = 36
else:
    _default_base = 62
class OutOfChainsError(Exception):
    """
    Raised when no further chain identifiers can be assigned.

    msg = description of the failure. It is stored on the `msg` attribute and
        returned by str().
    """
    def __init__(self, msg):
        # Hand msg to Exception as well so that `args`, repr() and
        # pickling/copying of the exception behave normally.
        super(OutOfChainsError, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)
class ChainSet(object):
    """
    Base class for various methods to rename chains

    Contains _chains, which maps from the renamed chain to a tuple with the
    original (object,state,chain). All read-only dict-like accessors work on
    ChainSets, e.g.
        chain_set["A"] -> ("obj",1,"A")

    Subclasses implement map_chain() to decide how new chain IDs are chosen.
    """

    # Characters used for chain IDs, in order: uppercase, digits, lowercase
    _alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz"

    def __init__(self):
        # Use an OrderedDict in Python >= 2.7 for better printing
        if _orderedDict:
            self._chains = OrderedDict()
        else:
            self._chains = dict()

    def map_chain(self, obj, state, origChain):
        """
        map_chain(string obj, int state, string chain) -> string

        Maps a chain letter to a unique chainID. Results are unique within each
        instance, and can be used as keys on this chain set.
        """
        raise NotImplementedError("Base class")

    # delegate most methods to _chains
    def __getattr__(self, at):
        # __getattr__ only runs when normal lookup fails. If _chains itself is
        # missing (e.g. while an instance is being copied or unpickled, before
        # __init__ has run) delegating to it would recurse without end.
        if at == "_chains":
            raise AttributeError(at)
        # Hide the mutating dict methods; entries are added through map_chain
        if at in "pop popitem update setdefault".split():
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (type(self).__name__, at))
        return getattr(self._chains, at)

    @staticmethod
    def _unwrap(other):
        """
        Returns the underlying mapping if other is a ChainSet, otherwise other
        itself. Lets two ChainSets be compared by their contents.
        """
        if isinstance(other, ChainSet):
            return other._chains
        return other

    # __cmp__ is only used by Python 2; Python 3 dicts do not provide it
    def __cmp__(self, other): return self._chains.__cmp__(ChainSet._unwrap(other))
    def __eq__(self, other): return self._chains.__eq__(ChainSet._unwrap(other))
    def __ge__(self, other): return self._chains.__ge__(ChainSet._unwrap(other))
    def __gt__(self, other): return self._chains.__gt__(ChainSet._unwrap(other))
    def __le__(self, other): return self._chains.__le__(ChainSet._unwrap(other))
    def __lt__(self, other): return self._chains.__lt__(ChainSet._unwrap(other))
    def __ne__(self, other): return self._chains.__ne__(ChainSet._unwrap(other))
    def __len__(self): return self._chains.__len__()
    def __contains__(self, key): return self._chains.__contains__(key)
    def __getitem__(self, key): return self._chains.__getitem__(key)
    def __iter__(self): return self._chains.__iter__()
    def __str__(self): return str(self._chains)

    @staticmethod
    def _int_to_chain(i, base=_default_base):
        """
        _int_to_chain(int,int) -> str

        Converts a non-negative integer to a chain ID. Chain IDs include
        uppercase characters, numbers, and optionally lowercase letters.
        0 maps to "A"; once the single characters are exhausted, IDs grow to
        two characters ("AA", "AB", ...) and so on.

        i = a non-negative integer to convert
        base = the alphabet size to include (1 to 62). Typically 36 or 62.

        Raises ValueError if i is negative or base is outside 1..62.
        """
        i = int(i)
        if i < 0:
            raise ValueError("positive integers only")
        # base 0 would divide by zero below, so the smallest valid base is 1
        if base < 1 or 62 < base:
            raise ValueError("Invalid base")

        letters = []  # least-significant character first
        while True:
            i, rem = divmod(i, base)
            letters.append(ChainSet._alphabet[rem])
            if i == 0:
                break
            # There is no "zero" digit: "AA" directly follows the last
            # single-character ID, hence the offset of one per position.
            i -= 1
        return "".join(reversed(letters))
class DefaultChainSet(ChainSet):
    """
    Keeps the original chain ID whenever it is still free. A chain whose ID is
    already taken receives the next unused ID instead. Be aware that this may
    cause cascading renames, e.g. when chains are sorted alphabetically rather
    than by object.

    Used for rename = 0.
    """
    def __init__(self):
        super(DefaultChainSet, self).__init__()
        # Index of the next candidate ID to hand out when renaming
        self._next_chain = 0

    def map_chain(self, obj, state, origChain):
        # Advance the counter past every ID that is already in use
        candidate = ChainSet._int_to_chain(self._next_chain)
        while candidate in self:
            self._next_chain += 1
            candidate = ChainSet._int_to_chain(self._next_chain)

        if origChain not in self:
            # Original ID is free; keep it
            assigned = origChain
        else:
            # Collision; take the next free ID
            assigned = candidate
            self._next_chain += 1

        self._chains[assigned] = (obj, state, origChain)
        return assigned
class SequentialChainSet(ChainSet):
    """
    Assigns every chain a fresh ID, beginning with A and running through the
    capital letters and numbers, then on to longer IDs through 9999 (the last
    valid chain for mmCIF) and beyond.

    Used for rename=1
    """
    def __init__(self):
        super(SequentialChainSet, self).__init__()
        # Index of the ID that the next mapped chain will receive
        self._next_chain = 0

    def map_chain(self, obj, state, origChain):
        index = self._next_chain
        label = ChainSet._int_to_chain(index)
        self._chains[label] = (obj, state, origChain)
        self._next_chain = index + 1
        return label
class LongChainSet(ChainSet):
    """
    Builds long, descriptive chain names of the form
    "%s_%s_%04d"%(original_chainid,objectname,state).

    Used for rename=2
    """
    def map_chain(self, obj, state, origChain):
        long_id = "%s_%s_%04d" % (origChain, obj, state)
        if long_id not in self:
            self._chains[long_id] = (obj, state, origChain)
            return long_id
        # The same (chain, object, state) triple was mapped before
        raise ValueError("Duplicate chain %s" % (long_id))
def flatten_obj(name="",selection="",state=0,rename=0,quiet=1,chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states

    """

    # arguments

    # Single argument; treat as selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    # Arguments arrive as strings when called from the PyMOL command line
    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    # Choose a prefix for the intermediate objects that no existing name (nor
    # the requested output name) starts with. The wildcard "<prefix>_*" is used
    # below to alter, merge and finally delete objects, so it must only ever
    # match the temporaries created here.
    taken = [n.lower() for n in (cmd.get_names("all") or [])]
    taken.append(name.lower())
    metaprefix = "temp"
    attempt = 0
    while any(n.startswith(metaprefix + "_") for n in taken):
        attempt += 1
        metaprefix = "temp%d" % attempt

    # matches split object names: <metaprefix>_<object>_<state number>
    statere = re.compile(r"^%s_(.*)_(\d+)$" % re.escape(metaprefix))

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order",0)

        # Everything that may leave intermediates behind runs inside this
        # try, so they are removed even if splitting or renaming fails.
        try:
            # create new object for each state
            for obj in cmd.get_object_list(selection):
                if state <= 0:
                    # all states
                    prefix = "%s_%s_"%(metaprefix,obj)
                    cmd.split_states(obj,prefix=prefix)
                else:
                    prefix = "%s_%s_%04d"%(metaprefix,obj,state)
                    cmd.create(prefix, obj, state, 1)

            # renumber all states
            # Iterate over all objects with metaprefix
            for obj in cmd.get_object_list("(%s_*)"%(metaprefix) ):
                m = statere.match(obj)
                if m is None:
                    print(("Failed to match object %s" %obj))
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {} #old -> new, for this obj only
                # Shorter IDs first, then alphabetical
                for chain in sorted(chains,key=lambda x:(len(x),x)):
                    new_chain = chainSet.map_chain(origobj,statenum,chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print(("  %s state %d chain %s -> %s"%(origobj,statenum,chain, new_chain) ))
                    # Old PyMOL versions only accept single-character chains
                    if not _long_chains and len(new_chain) > 1:
                        raise OutOfChainsError("No additional chains available (max 62).")

                space = {'rev_chain_map':rev_chain_map}
                cmd.alter(obj,"chain = rev_chain_map[chain]",space=space)

            if not quiet:
                print(("Creating object from %s_*"%metaprefix))
            # Recombine into a single object
            cmd.create(name,"%s_*"%metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored,chain_map,chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(c.upper() != c for c in chainSet):
                print("Warning: using lower-case chain IDs. Consider running the "
                        "following command:\n  set ignore_case, 0" )

        finally:
            # Clean up
            if not quiet:
                print("Cleaning up intermediates")
            cmd.delete("%s_*"%metaprefix)
    finally:
        # restore original parameters
        if not quiet:
            print("Resetting variables")
        cmd.set("retain_order",retain_order)
# Make flatten_obj available as a PyMOL command
cmd.extend('flatten_obj', flatten_obj)

# tab-completion of arguments: the first two positions complete on objects
for _arg_index, _arg_label in enumerate(['name or selection', 'selection']):
    cmd.auto_arg[_arg_index]['flatten_obj'] = [cmd.object_sc, _arg_label, '']
del _arg_index, _arg_label