Skip to content

Commit

Permalink
triplexibility.py: add check for seq (from file) or from seq (re-do)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmagnus committed Jan 21, 2022
1 parent 0e89506 commit e1d503f
Showing 1 changed file with 106 additions and 72 deletions.
178 changes: 106 additions & 72 deletions rna_tools/tools/triplexibility/triplexibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
warnings.filterwarnings('ignore', '.*is discontinuous*',)
warnings.filterwarnings('ignore', '.*Ignoring unrecognized record*',)

import sys
from icecream import ic
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
ic.configureOutput(prefix='> ')

from rna_tools.rna_tools_lib import set_chain_for_struc, RNAStructure
import argparse
import sys
Expand Down Expand Up @@ -60,7 +65,7 @@ def get_report(self):
t += ' '.join(['resi: ', str(r) ,' atom: ', str(a) , '\n' ])
return t

def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True):
def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True, tseq=''):
"""Calc rmsd P-atom based rmsd to other rna model
sugar now 10 atoms ;-) """
Expand Down Expand Up @@ -126,6 +131,16 @@ def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True):
self.atoms_for_rmsd = self.atoms
other_atoms_for_rmsd = other_rnamodel.atoms

# calc rmsd #
if not tseq:
tseq = ''
rt = None
for a in self.atoms_for_rmsd:
r = a.get_parent()
if r != rt:
rt = r
tseq += r.get_resname().strip()

if triple_mode:
def chunks(lst, n):
"""Yield successive n-sized chunks from lst.
Expand All @@ -140,8 +155,11 @@ def chunks(lst, n):
lst = list(chunks(other_atoms_for_rmsd,
int(len(other_atoms_for_rmsd)/3))) # for 1 atom, this will be 1 x 3 [3 residues]
# so so len is 3 atoms so / by 3 to get how many atoms per residue
sup_min = None

sup_min = None
seq_min = 'not yet obtained, rmsd rejected!'
p_min = None
rms = -1
for p in per:
patoms = []
for i in p: # p=(1, 2, 3)
Expand All @@ -154,8 +172,6 @@ def chunks(lst, n):
## for a in self.atoms_for_rmsd:
## print(a, a.get_parent().get_id())
#sup.set_atoms(patoms, self.atoms_for_rmsd)
sup.set_atoms(self.atoms_for_rmsd, patoms)
rms = round(sup.rms, 2)

rt = None
seq = ''
Expand All @@ -164,74 +180,89 @@ def chunks(lst, n):
if r != rt:
rt = r
seq += r.get_resname().strip()

ic(tseq.lower(), seq.lower(), other_rnamodel.fpath)
# dont' even calc rmsd if the curr seq and tseq are not the same
if tseq.lower() == seq.lower(): # only if seq is the same
ic(self.atoms_for_rmsd, patoms)
for a in self.atoms_for_rmsd:
ic(a, a.get_parent().id, a.get_parent().get_resname())
ic('# patoms')
for a in patoms:
ic(a, a.get_parent().id, a.get_parent().get_resname())

ic(len(self.atoms_for_rmsd), len(patoms))
sup.set_atoms(self.atoms_for_rmsd, patoms)
rms = round(sup.rms, 2)
if rms < rmsd_min:
rmsd_min = rms
sup_min = copy.copy(sup)
suffix = seq
p_min = p
seq_min = seq
if args.debug: 'set new rmsd_min', rmsd_min

if args.debug:
print(p, '', [i + 1 for i in p], end=' ')
print(seq, 'seq_min: ' + seq_min, rms)

if p_min: # found target sequence
# what is this? ;-)
index = [0 ,0 ,0]
index[0] = p_min.index(0)
index[1] = p_min.index(1)
index[2] = p_min.index(2)

# ugly re-set 123 to crazy id ! + 100, so this will
# fill up 1 2 3 for the second for
rs = other_rnamodel.struc[0].get_residues()
for i, r in enumerate(rs):
r.id = (' ', index[i] + 253, ' ') # ugly, some random offset

for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
r.id = (' ', index[i] + 1, ' ')
if args.debug: print('r', r)

if rms < rmsd_min:
rmsd_min = rms
sup_min = copy.copy(sup)
suffix = seq
p_min = p
seq_min = seq

if args.debug:
print(p, '', [i + 1 for i in p], end=' ')
print(seq, 'seq_min: a' + seq_min, rms)

index = [0 ,0 ,0]
index[0] = p_min.index(0)
index[1] = p_min.index(1)
index[2] = p_min.index(2)

# ugly re-set 123 to crazy id ! + 100, so this will
# fill up 1 2 3 for the second for
rs = other_rnamodel.struc[0].get_residues()
for i, r in enumerate(rs):
r.id = (' ', index[i] + 253, ' ') # ugly, some random offset

for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
r.id = (' ', index[i] + 1, ' ')
if args.debug: print(r)

io = Bio.PDB.PDBIO()
sup_min.apply(other_rnamodel.struc.get_atoms())
# if args.debug: print(p_min, [i + 1 for i in p_min])
io.set_structure(other_rnamodel.struc)

args.save_here = True
if args.save_here:
folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
# print(f)
try:
os.mkdir(folder)
except:
pass
fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
#_s' + suffix + '.pdb'))
else:
fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')

# if args.debug: print(fout)
io.save(fout)

# ugly set chain to A
set_chain_for_struc(fout, 'A', save_file_inplace=True)
# and now run this to sort into 1 2 3

r = RNAStructure(fout)
remarks = r.get_remarks_text()
r1 = r.get_res_text('A', 1)
r2 = r.get_res_text('A', 2)
r3 = r.get_res_text('A', 3)
with open(fout, 'w') as f:
f.write(remarks)
f.write(r1)
f.write(r2)
f.write(r3)
r.reload()
r.get_rnapuzzle_ready()
if rmsd_min < 1: # !!!!!!!!!!!! ugly
r.write()
return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
io = Bio.PDB.PDBIO()
sup_min.apply(other_rnamodel.struc.get_atoms())
# if args.debug: print(p_min, [i + 1 for i in p_min])
io.set_structure(other_rnamodel.struc)

args.save_here = True
if args.save_here:
folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
# print(f)
try:
os.mkdir(folder)
except:
pass
fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
#_s' + suffix + '.pdb'))
else:
fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')

if args.debug: print(fout)
io.save(fout)

# ugly set chain to A
set_chain_for_struc(fout, 'A', save_file_inplace=True)
# and now run this to sort into 1 2 3

r = RNAStructure(fout)
remarks = r.get_remarks_text()
r1 = r.get_res_text('A', 1)
r2 = r.get_res_text('A', 2)
r3 = r.get_res_text('A', 3)
with open(fout, 'w') as f:
f.write(remarks)
f.write(r1)
f.write(r2)
f.write(r3)
r.reload()
r.get_rnapuzzle_ready()
if rmsd_min < 1: # !!!!!!!!!!!! ugly
r.write()
return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
else:
sup.set_atoms(self.atoms_for_rmsd, other_atoms_for_rmsd)
rms = round(sup.rms, 2)
Expand Down Expand Up @@ -303,16 +334,19 @@ def get_parser():
parser.add_argument('files', help='files', nargs='+')
parser.add_argument("--debug", action="store_true")
parser.add_argument("--sort", action="store_true", help='sort results based on rmsd (ascending)')
parser.add_argument("--tseq", help='target sequence, e.g. acu, find only triples of this sequence [use the order for the seq taken from the input PDB file, literally order of residues in a pdb file]')
return parser

# main
if __name__ == '__main__':
parser = get_parser()
args = parser.parse_args()
if not args.debug:
ic.disable()

target = RNAmodel(args.target)

models = args.files # opts.input_dir
models = args.files # opts.input_dir
tmp = []
if args.ignore_files:
for f in args.files:
Expand All @@ -329,7 +363,7 @@ def get_parser():
mrna = RNAmodel(m)
#print r1.fn, r2.fn, r1.get_rmsd_to(r2)#, 'tmp.pdb')
# print(m)
rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save) #, 'tmp.pdb')
rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save, args.tseq) #, 'tmp.pdb')
#except:
if 0:
print(m)
Expand Down

0 comments on commit e1d503f

Please sign in to comment.