Skip to content

Commit

Permalink
Move useful function into a central location. Create a test.
Browse files Browse the repository at this point in the history
  • Loading branch information
olegsobolev committed Oct 9, 2024
1 parent 967088f commit 20d939a
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 45 deletions.
56 changes: 11 additions & 45 deletions mmtbx/programs/fetch_emdb.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
from __future__ import absolute_import, division, print_function
try:
from phenix.program_template import ProgramTemplate
except ImportError:
from libtbx.program_template import ProgramTemplate
import json
import urllib.parse
import libtbx.utils
from libtbx.program_template import ProgramTemplate
from libtbx.utils import Sorry
from mmtbx.wwpdb import rcsb_web_services

pdb_url_base = "https://search.rcsb.org/rcsbsearch/v2/query?json="
graphql_base = "https://data.rcsb.org/graphql?query="
emdb_base = "https://ftp.ebi.ac.uk/pub/databases/emdb/structures/"

master_phil_str = '''
Expand Down Expand Up @@ -43,45 +37,18 @@ def validate(self):

# ---------------------------------------------------------------------------

def api_query(self):
  """Look up the EMDB map codes associated with self.params.pdb_code.

  Builds a GraphQL query, URL-quotes it onto graphql_base, fetches the
  reply with libtbx.utils.urlopen and parses it as JSON. Every EMDB ID
  found is printed to self.logger and the list is stored in self.emdb_ids.

  Raises:
      Sorry: if the reply carries no entry for the code, if the first
        experimental method is not 'ELECTRON MICROSCOPY', or if no EMDB
        IDs are listed for the entry.
  """
  graphql_query = '''
query
{
entry(entry_id:"%s") {
exptl {
method
}
rcsb_entry_container_identifiers {
emdb_ids
}
}
}
'''
  pdb_code = self.params.pdb_code
  url = graphql_base + urllib.parse.quote(graphql_query % pdb_code)
  data = libtbx.utils.urlopen(url).read()
  entry_data = json.loads(data)['data']['entry']
  # Guard against a null entry (e.g. a code the service does not know) so
  # the user gets a Sorry message rather than a TypeError traceback.
  if not entry_data:
    raise Sorry('No entry found for PDB ID: %s' % pdb_code)
  exptl = entry_data['exptl']
  # A null or empty exptl list cannot be an EM entry either.
  if not exptl or exptl[0]['method'] != 'ELECTRON MICROSCOPY':
    raise Sorry('This entry is not an EM structure.')
  emdb_ids = entry_data['rcsb_entry_container_identifiers']['emdb_ids']
  # Truthiness covers both an empty list and a null value in the reply.
  if not emdb_ids:
    raise Sorry('No associated EMDB IDs found for this entry.')
  print('Found the following map codes associated to PDB ID: %s'
    % pdb_code, file=self.logger)
  for emdb_id in emdb_ids:
    print(emdb_id, file=self.logger)
  self.emdb_ids = emdb_ids

# ---------------------------------------------------------------------------

def download_pdb_file(self):
  """Download the PDB-format model for self.params.pdb_code.

  Fetches https://files.rcsb.org/download/<code>.pdb and saves it as
  <code>.pdb, a relative path (so it lands in the current working
  directory).
  """
  # Import the submodule explicitly: "import urllib.parse" alone does not
  # guarantee that urllib.request is loaded.
  import urllib.request
  pdb_code = self.params.pdb_code
  pdb_url = 'https://files.rcsb.org/download/%s.pdb' % pdb_code
  pdb_fn = '%s.pdb' % pdb_code
  urllib.request.urlretrieve(pdb_url, pdb_fn)

def download_maps(self):
for emdb_id in self.emdb_ids:
def download_maps(self, emdb_ids):
if emdb_ids is None:
raise Sorry('No associated EMDB IDs found for this entry.')
print('Found the following map codes associated to PDB ID: %s'
% self.params.pdb_code, file=self.logger)
for emdb_id in emdb_ids:
print("Downloading %s" % emdb_id, file=self.logger)
emdb_id_numeral = emdb_id.split('-')[1]
emdb_url = emdb_base + 'EMD-%s/map/emd_%s.map.gz' % (
emdb_id_numeral, emdb_id_numeral)
Expand All @@ -91,7 +58,6 @@ def download_maps(self):
# ---------------------------------------------------------------------------

def run(self):
self.emdb_ids = []
self.api_query()
emdb_ids = rcsb_web_services.get_emdb_id_for_pdb_id(self.params.pdb_code)
self.download_pdb_file()
self.download_maps()
self.download_maps(emdb_ids)
31 changes: 31 additions & 0 deletions mmtbx/wwpdb/rcsb_web_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,34 @@ def get_ligand_info_for_structures(pdb_ids):
c_id = str(chain_id)
result.append([pdb_id, c_id, lig_id, lig_mw, lig_formula, lig_name, smiles])
return result

def get_emdb_id_for_pdb_id(pdb_id):
  """ Find out EMDB ID given PDB ID by querying RCSB portal.

  Posts a GraphQL query for the entry's experimental method and EMDB
  identifiers to report_base_url and inspects the JSON reply.

  Args:
      pdb_id (str): pdb id
  Returns:
      list of emdb ids, e.g. ['EMD-37438'] or None if X-ray or not defined
      (no entry in the reply, first experimental method is not
      'ELECTRON MICROSCOPY', or no EMDB IDs listed).
  """

  graphql_query = '''
query
{
entry(entry_id:"%s") {
exptl {
method
}
rcsb_entry_container_identifiers {
emdb_ids
}
}
}
''' % pdb_id
  r = requests.post(report_base_url, json={"query":graphql_query})
  data_entry = r.json()['data']['entry']
  # A null entry (e.g. a PDB ID the service does not know) is "not defined".
  if not data_entry:
    return None
  exptl = data_entry['exptl']
  # Guard against a null/empty exptl list before indexing its first element.
  if not exptl or exptl[0]['method'] != 'ELECTRON MICROSCOPY':
    return None
  emdb_ids = data_entry['rcsb_entry_container_identifiers']['emdb_ids']
  # Truthiness covers both an empty list and a null value in the reply,
  # where len() would raise TypeError on the latter.
  if not emdb_ids:
    return None
  return emdb_ids
7 changes: 7 additions & 0 deletions mmtbx/wwpdb/tst_rcsb_web_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ def exercise_3():
r = rcsb_web_services.post_query(sort_by_resolution=True)
assert len(r) > 1

def exercise_get_emdb_id():
  """Exercise rcsb_web_services.get_emdb_id_for_pdb_id.

  Expects ['EMD-37438'] for '8wcc' and None for '1yjp'. The lookups go
  through rcsb_web_services, so this needs the remote service to be
  reachable.
  """
  emdb_ids = rcsb_web_services.get_emdb_id_for_pdb_id('8wcc')
  assert emdb_ids == ['EMD-37438'], emdb_ids
  emdb_ids = rcsb_web_services.get_emdb_id_for_pdb_id('1yjp')
  # None is a singleton: compare by identity, not equality.
  assert emdb_ids is None, emdb_ids

if (__name__ == "__main__"):
# thorough_exercise()
# check if internet and rcsb are available
Expand All @@ -105,6 +111,7 @@ def exercise_3():
exercise()
exercise_2()
exercise_3()
exercise_get_emdb_id()
print("OK")
else:
print("OK but skipped.")

0 comments on commit 20d939a

Please sign in to comment.