Skip to content

Commit

Permalink
Merge pull request #32 from volkamerlab/pip
Browse files Browse the repository at this point in the history
Make KinFragLib pip-installable; fix CI issues
  • Loading branch information
dominiquesydow authored Jun 3, 2021
2 parents cdfe52e + cf1a537 commit 4fea90a
Show file tree
Hide file tree
Showing 18 changed files with 1,655 additions and 224 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ jobs:
python-version: "3.6"
- os: ubuntu-latest
python-version: "3.8"
- os: ubuntu-latest
python-version: "3.9"
#- os: ubuntu-latest
# python-version: "3.9"
#- os: macos-latest
# python-version: "3.6"
#- os: windows-latest
Expand Down
2 changes: 1 addition & 1 deletion data/fragment_library/original_ligands.json

Large diffs are not rendered by default.

1,278 changes: 1,278 additions & 0 deletions docs/demos/KinFragLib_demo.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@ dependencies:
- pip:
- black-nb
- https://github.com/volkamerlab/opencadd/archive/2a6cf9bcee72a3626114cda82af758e05ed5af18.zip
# KinFragLib itself
- https://github.com/volkamerlab/kinfraglib/archive/master.tar.gz
Empty file added kinfraglib/__init__.py
Empty file.
113 changes: 58 additions & 55 deletions notebooks/utils.py → kinfraglib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import seaborn as sns

from opencadd.databases.klifs import setup_remote

KLIFS_SESSION = setup_remote()

RDLogger.DisableLog("rdApp.*")
Expand All @@ -45,13 +46,13 @@
def read_fragment_library(path_to_lib):
"""
Read fragment library from sdf files (one file per subpocket).
Parameters
----------
path_to_lib : str
Path to fragment library folder.
Returns
-------
dict of pandas.DataFrame
Expand All @@ -76,14 +77,14 @@ def read_fragment_library(path_to_lib):
def _read_subpocket_fragments(subpocket, path_to_lib):
"""
Read fragments for input subpocket.
Parameters
----------
subpocket : str
Subpocket name, i.e. AP, SE, FP, GA, B1, or B2.
path_to_lib : str
Path to fragment library folder.
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -144,14 +145,14 @@ def _read_subpocket_fragments(subpocket, path_to_lib):

def get_original_ligands(fragment_library_concat):
"""
Get ligands from which the fragment library originated,
Get ligands from which the fragment library originated,
including each ligand's occupied subpockets, RDKit molecule (remote KLIFS access) and SMILES (from RDKit molecule).
Parameters
----------
fragment_library_concat : pandas.DataFrame
Fragment library data for one or multiple subpockets.
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -191,23 +192,25 @@ def get_original_ligands(fragment_library_concat):
structures_all = KLIFS_SESSION.structures.all_structures()
structures_all = structures_all[
[
"structure.pdb_id",
"structure.alternate_model",
"structure.chain",
"structure.ac_helix",
"structure.klifs_id"
"structure.pdb_id",
"structure.alternate_model",
"structure.chain",
"structure.ac_helix",
"structure.klifs_id",
]
]
structures_all = structures_all.rename(
columns={
"structure.pdb_id": "complex_pdb",
"structure.pdb_id": "complex_pdb",
"structure.alternate_model": "alt",
"structure.chain": "chain",
"structure.ac_helix": "ac_helix",
"structure.klifs_id": "klifs_id"
"structure.klifs_id": "klifs_id",
}
)
original_ligands = original_ligands.merge(structures_all, how="left", on=["complex_pdb", "alt", "chain"])
original_ligands = original_ligands.merge(
structures_all, how="left", on=["complex_pdb", "alt", "chain"]
)

# Get RDKit molecules for original ligands (takes a couple of minutes)
structure_ids = original_ligands["klifs_id"]
Expand All @@ -225,14 +228,14 @@ def get_original_ligands(fragment_library_concat):
def get_most_common_fragments(fragments, top_x=50):
"""
Get most common fragments.
Parameters
----------
fragments : pandas.DataFrame
Fragment details, i.e. SMILES, kinase groups, and fragment RDKit molecules, for input subpocket.
top_x : int
Top x most common fragments.
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -279,12 +282,12 @@ def get_most_common_fragments(fragments, top_x=50):
def _generate_fingerprints(mols):
"""
Generate RDKit fingerprint from list of molecules.
Parameters
----------
mols : list of rdkit.Chem.rdchem.Mol
List of molecules.
Returns
-------
list of rdkit.DataStructs.cDataStructs.ExplicitBitVect
Expand All @@ -300,14 +303,14 @@ def _generate_fingerprints(mols):
def cluster_molecules(mols, cutoff=0.6):
"""
Cluster molecules by fingerprint distance using the Butina algorithm.
Parameters
----------
mols : list of rdkit.Chem.rdchem.Mol
List of molecules.
cutoff : float
Distance cutoff for Butina clustering.
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -365,12 +368,12 @@ def cluster_molecules(mols, cutoff=0.6):
def _get_tanimoto_distance_matrix(fingerprints):
"""
Calculate distance matrix for list of fingerprints.
Parameters
----------
fingerprints : list of rdkit.DataStructs.cDataStructs.ExplicitBitVect
List of fingerprints.
Returns
-------
list of floats
Expand All @@ -394,14 +397,14 @@ def _get_tanimoto_distance_matrix(fingerprints):
def get_fragments_by_ligand(ligand_pdb, fragment_library):
"""
Get all fragments in the KinFragLib fragment library originating from a ligand of interest (by ligand PDB name).
Parameters
----------
ligand_pdb : str
Ligand PDB name.
fragment_library : dict of pandas.DataFrame
Fragment library (including fragments with dummy atoms).
Returns
-------
pandas.DataFrame
Expand All @@ -425,7 +428,7 @@ def get_fragments_by_ligand(ligand_pdb, fragment_library):
def get_fragmented_ligand(fragment_library, complex_pdb, ligand_pdb):
"""
Get fragments with subpocket assignment for ligand by PDB ID.
Parameters
----------
fragment_library : dict of pandas.DataFrame
Expand All @@ -435,7 +438,7 @@ def get_fragmented_ligand(fragment_library, complex_pdb, ligand_pdb):
PDB ID for structure with ligand of interest.
ligand_pdb : str
PDB ID for ligand of interest.
Returns
-------
PIL.PngImagePlugin.PngImageFile
Expand Down Expand Up @@ -467,7 +470,7 @@ def draw_fragmented_ligand(
):
"""
Show fragments with subpocket assignment for ligand by PDB ID.
Parameters
----------
fragment_library : dict of pandas.DataFrame
Expand All @@ -477,7 +480,7 @@ def draw_fragmented_ligand(
PDB ID for structure with ligand of interest.
ligand_pdb : str
PDB ID for ligand of interest.
Returns
-------
PIL.PngImagePlugin.PngImageFile
Expand All @@ -501,15 +504,15 @@ def draw_fragmented_ligand(
def draw_fragments_from_recombined_ligand(fragment_ids, fragment_library):
"""
Draw fragments that a recombined ligand of interest is composed of.
Parameters
----------
fragment_ids : list of str
Fragment IDs of recombined ligand (<subpocket>_<fragment_index>).
fragment_library : pandas.DataFrame
Fragment library that recombined ligand was based on.
Must be the same as used for recombination step, otherwise fragment_ids will not match!!!
Returns
-------
PIL.PngImagePlugin.PngImageFile
Expand Down Expand Up @@ -584,7 +587,7 @@ def get_descriptors_by_fragments(fragment_library):
"""
Get physicochemical properties of fragment library, i.e. size (# heavy atoms), logP, hydrogen bond donors and acceptors,
after deduplicating fragments per subpocket based on their smiles.
Parameters
----------
fragment_library : dict of pandas.DataFrame
Expand Down Expand Up @@ -662,7 +665,7 @@ def get_ro3_from_mol(mol):
-------
pd.Series
Rule of three criteria for input fragment.
Notes
-----
Taken from: https://europepmc.org/article/med/14554012
Expand Down Expand Up @@ -705,17 +708,17 @@ def get_ro5_from_smiles(smiles):

def get_connections_by_fragment(fragment_library_concat):
"""
For each fragment, extract connecting subpockets (e.g. ['FP', 'SE'] for subpocket 'AP') and define subpocket connections (e.g. ['AP=FP', 'AP=SE']).
For each fragment, extract connecting subpockets (e.g. ['FP', 'SE'] for subpocket 'AP') and define subpocket connections (e.g. ['AP=FP', 'AP=SE']).
Parameters
----------
fragment_library_concat : pandas.DataFrame
Fragment library data for one or multiple subpockets.
Returns
-------
pandas.DataFrame
Fragment library data including connecting subpockets and connections.
Fragment library data including connecting subpockets and connections.
"""

# For each fragment, extract connecting subpocket from atom_subpockets, e.g. ['FP', 'SE'] for subpocket 'AP'
Expand All @@ -736,14 +739,14 @@ def get_connections_by_fragment(fragment_library_concat):
def _get_connecting_subpockets(subpocket, atom_subpockets):
"""
Get a fragment's connecting subpockets based on the fragment's subpocket and all fragment atoms' subpockets (only dummy atoms will have differing subpockets).
Parameters
----------
subpocket : str
Fragment's subpocket.
atom_subpockets : list of str
Fragment atoms' subpockets.
Returns
-------
list of str
Expand All @@ -759,12 +762,12 @@ def _get_connecting_subpockets(subpocket, atom_subpockets):
def get_connections_count_by_ligand(connections_by_ligand):
"""
Count subpocket connections (by type) across all ligands, i.e. how often a specific connection appears in the data set.
Parameters
----------
connections_by_ligand : pandas.DataFrame
Ligands represented by fragment library with details on their subpocket connections (see connections_by_ligand() function).
Ligands represented by fragment library with details on their subpocket connections (see connections_by_ligand() function).
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -798,12 +801,12 @@ def get_fragment_similarity_per_subpocket(fragment_library_concat):
"""
Calculate fingerprint similarities for all pairwise fragment combinations within each subpocket,
after deduplicating fragments per subpocket based on their smiles.
Parameters
----------
fragment_library_concat : pandas.DataFrame
Fragment library data for one or multiple subpockets.
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -839,12 +842,12 @@ def get_fragment_similarity_per_kinase_group(fragment_library_concat):
"""
Calculate fingerprint similarities for all pairwise fragment combinations within each kinase group and subpocket
after deduplicating fragments per subpocket and kinase group based on their smiles.
Parameters
----------
fragment_library_concat : pandas.DataFrame
Fragment library data for one or multiple subpockets.
Returns
-------
pandas.DataFrame
Expand Down Expand Up @@ -1004,7 +1007,7 @@ def plot_fragment_descriptors(descriptors):
def draw_fragments(fragments, mols_per_row=10, max_mols=50):
"""
Draw fragments.
Parameters
----------
fragments : pandas.DataFrame
Expand Down Expand Up @@ -1040,16 +1043,16 @@ def draw_ligands_from_pdb_ids(
):
"""
Draw ligands from PDB ID (fetch data directly from KLIFS database).
Parameters
----------
complex_pdbs : str or list of str
One or more complex PDB IDs.
ligand_pdbs : str or list of str
One or more ligand PDB IDs complementary to complex PDB IDs.
sub_img_size :
sub_img_size :
Image size.
mols_per_row :
mols_per_row :
Number of molecules per row.
max_mols : int
Number of molecules displayed.
Expand Down Expand Up @@ -1134,17 +1137,17 @@ def draw_ligands_from_pdb_ids(
def get_protein_target_classifications(target_chembl_ids):
"""
Get protein target classifications for a list of target ChEMBL IDs (in the form of a DataFrame).
Parameters
----------
target_chembl_ids : list of str
Target ChEMBL IDs
Returns
-------
pandas.DataFrame
Protein target classifications for target ChEMBL IDs with columns:
'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8',
Protein target classifications for target ChEMBL IDs with columns:
'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8',
'protein_class_id', 'target_chembl_id', 'component_id', 'protein_classification_id'.
"""

Expand Down
Loading

0 comments on commit 4fea90a

Please sign in to comment.