Renaming Compounds to Structure (#17)

lotusnprod · Nov 22, 2023 · d983917
1 parent 94a7dd2
commit d983917
Show file tree

Hide file tree

Showing 13 changed files with 126 additions and 126 deletions.
diff --git a/api.py b/api.py
@@ -10,7 +10,7 @@
 
 dm = DataModel()
 # Should likely move in the data model if that's used all the time
-all_structures = set(dm.get_compounds())
+all_structures = set(dm.get_structures())
 all_taxa = dm.get_taxa()
 
 app = FastAPI(
@@ -52,7 +52,7 @@ def get_matching_structures_from_structure_in_item(
             if item.molecule:
                 if item.substructure_search:
                     try:
-                        results = dm.compound_search_substructure(item.molecule)
+                        results = dm.structure_search_substructure(item.molecule)
                         structures = {_id for _id, _ in results}
                     except ValueError:
                         raise HTTPException(
@@ -61,7 +61,7 @@ def get_matching_structures_from_structure_in_item(
                         )
                 else:
                     try:
-                        results = dm.compound_search(item.molecule)
+                        results = dm.structure_search(item.molecule)
                         structures = {
                             _id
                             for _id, score in results
@@ -102,10 +102,10 @@ def get_matching_structures_from_taxon_in_item(dm: DataModel, item: Item) -> set
     # We need to get all the matching taxa
     taxa = get_matching_taxa_from_taxon_in_item(dm, item)
 
-    # We could have a parameter "recursive" in the query to have all the compounds from the parents too
+    # We could have a parameter "recursive" in the query to have all the structures from the parents too
     out = set()
     for taxon in taxa:
-        out.update(dm.get_compounds_of_taxon(taxon))
+        out.update(dm.get_structures_of_taxon(taxon))
 
     return out
 
@@ -116,7 +116,7 @@ def get_matching_taxa_from_structure_in_item(dm: DataModel, item: Item) -> set[i
 
     out = set()
     for structure in structures:
-        out.update(dm.get_taxa_containing_compound(structure))
+        out.update(dm.get_taxa_containing_structure(structure))
 
     return out
 
@@ -127,13 +127,13 @@ async def search_couples(item: Item) -> CoupleResult:
     selected_structures = get_matching_structures_from_structure_in_item(dm, item)
     selected_taxa = get_matching_taxa_from_taxon_in_item(dm, item)
 
-    compounds_of_selected_taxa = {
-        taxon: dm.get_compounds_of_taxon(taxon) for taxon in selected_taxa
+    structures_of_selected_taxa = {
+        taxon: dm.get_structures_of_taxon(taxon) for taxon in selected_taxa
     }
 
     couples = {
         (structure, taxon)
-        for taxon, structures in compounds_of_selected_taxa.items()
+        for taxon, structures in structures_of_selected_taxa.items()
         for structure in structures
         if structure in selected_structures
     }

diff --git a/dash_common.py b/dash_common.py
@@ -22,15 +22,15 @@
 
 @cache.memoize(timeout=3600)
 def get_svg_of_wid(j: int, molecule: str | None = None) -> str:
-    return molecule_svg(dm.get_compound_smiles_from_wid(j), molecule)
+    return molecule_svg(dm.get_structure_smiles_from_wid(j), molecule)
 
 
 @cache.memoize(timeout=3600)
-def get_number_of_taxa_for_compound(j: int) -> int:
-    return dm.get_number_of_taxa_containing_compound(j)
+def get_number_of_taxa_for_structure(j: int) -> int:
+    return dm.get_number_of_taxa_containing_structure(j)
 
 
-def generate_compounds_cards(
+def generate_structures_cards(
     active_page: int, data: dict[str, Any], molecule: str | None = None
 ) -> list[dbc.Card]:
     cards = []
@@ -39,7 +39,7 @@ def generate_compounds_cards(
 
     scores_mode = "scores" in data
 
-    displayed = data["matching_compounds"][
+    displayed = data["matching_structures"][
         PAGE_SIZE * (active_page - 1) : PAGE_SIZE * active_page
     ]
 
@@ -62,7 +62,7 @@ def generate_compounds_cards(
 
         img = get_svg_of_wid(j, molecule)
         img_data = f"data:image/svg+xml,{quote(img)}"
-        taxa_count = get_number_of_taxa_for_compound(j)
+        taxa_count = get_number_of_taxa_for_structure(j)
         card = dbc.Card(
             [
                 dbc.CardImg(src=img_data, top=True),
@@ -74,10 +74,10 @@ def generate_compounds_cards(
                             className="card-text",
                         ),
                         dcc.Markdown(
-                            f"[Wikidata page of compound](https://www.wikidata.org/entity/Q{j})"
+                            f"[Wikidata page of structure](https://www.wikidata.org/entity/Q{j})"
                         ),
                         dbc.Button(
-                            "Compound page", color="primary", href=f"/structure/{j}"
+                            "structure page", color="primary", href=f"/structure/{j}"
                         ),
                         *extras,
                     ]

diff --git a/model.py b/model.py
@@ -96,8 +96,8 @@ def __new__(cls):
     def num_taxa(self):
         return len(self.db["taxonomy_names"])
 
-    def num_compounds(self):
-        return len(self.db["compound_smiles"])
+    def num_structures(self):
+        return len(self.db["structure_smiles"])
 
     def num_couples(self):
         return len(self.db["c2t"])
@@ -160,33 +160,33 @@ def resolve_taxon(self, query: str) -> any:
         log.debug(response.json())
         return response.json()
 
-    ### Compoundonomy
-    def get_compounds(self) -> dict[int, int]:
-        return self.db["compound_wid"]
+    ### Structureonomy
+    def get_structures(self) -> dict[int, int]:
+        return self.db["structure_wid"]
 
-    def get_compound_smiles_from_wid(self, wid: int) -> str | None:
+    def get_structure_smiles_from_wid(self, wid: int) -> str | None:
         try:
-            cid = self.db["compound_id"][wid]  # ambiguous with PubChem CID?
-            return self.db["compound_smiles"][cid]
+            cid = self.db["structure_id"][wid]  # ambiguous with PubChem CID?
+            return self.db["structure_smiles"][cid]
         except (IndexError, ValueError):
-            log.warning(f"Impossible to find a compound with wid={wid}")
+            log.warning(f"Impossible to find a structure with wid={wid}")
             return None
 
-    def get_compound_smiles_from_list_of_wid(self, wid: list[int]) -> list[str]:
-        ids = [self.db["compound_id"][w] for w in wid if w in self.db["compound_id"]]
-        llen = self.db["compound_smiles"]
-        return [self.db["compound_smiles"][i] for i in ids if 0 <= i < len(llen)]
+    def get_structure_smiles_from_list_of_wid(self, wid: list[int]) -> list[str]:
+        ids = [self.db["structure_id"][w] for w in wid if w in self.db["structure_id"]]
+        llen = self.db["structure_smiles"]
+        return [self.db["structure_smiles"][i] for i in ids if 0 <= i < len(llen)]
 
     def get_dict_of_wid_to_smiles(self, wid: Iterable[int]) -> dict[int, str]:
-        ids = {w: self.db["compound_id"][w] for w in wid if w in self.db["compound_id"]}
-        llen = self.db["compound_smiles"]
+        ids = {w: self.db["structure_id"][w] for w in wid if w in self.db["structure_id"]}
+        llen = self.db["structure_smiles"]
         return {
-            wid: self.db["compound_smiles"][i]
+            wid: self.db["structure_smiles"][i]
             for wid, i in ids.items()
             if 0 <= i < len(llen)
         }
 
-    def compound_get_mol_fp_and_explicit(self, query: str) -> tuple[any, any, bool]:
+    def structure_get_mol_fp_and_explicit(self, query: str) -> tuple[any, any, bool]:
         explicit_h = "[H]" in query
         p = Chem.SmilesParserParams()
         p.removeHs = not explicit_h
@@ -198,30 +198,30 @@ def compound_get_mol_fp_and_explicit(self, query: str) -> tuple[any, any, bool]:
         fp = fingerprint(mol)
         return mol, fp, explicit_h
 
-    ## COMMENT (AR): Should we rename this to compound_search_from_smiles
-    ## and have same for InChI and co and then wrap them to a `compound_search`
+    ## COMMENT (AR): Should we rename this to structure_search_from_smiles
+    ## and have same for InChI and co and then wrap them to a `structure_search`
     ## with inchi = "InChI=1S/" in query ...
-    def compound_search(self, query: str) -> list[tuple[int, float]]:
-        mol, fp, explicit_h = self.compound_get_mol_fp_and_explicit(query)
+    def structure_search(self, query: str) -> list[tuple[int, float]]:
+        mol, fp, explicit_h = self.structure_get_mol_fp_and_explicit(query)
 
         if explicit_h:
-            db = self.db["compound_sim_h_fps"]
+            db = self.db["structure_sim_h_fps"]
         else:
-            db = self.db["compound_sim_fps"]
+            db = self.db["structure_sim_fps"]
         scores = DataStructs.BulkTanimotoSimilarity(fp, db)
-        return [(wid, score) for wid, score in zip(self.db["compound_wid"], scores)]
+        return [(wid, score) for wid, score in zip(self.db["structure_wid"], scores)]
 
-    def compound_search_substructure(
+    def structure_search_substructure(
         self, query: str, chirality: bool = False
     ) -> list[tuple[int, float]]:
-        mol, fp, explicit_h = self.compound_get_mol_fp_and_explicit(query)
+        mol, fp, explicit_h = self.structure_get_mol_fp_and_explicit(query)
 
         if explicit_h:
-            db = self.db["compound_library_h"]
-            fp_db = self.db["compound_sim_h_fps"]
+            db = self.db["structure_library_h"]
+            fp_db = self.db["structure_sim_h_fps"]
         else:
-            db = self.db["compound_library"]
-            fp_db = self.db["compound_sim_fps"]
+            db = self.db["structure_library"]
+            fp_db = self.db["structure_sim_fps"]
 
         iids = db.GetMatches(
             mol,
@@ -231,42 +231,42 @@ def compound_search_substructure(
             useChirality=chirality,
         )
 
-        new_keys = [self.db["compound_wid"][iid] for iid in iids]
+        new_keys = [self.db["structure_wid"][iid] for iid in iids]
         out = []
         for iid, wid in zip(iids, new_keys):
             out.append((wid, DataStructs.TanimotoSimilarity(fp, fp_db[iid])))
         return out
 
-    def compound_get_tsv_from_scores(self, wids, scores) -> str:
+    def structure_get_tsv_from_scores(self, wids, scores) -> str:
         out = "Wikidata link\tSimilarity\tSmiles\n"
         for idx, score in enumerate(scores):
             wid = wids[idx]
-            smiles = self.db["compound_smiles"][self.db["compound_id"][wid]]
+            smiles = self.db["structure_smiles"][self.db["structure_id"][wid]]
             out += f"http://www.wikidata.org/entity/Q{wid}\t{score:.3f}\t{smiles}\n"
         return out
 
-    ### Taxonomy to compoundonomy
-    def get_compounds_of_taxon(self, wid: int, recursive: bool = True) -> list[int]:
+    ### Taxonomy to structureonomy
+    def get_structures_of_taxon(self, wid: int, recursive: bool = True) -> list[int]:
         if wid in self.db["t2c"]:
-            matching_compounds = set(self.db["t2c"][wid])
+            matching_structures = set(self.db["t2c"][wid])
         else:
-            matching_compounds = set()
+            matching_structures = set()
 
         if recursive:
             if wid in self.db["taxonomy_children"]:
                 for parent in self.db["taxonomy_children"][wid]:
                     if parent in self.db["t2c"]:
-                        for compound in self.db["t2c"][parent]:
-                            matching_compounds.add(compound)
+                        for structure in self.db["t2c"][parent]:
+                            matching_structures.add(structure)
 
-        return list(matching_compounds)
+        return list(matching_structures)
 
-    def get_taxa_containing_compound(self, wid: int) -> set[int]:
+    def get_taxa_containing_structure(self, wid: int) -> set[int]:
         if wid in self.db["c2t"]:
             return self.db["c2t"][wid]
         return set()
 
-    def get_number_of_taxa_containing_compound(self, wid: int) -> int:
+    def get_number_of_taxa_containing_structure(self, wid: int) -> int:
         if wid not in self.db["c2t"]:
             return 0
         return len(self.db["c2t"][wid])

diff --git a/pages/contribute.py b/pages/contribute.py
@@ -215,7 +215,7 @@ def update_output(email, content, name):
             dcc.Markdown(
                 """You are even more awesome!
 
-So if you have way too many compounds to add, you can send us directly a TSV or CSV file with the following three columns:
+So if you have way too many structures to add, you can send us directly a TSV or CSV file with the following three columns:
 
 **smiles** , **organism** , **reference**
 
@@ -285,8 +285,8 @@ def layout():
                 [
                     dbc.Tabs(
                         [
-                            dbc.Tab(tab1_content, label="I have a single compound"),
-                            dbc.Tab(tab2_content, label="I have multiple compounds"),
+                            dbc.Tab(tab1_content, label="I have a single structure"),
+                            dbc.Tab(tab2_content, label="I have multiple structures"),
                         ]
                     )
                 ]

diff --git a/pages/structure.py b/pages/structure.py
@@ -19,7 +19,7 @@ def title(wid=None):
 
 dash.register_page(
     __name__,
-    name="Compound information",
+    name="Structure information",
     top_nav=True,
     order=-1,
     path_template="/structure/<wid>",
@@ -39,7 +39,7 @@ def layout(wid: int):
     img_data = f"data:image/svg+xml,{quote(img)}"
 
     name_id_list = []
-    for t in dm.get_taxa_containing_compound(wid):
+    for t in dm.get_taxa_containing_structure(wid):
         name = dm.get_taxon_name_from_wid(t)
         name_id_list.append([name, t])
     name_id_list = sorted(name_id_list, key=lambda x: x[0])
@@ -66,7 +66,7 @@ def layout(wid: int):
                             dash_table.DataTable(
                                 data=table,
                                 page_size=15,
-                                id="taxon-list-compound",
+                                id="taxon-list-structure",
                                 sort_action="native",
                                 filter_action="native",
                                 columns=[

diff --git a/pages/taxon.py b/pages/taxon.py
@@ -6,7 +6,7 @@
 from dash import Input, Output, callback, dcc
 
 from config import PAGE_SIZE
-from dash_common import generate_compounds_cards
+from dash_common import generate_structures_cards
 from model import DataModel
 
 dm = DataModel()
@@ -21,8 +21,8 @@ def title(wid=None):
     return "LOTUS"
 
 
-def tsv(compounds: list[int]) -> str:
-    smileses = dm.get_compound_smiles_from_list_of_wid(compounds)
+def tsv(structures: list[int]) -> str:
+    smileses = dm.get_structure_smiles_from_list_of_wid(structures)
     return "smiles\n" + "\n".join(smileses)
 
 
@@ -40,8 +40,8 @@ def tsv(compounds: list[int]) -> str:
     Output("cards", "children"),
     [Input("pagination", "active_page"), Input("matching-ids", "data")],
 )
-def compound_cards(active_page: int, data: dict[str, Any]) -> list[dbc.Card]:
-    return generate_compounds_cards(active_page, data)
+def structure_cards(active_page: int, data: dict[str, Any]) -> list[dbc.Card]:
+    return generate_structures_cards(active_page, data)
 
 
 @callback(
@@ -50,8 +50,8 @@ def compound_cards(active_page: int, data: dict[str, Any]) -> list[dbc.Card]:
     prevent_initial_call=True,
 )
 def func(n_clicks, data):
-    filename = f"compounds_of_{data['taxon_name'].replace('.', '')}.tsv"
-    return dict(content=tsv(data["matching_compounds"]), filename=filename)
+    filename = f"structures_of_{data['taxon_name'].replace('.', '')}.tsv"
+    return dict(content=tsv(data["matching_structures"]), filename=filename)
 
 
 def layout(wid: int):
@@ -79,18 +79,18 @@ def layout(wid: int):
                 markdown += f"[{tax_name}{ranks}](/taxon/{parent[0]}) > "
         taxonomic_info = markdown.strip("> ")
 
-    matching_compounds = dm.get_compounds_of_taxon(wid)
-    matching_compounds.sort()
-    nb_matches = len(matching_compounds)
+    matching_structures = dm.get_structures_of_taxon(wid)
+    matching_structures.sort()
+    nb_matches = len(matching_structures)
 
-    warning = f"Found {nb_matches} compounds"
+    warning = f"Found {nb_matches} structures"
 
     return dbc.Container(
         [
             dcc.Store(
                 id="matching-ids",
                 data={
-                    "matching_compounds": matching_compounds,
+                    "matching_structures": matching_structures,
                     "taxon_name": taxon_name,
                 },
             ),
@@ -121,6 +121,6 @@ def layout(wid: int):
                     ),
                 ]
             ),
-            dbc.Spinner(id="loading-compounds-tsv", children=[dbc.Row(id="cards")]),
+            dbc.Spinner(id="loading-structures-tsv", children=[dbc.Row(id="cards")]),
         ]
     )