From e8fdc1384d934d9c8fb5b69dca96ce2a3dec790e Mon Sep 17 00:00:00 2001 From: Sebastian Lobentanzer Date: Tue, 12 Nov 2024 16:10:45 +0100 Subject: [PATCH] Biotools-API (#208) * typo * first draft of bio.tools API classes * typo * replace testing conditional with ability to evaluate regex for spelling differences, capitalisation, ... * add biotools example, change to regex eval * change test name * switch to metabolomics as proteomics is in the API docs examples --- benchmark/conftest.py | 148 ++--- .../data/benchmark_api_calling_data.yaml | 23 +- benchmark/data/benchmark_kg_schema_data.yaml | 2 +- benchmark/data/benchmark_med_qa_data.yaml | 2 +- benchmark/data/benchmark_query_test_data.yaml | 2 +- benchmark/data/benchmark_rag_test_data.yaml | 2 +- .../data/benchmark_text_extract_data.yaml | 2 +- benchmark/test_api_calling.py | 14 +- biochatter/api_agent/__init__.py | 22 + biochatter/api_agent/bio_tools.py | 617 ++++++++++++++++++ biochatter/api_agent/oncokb.py | 2 +- 11 files changed, 746 insertions(+), 90 deletions(-) create mode 100644 biochatter/api_agent/bio_tools.py diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 5fa71139..a2af2127 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -17,20 +17,20 @@ from .benchmark_utils import benchmark_already_executed # how often should each benchmark be run? -N_ITERATIONS = 3 +N_ITERATIONS = 1 # which dataset should be used for benchmarking? BENCHMARK_DATASET = get_benchmark_dataset() # which models should be benchmarked? OPENAI_MODEL_NAMES = [ - "gpt-3.5-turbo-0125", - "gpt-4-0613", - "gpt-4-0125-preview", - "gpt-4-turbo-2024-04-09", - "gpt-4o-2024-05-13", + # "gpt-3.5-turbo-0125", + # "gpt-4-0613", + # "gpt-4-0125-preview", + # "gpt-4-turbo-2024-04-09", + # "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", - "gpt-4o-mini-2024-07-18", + # "gpt-4o-mini-2024-07-18", ] ANTHROPIC_MODEL_NAMES = [ @@ -128,28 +128,28 @@ # # "FP16", # ], # }, - "llama-2-chat": { - "model_size_in_billions": [ - 7, - # 13, - # 70, - ], - "model_format": "ggufv2", - "quantization": [ - "Q2_K", - # "Q3_K_S", - "Q3_K_M", - # "Q3_K_L", - # "Q4_0", - # "Q4_K_S", - "Q4_K_M", - # "Q5_0", - # "Q5_K_S", - "Q5_K_M", - "Q6_K", - "Q8_0", - ], - }, + # "llama-2-chat": { + # "model_size_in_billions": [ + # 7, + # # 13, + # # 70, + # ], + # "model_format": "ggufv2", + # "quantization": [ + # "Q2_K", + # # "Q3_K_S", + # "Q3_K_M", + # # "Q3_K_L", + # # "Q4_0", + # # "Q4_K_S", + # "Q4_K_M", + # # "Q5_0", + # # "Q5_K_S", + # "Q5_K_M", + # "Q6_K", + # "Q8_0", + # ], + # }, # "llama-3-instruct": { # "model_size_in_billions": [ # 8, @@ -169,31 +169,31 @@ # # "Q4_K_M", # ], # }, - "llama-3.1-instruct": { - "model_size_in_billions": [ - 8, - # 70, - ], - "model_format": "ggufv2", - "quantization": [ - # 8B model quantisations - "Q3_K_L", - "IQ4_XS", - "Q4_K_M", - # "Q5_K_M", - # "Q6_K", - "Q8_0", - # 70B model quantisations - # "IQ2_M", - # "Q2_K", - # "Q3_K_S", - # "IQ4_XS", - # "Q4_K_M", # crazy slow on mbp m3 max - # "Q5_K_M", - # "Q6_K", - # "Q8_0", - ], - }, + # "llama-3.1-instruct": { + # "model_size_in_billions": [ + # 8, + # # 70, + # ], + # "model_format": "ggufv2", + # "quantization": [ + # # 8B model quantisations + # "Q3_K_L", + # "IQ4_XS", + # "Q4_K_M", + # # "Q5_K_M", + # # "Q6_K", + # "Q8_0", + # # 70B model quantisations + # # "IQ2_M", + # # "Q2_K", + # # "Q3_K_S", + # # "IQ4_XS", + # # "Q4_K_M", # crazy slow on mbp m3 max + # # "Q5_K_M", + # # "Q6_K", + # # "Q8_0", + # ], + # }, # "mistral-instruct-v0.2": { # "model_size_in_billions": [ # 7, @@ -239,26 
+239,26 @@ # "none", # ], # }, - "openhermes-2.5": { - "model_size_in_billions": [ - 7, - ], - "model_format": "ggufv2", - "quantization": [ - "Q2_K", - # "Q3_K_S", - "Q3_K_M", - # "Q3_K_L", - # "Q4_0", - # "Q4_K_S", - "Q4_K_M", - # "Q5_0", - # "Q5_K_S", - "Q5_K_M", - "Q6_K", - "Q8_0", - ], - }, + # "openhermes-2.5": { + # "model_size_in_billions": [ + # 7, + # ], + # "model_format": "ggufv2", + # "quantization": [ + # "Q2_K", + # # "Q3_K_S", + # "Q3_K_M", + # # "Q3_K_L", + # # "Q4_0", + # # "Q4_K_S", + # "Q4_K_M", + # # "Q5_0", + # # "Q5_K_S", + # "Q5_K_M", + # "Q6_K", + # "Q8_0", + # ], + # }, } # create concrete benchmark list by concatenating all combinations of model diff --git a/benchmark/data/benchmark_api_calling_data.yaml b/benchmark/data/benchmark_api_calling_data.yaml index e69181bb..85340a81 100644 --- a/benchmark/data/benchmark_api_calling_data.yaml +++ b/benchmark/data/benchmark_api_calling_data.yaml @@ -3,11 +3,16 @@ # # Test case keys: # - input (for creating the test) -# - expected (for asserting ourcomes and generating a score) +# - expected (for asserting outcomes and generating a score) # - case (for categorizing the test case) # # If any input is a dictionary itself, it will be expanded into separate test # cases, using the top-level key to create a concatenated test case purpose. +# +# We are using regular expressions to evaluate the expected parts, to be able to +# account for variations in the output (e.g. whitespace, capitalization). Make +# sure to escape special characters in the regular expressions, such as '?', +# '.', etc., by adding two backslashes before them. api_calling: - case: oncokb:braf:melanoma @@ -17,7 +22,7 @@ api_calling: expected: parts_of_query: [ - "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?", + "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange\\?", "hugoSymbol=BRAF", "alteration=V600E", "tumorType=Melanoma", @@ -29,7 +34,8 @@ api_calling: expected: parts_of_query: [ - "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?hugoSymbol=TP53", + "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange\\?", + "hugoSymbol=TP53", "alteration=R273C", "tumorType=Colon%20Adenocarcinoma", ] @@ -41,7 +47,7 @@ api_calling: expected: parts_of_query: [ - "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?", + "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange\\?", "hugoSymbol=BRAF", "alteration=N486_P490del", "tumorType=Histiocytosis", @@ -53,10 +59,17 @@ api_calling: expected: parts_of_query: [ - "https://demo.oncokb.org/api/v1/annotate/structuralVariants?", + "https://demo.oncokb.org/api/v1/annotate/structuralVariants\\?", "hugoSymbolA=CD74", "hugoSymbolB=ROS1", "structuralVariantType=FUSION", "isFunctionalFusion=true", "tumorType=Lung%20Adenocarcinoma", ] + - case: biotools:topic:metabolomics + input: + prompt: + fuzzy_search: "Which tools can I use for metabolomics?" 
+ expected: + parts_of_query: + ["https://bio.tools/api/t/", "\\?topic=", "[mM]etabolomics"] diff --git a/benchmark/data/benchmark_kg_schema_data.yaml b/benchmark/data/benchmark_kg_schema_data.yaml index c19c98d3..930b8656 100644 --- a/benchmark/data/benchmark_kg_schema_data.yaml +++ b/benchmark/data/benchmark_kg_schema_data.yaml @@ -3,7 +3,7 @@ # # Test case keys: # - input (for creating the test) -# - expected (for asserting ourcomes and generating a score) +# - expected (for asserting outcomes and generating a score) # - case (for categorizing the test case) # # If any input is a dictionary itself, it will be expanded into separate test diff --git a/benchmark/data/benchmark_med_qa_data.yaml b/benchmark/data/benchmark_med_qa_data.yaml index 5b924050..ccd58655 100644 --- a/benchmark/data/benchmark_med_qa_data.yaml +++ b/benchmark/data/benchmark_med_qa_data.yaml @@ -3,7 +3,7 @@ # # Test case keys: # - input (for creating the test) -# - expected (for asserting ourcomes and generating a score) +# - expected (for asserting outcomes and generating a score) # - case (for categorizing the test case) # # If any input is a dictionary itself, it will be expanded into separate test diff --git a/benchmark/data/benchmark_query_test_data.yaml b/benchmark/data/benchmark_query_test_data.yaml index e73ccaec..bdaa6889 100644 --- a/benchmark/data/benchmark_query_test_data.yaml +++ b/benchmark/data/benchmark_query_test_data.yaml @@ -3,7 +3,7 @@ # # Test case keys: # - input (for creating the test) -# - expected (for asserting ourcomes and generating a score) +# - expected (for asserting outcomes and generating a score) # - case (for categorizing the test case) # # If any input is a dictionary itself, it will be expanded into separate test diff --git a/benchmark/data/benchmark_rag_test_data.yaml b/benchmark/data/benchmark_rag_test_data.yaml index 2fd4a105..fca5baec 100644 --- a/benchmark/data/benchmark_rag_test_data.yaml +++ b/benchmark/data/benchmark_rag_test_data.yaml @@ -3,7 +3,7 @@ # # Test case keys: # - input (for creating the test) -# - expected (for asserting ourcomes and generating a score) +# - expected (for asserting outcomes and generating a score) # - case (for categorizing the test case) # # If any input is a dictionary itself, it will be expanded into separate test diff --git a/benchmark/data/benchmark_text_extract_data.yaml b/benchmark/data/benchmark_text_extract_data.yaml index 8c85e5a6..6e4f9e14 100644 --- a/benchmark/data/benchmark_text_extract_data.yaml +++ b/benchmark/data/benchmark_text_extract_data.yaml @@ -3,7 +3,7 @@ # # Test case keys: # - input (for creating the test) -# - expected (for asserting ourcomes and generating a score) +# - expected (for asserting outcomes and generating a score) # - case (for categorizing the test case) # # If any input is a dictionary itself, it will be expanded into separate test diff --git a/benchmark/test_api_calling.py b/benchmark/test_api_calling.py index 6a4f52bc..c198ebd1 100644 --- a/benchmark/test_api_calling.py +++ b/benchmark/test_api_calling.py @@ -1,10 +1,11 @@ from urllib.parse import urlencode import inspect +import re import pytest from biochatter._misc import ensure_iterable -from biochatter.api_agent.oncokb import OncoKBQueryBuilder +from biochatter.api_agent import OncoKBQueryBuilder, BioToolsQueryBuilder from .conftest import calculate_bool_vector_score from .benchmark_utils import ( skip_if_already_run, @@ -31,24 +32,27 @@ def test_api_calling( def run_test(): conversation.reset() # needs to be reset for each test - builder = 
OncoKBQueryBuilder() + if "oncokb" in yaml_data["case"]: + builder = OncoKBQueryBuilder() + elif "biotools" in yaml_data["case"]: + builder = BioToolsQueryBuilder() parameters = builder.parameterise_query( question=yaml_data["input"]["prompt"], conversation=conversation, ) - params = parameters.dict(exclude_unset=True) + params = parameters.dict(exclude_none=True) endpoint = params.pop("endpoint") base_url = params.pop("base_url") params.pop("question_uuid") - full_url = f"{base_url}/{endpoint}" + full_url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}" api_query = f"{full_url}?{urlencode(params)}" score = [] for expected_part in ensure_iterable( yaml_data["expected"]["parts_of_query"] ): - if expected_part in api_query: + if re.search(expected_part, api_query): score.append(True) else: score.append(False) diff --git a/biochatter/api_agent/__init__.py b/biochatter/api_agent/__init__.py index 959e3d63..4d9084ab 100644 --- a/biochatter/api_agent/__init__.py +++ b/biochatter/api_agent/__init__.py @@ -6,4 +6,26 @@ BlastQueryParameters, ) from .oncokb import OncoKBFetcher, OncoKBInterpreter, OncoKBQueryBuilder +from .bio_tools import ( + BioToolsFetcher, + BioToolsInterpreter, + BioToolsQueryBuilder, +) from .api_agent import APIAgent + +__all__ = [ + "BaseFetcher", + "BaseInterpreter", + "BaseQueryBuilder", + "BlastFetcher", + "BlastInterpreter", + "BlastQueryBuilder", + "BlastQueryParameters", + "OncoKBFetcher", + "OncoKBInterpreter", + "OncoKBQueryBuilder", + "BioToolsFetcher", + "BioToolsInterpreter", + "BioToolsQueryBuilder", + "APIAgent", +] diff --git a/biochatter/api_agent/bio_tools.py b/biochatter/api_agent/bio_tools.py new file mode 100644 index 00000000..6745acc2 --- /dev/null +++ b/biochatter/api_agent/bio_tools.py @@ -0,0 +1,617 @@ +from typing import Optional +from collections.abc import Callable +import uuid + +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.pydantic_v1 import Field, BaseModel +from langchain_core.output_parsers import StrOutputParser +from langchain.chains.openai_functions import create_structured_output_runnable +import requests + +from biochatter.llm_connect import Conversation +from .abc import BaseFetcher, BaseInterpreter, BaseQueryBuilder + +BIOTOOLS_QUERY_PROMPT = """ +You are a world class algorithm for creating queries in structured formats. Your task is to use the web API of bio.tools to answer questions about bioinformatics tools and their properties. + +You have to extract the appropriate information out of the examples: +1. To list information about the tools, use the endpoint https://bio.tools/api/t/ with parameters like name, description, homepage, etc. + +Use these formats to generate queries based on the question provided. Below is more information about the bio.tools API: + +Base URL + +https://bio.tools/api/ + +Endpoints and Parameters + +1. 
List tools + +GET /t/ + +================== ============================================================================================ +Parameter Search behaviour +================== ============================================================================================ +biotoolsID Search for bio.tools tool ID (usually quoted - to get exact match) + + `biotoolsID="signalp" `_ + +name Search for tool name (quoted as needed) + + `name=signalp `_ +homepage Exact search for tool homepage URL (**must** be quoted) + + `homepage="http://cbs.dtu.dk/services/SignalP/" `_ +description Search over tool description (quoted as needed) + + `description="peptide cleavage" `_ +version Exact search for tool version (**must** be quoted) + + `version="4.1" `_ +topic Search for EDAM Topic (term) (quoted as needed) + + `topic="Proteomics" `_ + +topicID Exact search for EDAM Topic (URI): **must** be quoted + + `topicID="topic_3510" `_ +function Fuzzy search over function (input, operation, output, note and command) + + `function="Sequence analysis" `_ +operation Fuzzy search for EDAM Operation (term) (quoted as needed) + + `operation="Sequence analysis" `_ +operationID Exact search for EDAM Operation (ID) (**must** be quoted) + + `operationID="operation_2403" `_ +dataType Fuzzy search over input and output for EDAM Data (term) (quoted as needed) + + `dataType="Protein sequence" `_ +dataTypeID Exact search over input and output for EDAM Data (ID) (**must** be quoted) + + `dataTypeID="data_2976" `_ +dataFormat Fuzzy search over input and output for EDAM Format (term) (quoted as needed) + + `dataFormat="FASTA" `_ +dataFormatID Exact search over input and output for EDAM Format (ID) (**must** be quoted) + + `dataFormatID="format_1929" `_ +input Fuzzy search over input for EDAM Data and Format (term) (quoted as needed) + + `input="Protein sequence" `_ +inputID Exact search over input for EDAM Data and Format (ID) (**must** be quoted) + + `inputID="data_2976" `_ +inputDataType Fuzzy search over input for EDAM Data (term) (quoted as needed) + + `inputDataType="Protein sequence" `_ +inputDataTypeID Exact search over input for EDAM Data (ID) (**must** be quoted) + + `inputDataTypeID="data_2976" `_ +inputDataFormat Fuzzy search over input for EDAM Format (term) (quoted as needed) + + `inputDataFormat="FASTA" `_ +inputDataFormatID Exact search over input for EDAM Format (ID) (**must** be quoted) + + `inputDataFormatID="format_1929" `_ +output Fuzzy search over output for EDAM Data and Format (term) (quoted as needed) + + `output="Sequence alignment" `_ +outputID Exact search over output for EDAM Data and Format (ID) (**must** be quoted) + + `outputID="data_0863" `_ +outputDataType Fuzzy search over output for EDAM Data (term) (quoted as needed) + + `outputDataType="Sequence alignment" `_ +outputDataTypeID Exact search over output for EDAM Data (ID) (**must** be quoted) + + `outputDataTypeID="data_0863" `_ +outputDataFormat Fuzzy search over output for EDAM Format (term) (quoted as needed) + + `outputDataFormat="ClustalW format" `_ +outputDataFormatID Exact search over output for EDAM Format (ID) (**must** be quoted) + + `outputDataFormatID="format_1982" `_ +toolType Exact search for tool type + + `toolType="Command-line tool" `_ +collectionID Exact search for tool collection (normally quoted) + + `collectionID="Rare Disease" `_ +maturity Exact search for tool maturity + + `maturity=Mature `_ +operatingSystem Exact search for tool operating system + + `operatingSystem=Linux `_ +language Exact search for programming 
language + + `language=Java `_ +cost Exact search for cost + + `cost="Free of charge" `_ +license Exact search for software or data usage license (quoted as needed) + + `license="GPL-3.0" `_ +accessibility Exact search for tool accessibility + + `accessibility="Open access" `_ +credit Fuzzy search over credit (name, email, URL, ORCID iD, type of entity, type of role and note) + + `credit="Henrik Nielsen" `_ +creditName Exact search for name of credited entity + + `creditName="Henrik Nielsen" `_ +creditTypeRole Exact search for role of credited entity + + `creditTypeRole=Developer `_ +creditTypeEntity Exact search for type of credited entity + + `creditTypeEntity="Funding agency" `_ +creditOrcidID Exact search for ORCID iD of credited entity (**must** be quoted) + + `creditOrcidID="0000-0001-5121-2036" `_ +publication Fuzzy search over publication (DOI, PMID, PMCID, publication type and tool version) (quoted as needed) + + `publication=10.12688/f1000research.12974.1 `_ +publicationID Exact search for publication ID (DOI, PMID or PMCID) (**must** be quoted) + + `publicationID="10.12688/f1000research.12974.1" `_ +publicationType Exact search for publication type + + `publicationType=Primary `_ +publicationVersion Exact search for tool version associated with a publication (**must** be quoted) + + `publicationVersion="1.0" `_ +link Fuzzy search over general link (URL, type and note) (quote as needed) + + `link="Issue tracker" `_ +linkType Exact search for type of information found at a link + + `linkType="Issue tracker" `_ +documentation Fuzzy search over documentation link (URL, type and note) (quote as needed) + + `documentation=Manual `_ +documentationType Exact search for type of documentation + + `documentationType=Manual `_ +download Fuzzy search over download link (URL, type, version and note) (quote as needed) + + `download=Binaries `_ +downloadType Exact search for type of download + + `downloadType=Binaries `_ +downloadVersion Exact search for tool version associated with a download (**must** be quoted) + + `downloadVersion="1.0" `_ +otherID Fuzzy search over alternate tool IDs (ID value, type of ID and version) + + `otherID="rrid:SCR_015644" `_ + +otherIDValue Exact search for value of alternate tool ID (**must** be quoted) + + `otherIDValue="rrid:SCR_015644" `_ +otherIDType Exact search for type of alternate tool ID + + `otherIDType=RRID `_ +otherIDVersion Exact search for tool version associated with an alternate ID (**must** be quoted) + + `otherIDVersion="1.0" `_ +================== ============================================================================================ + + +The parameters are (currently) case-sensitive, e.g. you must use &biotoolsID= and not &biotoolsid + +Values of the following parameters must be given in quotes to get sensible (or any) results: +homepage +version +topicID +operationID +dataTypeID +dataFormatID +inputID +inputDataTypeID +inputDataFormatID +outputID +outputDataTypeID +outputDataFormatID +creditOrcidID +publicationID +publicationVersion +downloadVersion +otherIDValue +otherIDVersion +e.g. +https://bio.tools/api/tool?topicID=”topic_3510” +Values of other parameters can be quoted or unquoted: +Unquoted values invoke a fuzzy word search: it will search for fuzzy matches of words in the search phrase, to the target field +Quoted values invoke an exact phrase search; it will search for an exact match of the full-length of the search phrase, to the target field (matches to target substrings are allowed) +e.g. 
+https://bio.tools/api/tool?biotoolsID=”blast” returns the tool with biotoolsID of “blast” (the “canonical” blast) +https://bio.tools/api/tool?biotoolsID=blast returns all tools with “blast” in their biotoolsID (all blast flavours) +""" + + +BIOTOOLS_SUMMARY_PROMPT = """ +You have to answer this question in a clear and concise manner: {question} Be factual!\n\ +You are a world leading bioinformatician who knows everything about bio.tools packages.\n\ +Do not make up information, only use the provided information and mention how relevant the found information is based on your knowledge about bio.tools.\n\ +Here is the information relevant to the question found on the bio.tools web API:\n\ +{context} +""" + + +class BioToolsQueryParameters(BaseModel): + base_url: str = Field( + default="https://bio.tools/api/", + description="Base URL for the BioTools API.", + ) + endpoint: str = Field( + ..., + description="Specific API endpoint to hit. Example: 't/' for listing tools.", + ) + biotoolsID: Optional[str] = Field( + None, + description="Search for bio.tools tool ID (usually quoted - to get exact match)", + ) + name: Optional[str] = Field( + None, + description="Search for tool name (quoted as needed: quoted for exact match, unquoted for fuzzy search)", + ) + homepage: Optional[str] = Field( + None, + description="Exact search for tool homepage URL (**must** be quoted)", + ) + description: Optional[str] = Field( + None, + description="Search over tool description (quoted as needed)", + ) + version: Optional[str] = Field( + None, + description="Exact search for tool version (**must** be quoted)", + ) + topic: Optional[str] = Field( + None, + description="Search for EDAM Topic (term) (quoted as needed)", + ) + topicID: Optional[str] = Field( + None, + description="Exact search for EDAM Topic (URI): **must** be quoted", + ) + function: Optional[str] = Field( + None, + description="Fuzzy search over function (input, operation, output, note and command)", + ) + operation: Optional[str] = Field( + None, + description="Fuzzy search for EDAM Operation (term) (quoted as needed)", + ) + operationID: Optional[str] = Field( + None, + description="Exact search for EDAM Operation (ID) (**must** be quoted)", + ) + dataType: Optional[str] = Field( + None, + description="Fuzzy search over input and output for EDAM Data (term) (quoted as needed)", + ) + dataTypeID: Optional[str] = Field( + None, + description="Exact search over input and output for EDAM Data (ID) (**must** be quoted)", + ) + dataFormat: Optional[str] = Field( + None, + description="Fuzzy search over input and output for EDAM Format (term) (quoted as needed)", + ) + dataFormatID: Optional[str] = Field( + None, + description="Exact search over input and output for EDAM Format (ID) (**must** be quoted)", + ) + input: Optional[str] = Field( + None, + description="Fuzzy search over input for EDAM Data and Format (term) (quoted as needed)", + ) + inputID: Optional[str] = Field( + None, + description="Exact search over input for EDAM Data and Format (ID) (**must** be quoted)", + ) + inputDataType: Optional[str] = Field( + None, + description="Fuzzy search over input for EDAM Data (term) (quoted as needed)", + ) + inputDataTypeID: Optional[str] = Field( + None, + description="Exact search over input for EDAM Data (ID) (**must** be quoted)", + ) + inputDataFormat: Optional[str] = Field( + None, + description="Fuzzy search over input for EDAM Format (term) (quoted as needed)", + ) + inputDataFormatID: Optional[str] = Field( + None, + description="Exact 
search over input for EDAM Format (ID) (**must** be quoted)", + ) + output: Optional[str] = Field( + None, + description="Fuzzy search over output for EDAM Data and Format (term) (quoted as needed)", + ) + outputID: Optional[str] = Field( + None, + description="Exact search over output for EDAM Data and Format (ID) (**must** be quoted)", + ) + outputDataType: Optional[str] = Field( + None, + description="Fuzzy search over output for EDAM Data (term) (quoted as needed)", + ) + outputDataTypeID: Optional[str] = Field( + None, + description="Exact search over output for EDAM Data (ID) (**must** be quoted)", + ) + outputDataFormat: Optional[str] = Field( + None, + description="Fuzzy search over output for EDAM Format (term) (quoted as needed)", + ) + outputDataFormatID: Optional[str] = Field( + None, + description="Exact search over output for EDAM Format (ID) (**must** be quoted)", + ) + toolType: Optional[str] = Field( + None, + description="Exact search for tool type", + ) + collectionID: Optional[str] = Field( + None, + description="Exact search for tool collection (normally quoted)", + ) + maturity: Optional[str] = Field( + None, + description="Exact search for tool maturity", + ) + operatingSystem: Optional[str] = Field( + None, + description="Exact search for tool operating system", + ) + language: Optional[str] = Field( + None, + description="Exact search for programming language", + ) + cost: Optional[str] = Field( + None, + description="Exact search for cost", + ) + license: Optional[str] = Field( + None, + description="Exact search for software or data usage license (quoted as needed)", + ) + accessibility: Optional[str] = Field( + None, + description="Exact search for tool accessibility", + ) + credit: Optional[str] = Field( + None, + description="Fuzzy search over credit (name, email, URL, ORCID iD, type of entity, type of role and note)", + ) + creditName: Optional[str] = Field( + None, + description="Exact search for name of credited entity", + ) + creditTypeRole: Optional[str] = Field( + None, + description="Exact search for role of credited entity", + ) + creditTypeEntity: Optional[str] = Field( + None, + description="Exact search for type of credited entity", + ) + creditOrcidID: Optional[str] = Field( + None, + description="Exact search for ORCID iD of credited entity (**must** be quoted)", + ) + publication: Optional[str] = Field( + None, + description="Fuzzy search over publication (DOI, PMID, PMCID, publication type and tool version) (quoted as needed)", + ) + publicationID: Optional[str] = Field( + None, + description="Exact search for publication ID (DOI, PMID or PMCID) (**must** be quoted)", + ) + publicationType: Optional[str] = Field( + None, + description="Exact search for publication type", + ) + publicationVersion: Optional[str] = Field( + None, + description="Exact search for tool version associated with a publication (**must** be quoted)", + ) + link: Optional[str] = Field( + None, + description="Fuzzy search over general link (URL, type and note) (quote as needed)", + ) + linkType: Optional[str] = Field( + None, + description="Exact search for type of information found at a link", + ) + documentation: Optional[str] = Field( + None, + description="Fuzzy search over documentation link (URL, type and note) (quote as needed)", + ) + documentationType: Optional[str] = Field( + None, + description="Exact search for type of documentation", + ) + download: Optional[str] = Field( + None, + description="Fuzzy search over download link (URL, type, version and note) (quote 
as needed)",
+    )
+    downloadType: Optional[str] = Field(
+        None,
+        description="Exact search for type of download",
+    )
+    downloadVersion: Optional[str] = Field(
+        None,
+        description="Exact search for tool version associated with a download (**must** be quoted)",
+    )
+    otherID: Optional[str] = Field(
+        None,
+        description="Fuzzy search over alternate tool IDs (ID value, type of ID and version)",
+    )
+    otherIDValue: Optional[str] = Field(
+        None,
+        description="Exact search for value of alternate tool ID (**must** be quoted)",
+    )
+    otherIDType: Optional[str] = Field(
+        None,
+        description="Exact search for type of alternate tool ID",
+    )
+    otherIDVersion: Optional[str] = Field(
+        None,
+        description="Exact search for tool version associated with an alternate ID (**must** be quoted)",
+    )
+    question_uuid: Optional[str] = Field(
+        default_factory=lambda: str(uuid.uuid4()),
+        description="Unique identifier for the question.",
+    )
+
+
+class BioToolsQueryBuilder(BaseQueryBuilder):
+    """A class for building a BioToolsQuery object."""
+
+    def create_runnable(
+        self,
+        query_parameters: "BioToolsQueryParameters",
+        conversation: "Conversation",
+    ) -> Callable:
+        """
+        Creates a runnable object for executing queries using the LangChain
+        `create_structured_output_runnable` method.
+
+        Args:
+            query_parameters: A Pydantic data model that specifies the fields of
+                the API that should be queried.
+
+            conversation: A BioChatter conversation object.
+
+        Returns:
+            A Callable object that can execute the query.
+        """
+        return create_structured_output_runnable(
+            output_schema=query_parameters,
+            llm=conversation.chat,
+            prompt=self.structured_output_prompt,
+        )
+
+    def parameterise_query(
+        self,
+        question: str,
+        conversation: "Conversation",
+    ) -> BioToolsQueryParameters:
+        """
+
+        Generates a BioToolsQuery object based on the given question, prompt,
+        and BioChatter conversation. Uses a Pydantic model to define the API
+        fields. Creates a runnable that can be invoked on LLMs that are
+        qualified to parameterise functions.
+
+        Args:
+            question (str): The question to be answered.
+
+            conversation: The conversation object used for parameterising the
+                BioToolsQuery.
+
+        Returns:
+            BioToolsQueryParameters: the parameterised query object (Pydantic model)
+        """
+        runnable = self.create_runnable(
+            query_parameters=BioToolsQueryParameters,
+            conversation=conversation,
+        )
+        biotools_call_obj = runnable.invoke(
+            {
+                "input": f"Answer:\n{question} based on:\n {BIOTOOLS_QUERY_PROMPT}"
+            }
+        )
+        biotools_call_obj.question_uuid = str(uuid.uuid4())
+        return biotools_call_obj
+
+
+class BioToolsFetcher(BaseFetcher):
+    """
+    A class for retrieving API results from BioTools given a parameterized
+    BioToolsQuery.
+    """
+
+    def __init__(self, api_token="demo"):
+        self.headers = {
+            "Authorization": f"Bearer {api_token}",
+            "Accept": "application/json",
+        }
+        self.base_url = "https://bio.tools/api"
+
+    def fetch_results(
+        self, request_data: BioToolsQueryParameters, retries: Optional[int] = 3
+    ) -> str:
+        """Function to submit the BioTools query and fetch the results directly.
+        No multi-step procedure, thus no wrapping of submission and retrieval in
+        this case.
+
+        Args:
+            request_data: BioToolsQuery object (Pydantic model) containing the
+                BioTools query parameters.
+
+        Returns:
+            str: The results of the BioTools query.
+        """
+        # Submit the query and get the URL
+        params = request_data.dict(exclude_unset=True)
+        endpoint = params.pop("endpoint")
+        params.pop("question_uuid")
+        full_url = f"{self.base_url}/{endpoint}"
+        response = requests.get(full_url, headers=self.headers, params=params)
+        response.raise_for_status()
+
+        # Fetch the results from the URL
+        results_response = requests.get(response.url, headers=self.headers)
+        results_response.raise_for_status()
+
+        return results_response.text
+
+
+class BioToolsInterpreter(BaseInterpreter):
+    def summarise_results(
+        self,
+        question: str,
+        conversation_factory: Callable,
+        response_text: str,
+    ) -> str:
+        """
+        Function to extract the answer from the bio.tools results.
+
+        Args:
+            question (str): The question to be answered.
+            conversation_factory: A callable that creates a BioChatter conversation.
+            response_text (str): The response.text returned by bio.tools.
+
+        Returns:
+            str: The extracted answer from the bio.tools results.
+
+        """
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    "You are a world class bioinformatician who knows "
+                    "everything about bio.tools packages and the "
+                    "bioinformatics ecosystem. Your task is to interpret "
+                    "results from BioTools API calls and summarise "
+                    "them for the user.",
+                ),
+                ("user", "{input}"),
+            ]
+        )
+        summary_prompt = BIOTOOLS_SUMMARY_PROMPT.format(
+            question=question, context=response_text
+        )
+        output_parser = StrOutputParser()
+        conversation = conversation_factory()
+        chain = prompt | conversation.chat | output_parser
+        answer = chain.invoke({"input": summary_prompt})
+        return answer
diff --git a/biochatter/api_agent/oncokb.py b/biochatter/api_agent/oncokb.py
index 321937cc..2d2646f6 100644
--- a/biochatter/api_agent/oncokb.py
+++ b/biochatter/api_agent/oncokb.py
@@ -103,7 +103,7 @@
 ONCOKB_SUMMARY_PROMPT = """
 You have to answer this question in a clear and concise manner: {question} Be factual!\n\
 You are a world leading oncologist and molecular biologist who knows everything about OncoKB results.\n\
-Do not make up information, only use the provided information and mention how relevant the found information is based on your knowledge about OncKB\n\
+Do not make up information, only use the provided information and mention how relevant the found information is based on your knowledge about OncoKB\n\
 Here is the information relevant to the question found on OncoKB:\n\
 {context}
 """
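Note: the new bio.tools agent follows the same three-step pattern as the existing BLAST and OncoKB agents: build a parameterised query, fetch the raw API response, and interpret it. The sketch below shows how the classes added in this patch could be wired together end to end; the `conversation` object and `conversation_factory` are assumed to be set up elsewhere (e.g. via biochatter.llm_connect), and the wrapper function name is illustrative, not part of the patch.

from biochatter.api_agent import (
    BioToolsFetcher,
    BioToolsInterpreter,
    BioToolsQueryBuilder,
)


def answer_with_biotools(question, conversation, conversation_factory):
    # 1. Let the LLM fill in the BioToolsQueryParameters fields from the question.
    parameters = BioToolsQueryBuilder().parameterise_query(
        question=question,
        conversation=conversation,
    )

    # 2. Execute the parameterised query against the bio.tools web API.
    response_text = BioToolsFetcher().fetch_results(parameters)

    # 3. Summarise the raw JSON response with respect to the original question.
    return BioToolsInterpreter().summarise_results(
        question=question,
        conversation_factory=conversation_factory,
        response_text=response_text,
    )


# Usage (assuming the conversation objects are configured):
# answer = answer_with_biotools(
#     "Which tools can I use for metabolomics?",
#     conversation,
#     conversation_factory,
# )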
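Note: the benchmark now scores API calls with regular expressions rather than exact substring matches, which is why the YAML data escapes metacharacters with two backslashes (e.g. '\\?') and can use character classes such as '[mM]etabolomics' to tolerate capitalisation differences. A minimal sketch of that scoring step, using a hand-written query string in place of a live model call:

import re
from urllib.parse import urlencode

# Hypothetical query a model might produce for the biotools:topic:metabolomics case.
base_url = "https://bio.tools/api/"
endpoint = "t/"
params = {"topic": "Metabolomics"}
api_query = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}?{urlencode(params)}"

# Expected parts from benchmark_api_calling_data.yaml; the YAML '\\?' arrives here as '\?'.
expected_parts = ["https://bio.tools/api/t/", r"\?topic=", "[mM]etabolomics"]

# One boolean per expected part, mirroring run_test() in benchmark/test_api_calling.py.
score = [bool(re.search(part, api_query)) for part in expected_parts]
print(score)  # [True, True, True] for this query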