Skip to content

Commit

Permalink
Biotools-API (#208)
Browse files Browse the repository at this point in the history
* typo

* first draft of bio.tools API classes

* typo

* replace testing conditional with ability to evaluate regex
for spelling differences, capitalisation, ...

* add biotools example, change to regex eval

* change test name

* switch to metabolomics
as proteomics is in the API docs examples
  • Loading branch information
slobentanzer authored Nov 12, 2024
1 parent 000d046 commit e8fdc13
Show file tree
Hide file tree
Showing 11 changed files with 746 additions and 90 deletions.
148 changes: 74 additions & 74 deletions benchmark/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@
from .benchmark_utils import benchmark_already_executed

# how often should each benchmark be run?
N_ITERATIONS = 3
N_ITERATIONS = 1

# which dataset should be used for benchmarking?
BENCHMARK_DATASET = get_benchmark_dataset()

# which models should be benchmarked?
OPENAI_MODEL_NAMES = [
"gpt-3.5-turbo-0125",
"gpt-4-0613",
"gpt-4-0125-preview",
"gpt-4-turbo-2024-04-09",
"gpt-4o-2024-05-13",
# "gpt-3.5-turbo-0125",
# "gpt-4-0613",
# "gpt-4-0125-preview",
# "gpt-4-turbo-2024-04-09",
# "gpt-4o-2024-05-13",
"gpt-4o-2024-08-06",
"gpt-4o-mini-2024-07-18",
# "gpt-4o-mini-2024-07-18",
]

ANTHROPIC_MODEL_NAMES = [
Expand Down Expand Up @@ -128,28 +128,28 @@
# # "FP16",
# ],
# },
"llama-2-chat": {
"model_size_in_billions": [
7,
# 13,
# 70,
],
"model_format": "ggufv2",
"quantization": [
"Q2_K",
# "Q3_K_S",
"Q3_K_M",
# "Q3_K_L",
# "Q4_0",
# "Q4_K_S",
"Q4_K_M",
# "Q5_0",
# "Q5_K_S",
"Q5_K_M",
"Q6_K",
"Q8_0",
],
},
# "llama-2-chat": {
# "model_size_in_billions": [
# 7,
# # 13,
# # 70,
# ],
# "model_format": "ggufv2",
# "quantization": [
# "Q2_K",
# # "Q3_K_S",
# "Q3_K_M",
# # "Q3_K_L",
# # "Q4_0",
# # "Q4_K_S",
# "Q4_K_M",
# # "Q5_0",
# # "Q5_K_S",
# "Q5_K_M",
# "Q6_K",
# "Q8_0",
# ],
# },
# "llama-3-instruct": {
# "model_size_in_billions": [
# 8,
Expand All @@ -169,31 +169,31 @@
# # "Q4_K_M",
# ],
# },
"llama-3.1-instruct": {
"model_size_in_billions": [
8,
# 70,
],
"model_format": "ggufv2",
"quantization": [
# 8B model quantisations
"Q3_K_L",
"IQ4_XS",
"Q4_K_M",
# "Q5_K_M",
# "Q6_K",
"Q8_0",
# 70B model quantisations
# "IQ2_M",
# "Q2_K",
# "Q3_K_S",
# "IQ4_XS",
# "Q4_K_M", # crazy slow on mbp m3 max
# "Q5_K_M",
# "Q6_K",
# "Q8_0",
],
},
# "llama-3.1-instruct": {
# "model_size_in_billions": [
# 8,
# # 70,
# ],
# "model_format": "ggufv2",
# "quantization": [
# # 8B model quantisations
# "Q3_K_L",
# "IQ4_XS",
# "Q4_K_M",
# # "Q5_K_M",
# # "Q6_K",
# "Q8_0",
# # 70B model quantisations
# # "IQ2_M",
# # "Q2_K",
# # "Q3_K_S",
# # "IQ4_XS",
# # "Q4_K_M", # crazy slow on mbp m3 max
# # "Q5_K_M",
# # "Q6_K",
# # "Q8_0",
# ],
# },
# "mistral-instruct-v0.2": {
# "model_size_in_billions": [
# 7,
Expand Down Expand Up @@ -239,26 +239,26 @@
# "none",
# ],
# },
"openhermes-2.5": {
"model_size_in_billions": [
7,
],
"model_format": "ggufv2",
"quantization": [
"Q2_K",
# "Q3_K_S",
"Q3_K_M",
# "Q3_K_L",
# "Q4_0",
# "Q4_K_S",
"Q4_K_M",
# "Q5_0",
# "Q5_K_S",
"Q5_K_M",
"Q6_K",
"Q8_0",
],
},
# "openhermes-2.5": {
# "model_size_in_billions": [
# 7,
# ],
# "model_format": "ggufv2",
# "quantization": [
# "Q2_K",
# # "Q3_K_S",
# "Q3_K_M",
# # "Q3_K_L",
# # "Q4_0",
# # "Q4_K_S",
# "Q4_K_M",
# # "Q5_0",
# # "Q5_K_S",
# "Q5_K_M",
# "Q6_K",
# "Q8_0",
# ],
# },
}

# create concrete benchmark list by concatenating all combinations of model
Expand Down
23 changes: 18 additions & 5 deletions benchmark/data/benchmark_api_calling_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
#
# Test case keys:
# - input (for creating the test)
# - expected (for asserting ourcomes and generating a score)
# - expected (for asserting outcomes and generating a score)
# - case (for categorizing the test case)
#
# If any input is a dictionary itself, it will be expanded into separate test
# cases, using the top-level key to create a concatenated test case purpose.
#
# The expected parts are evaluated as regular expressions, so that variations
# in the output (e.g. whitespace, capitalization) can be accounted for. Make
# sure to escape regex special characters such as '?', '.', etc., by adding two
# backslashes before them: inside a double-quoted YAML string, '\\' is read as
# a single literal backslash, which is what the regex engine needs to see.

api_calling:
- case: oncokb:braf:melanoma
Expand All @@ -17,7 +22,7 @@ api_calling:
expected:
parts_of_query:
[
"https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?",
"https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange\\?",
"hugoSymbol=BRAF",
"alteration=V600E",
"tumorType=Melanoma",
Expand All @@ -29,7 +34,8 @@ api_calling:
expected:
parts_of_query:
[
"https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?hugoSymbol=TP53",
"https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange\\?",
"hugoSymbol=TP53",
"alteration=R273C",
"tumorType=Colon%20Adenocarcinoma",
]
Expand All @@ -41,7 +47,7 @@ api_calling:
expected:
parts_of_query:
[
"https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?",
"https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange\\?",
"hugoSymbol=BRAF",
"alteration=N486_P490del",
"tumorType=Histiocytosis",
Expand All @@ -53,10 +59,17 @@ api_calling:
expected:
parts_of_query:
[
"https://demo.oncokb.org/api/v1/annotate/structuralVariants?",
"https://demo.oncokb.org/api/v1/annotate/structuralVariants\\?",
"hugoSymbolA=CD74",
"hugoSymbolB=ROS1",
"structuralVariantType=FUSION",
"isFunctionalFusion=true",
"tumorType=Lung%20Adenocarcinoma",
]
- case: biotools:topic:metabolomics
input:
prompt:
fuzzy_search: "Which tools can I use for metabolomics?"
expected:
parts_of_query:
["https://bio.tools/api/t/", "\\?topic=", "[mM]etabolomics"]
2 changes: 1 addition & 1 deletion benchmark/data/benchmark_kg_schema_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Test case keys:
# - input (for creating the test)
# - expected (for asserting ourcomes and generating a score)
# - expected (for asserting outcomes and generating a score)
# - case (for categorizing the test case)
#
# If any input is a dictionary itself, it will be expanded into separate test
Expand Down
2 changes: 1 addition & 1 deletion benchmark/data/benchmark_med_qa_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Test case keys:
# - input (for creating the test)
# - expected (for asserting ourcomes and generating a score)
# - expected (for asserting outcomes and generating a score)
# - case (for categorizing the test case)
#
# If any input is a dictionary itself, it will be expanded into separate test
Expand Down
2 changes: 1 addition & 1 deletion benchmark/data/benchmark_query_test_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Test case keys:
# - input (for creating the test)
# - expected (for asserting ourcomes and generating a score)
# - expected (for asserting outcomes and generating a score)
# - case (for categorizing the test case)
#
# If any input is a dictionary itself, it will be expanded into separate test
Expand Down
2 changes: 1 addition & 1 deletion benchmark/data/benchmark_rag_test_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Test case keys:
# - input (for creating the test)
# - expected (for asserting ourcomes and generating a score)
# - expected (for asserting outcomes and generating a score)
# - case (for categorizing the test case)
#
# If any input is a dictionary itself, it will be expanded into separate test
Expand Down
2 changes: 1 addition & 1 deletion benchmark/data/benchmark_text_extract_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Test case keys:
# - input (for creating the test)
# - expected (for asserting ourcomes and generating a score)
# - expected (for asserting outcomes and generating a score)
# - case (for categorizing the test case)
#
# If any input is a dictionary itself, it will be expanded into separate test
Expand Down
14 changes: 9 additions & 5 deletions benchmark/test_api_calling.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from urllib.parse import urlencode
import inspect
import re

import pytest

from biochatter._misc import ensure_iterable
from biochatter.api_agent.oncokb import OncoKBQueryBuilder
from biochatter.api_agent import OncoKBQueryBuilder, BioToolsQueryBuilder
from .conftest import calculate_bool_vector_score
from .benchmark_utils import (
skip_if_already_run,
Expand All @@ -31,24 +32,27 @@ def test_api_calling(

def run_test():
conversation.reset() # needs to be reset for each test
builder = OncoKBQueryBuilder()
if "oncokb" in yaml_data["case"]:
builder = OncoKBQueryBuilder()
elif "biotools" in yaml_data["case"]:
builder = BioToolsQueryBuilder()
parameters = builder.parameterise_query(
question=yaml_data["input"]["prompt"],
conversation=conversation,
)

params = parameters.dict(exclude_unset=True)
params = parameters.dict(exclude_none=True)
endpoint = params.pop("endpoint")
base_url = params.pop("base_url")
params.pop("question_uuid")
full_url = f"{base_url}/{endpoint}"
full_url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}"
api_query = f"{full_url}?{urlencode(params)}"

score = []
for expected_part in ensure_iterable(
yaml_data["expected"]["parts_of_query"]
):
if expected_part in api_query:
if re.search(expected_part, api_query):
score.append(True)
else:
score.append(False)
Expand Down
22 changes: 22 additions & 0 deletions biochatter/api_agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,26 @@
BlastQueryParameters,
)
from .oncokb import OncoKBFetcher, OncoKBInterpreter, OncoKBQueryBuilder
from .bio_tools import (
BioToolsFetcher,
BioToolsInterpreter,
BioToolsQueryBuilder,
)
from .api_agent import APIAgent

# Explicit public API of this package: these are the names exported by
# `from biochatter.api_agent import *`.
__all__ = [
"BaseFetcher",
"BaseInterpreter",
"BaseQueryBuilder",
"BlastFetcher",
"BlastInterpreter",
"BlastQueryBuilder",
"BlastQueryParameters",
"OncoKBFetcher",
"OncoKBInterpreter",
"OncoKBQueryBuilder",
"BioToolsFetcher",
"BioToolsInterpreter",
"BioToolsQueryBuilder",
"APIAgent",
]
Loading

0 comments on commit e8fdc13

Please sign in to comment.