From 2bc687b1875dd08fa5b61dace26536a6d352efad Mon Sep 17 00:00:00 2001 From: Cyril Pommier Date: Thu, 14 Nov 2024 14:09:57 +0100 Subject: [PATCH 1/3] First test --- .../data/benchmark_api_calling_data.yaml | 9 +++ biochatter/api_agent/brapi.py | 55 ++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/benchmark/data/benchmark_api_calling_data.yaml b/benchmark/data/benchmark_api_calling_data.yaml index 85340a81..d955fe8b 100644 --- a/benchmark/data/benchmark_api_calling_data.yaml +++ b/benchmark/data/benchmark_api_calling_data.yaml @@ -73,3 +73,12 @@ api_calling: expected: parts_of_query: ["https://bio.tools/api/t/", "\\?topic=", "[mM]etabolomics"] + - case: brapi:germplasm:poc + input: + prompt: + fuzzy_search: "What germplasms belong to Vitis Species?" + expected: + parts_of_query: + ["https://urgi.versailles.inrae.fr/faidare/brapi/v1/", "\\germplasm", "Vitis"] + + diff --git a/biochatter/api_agent/brapi.py b/biochatter/api_agent/brapi.py index be2c5c27..7bac58c3 100644 --- a/biochatter/api_agent/brapi.py +++ b/biochatter/api_agent/brapi.py @@ -12,7 +12,7 @@ from .abc import BaseFetcher, BaseInterpreter, BaseQueryBuilder BRAPI_QUERY_PROMPT = """ -You are a world class algorithm for creating queries in structured formats. Your task is to use the web API of Breeding API (BrAPI) to answer questions about . +You are a world class algorithm for creating queries in structured formats. Your task is to use the web API of Breeding API (BrAPI) to answer questions about plant germplasm or phenotyping studies. You have to extract the appropriate information out of the examples: 1. To list information about the tools, use the endpoint with parameters like . @@ -22,11 +22,24 @@ Base URL - +https://urgi.versailles.inrae.fr/faidare/brapi/v1/ Endpoints and Parameters - +1. Get germplasm + • GET /germplasm + • Parameters: + • accessionNumber The unique identifier for a material or germplasm within a genebankMCPD (v2.1) (ACCENUMB) 2. 
This is the unique identifier for accessions within a genebank, and is assigned when a sample is entered into the genebank collection (e.g. "PI 113869"). + • collection A specific panel/collection/population name this germplasm belongs to. + • binomialName The full binomial name (scientific name) to identify a germplasm + • genus Genus name to identify germplasm + • species Species name to identify germplasm + • synonym Alternative name or ID used to reference this germplasm + • studyDbId Use this parameter to only return results associated with the given Study unique identifier. Use GET /studies to find the list of available Studies on a server. + • germplasmName Use this parameter to only return results associated with the given Germplasm by its human readable name. Use GET /germplasm to find the list of available Germplasm on a server. + • germplasmPUI Use this parameter to only return results associated with the given Germplasm by its global permanent unique identifier. Use GET /germplasm to find the list of available Germplasm on a server. + + """ @@ -55,6 +68,42 @@ class BrAPIQueryParameters(BaseModel): default_factory=lambda: str(uuid.uuid4()), description="Unique identifier for the question.", ) + accessionNumber : str = Field( + default=None, + description="The unique identifier for a material or germplasm within a genebank. MCPD (v2.1) (ACCENUMB) 2. This is the unique identifier for accessions within a genebank, and is assigned when a sample is entered into the genebank collection (e.g. 
\"PI 113869\").", + ) + collection : str = Field( + default=None, + description="A specific panel/collection/population name this germplasm belongs to.", + ) + binomialName : str = Field( + default=None, + description="The full binomial name (scientific name) to identify a germplasm.", + ) + genus : str = Field( + default=None, + description="Genus name to identify germplasm.", + ) + species : str = Field( + default=None, + description="Species name to identify germplasm.", + ) + synonym : str = Field( + default=None, + description="Alternative name or ID used to reference this germplasm.", + ) + studyDbId : str = Field( + default=None, + description="Use this parameter to only return results associated with the given Study unique identifier. Use GET /studies to find the list of available Studies on a server.", + ) + germplasmName : str = Field( + default=None, + description="Use this parameter to only return results associated with the given Germplasm by its human readable name. Use GET /germplasm to find the list of available Germplasm on a server.", + ) + germplasmPUI : str = Field( + default=None, + description="Use this parameter to only return results associated with the given Germplasm by its global permanent unique identifier. 
Use GET /germplasm to find the list of available Germplasm on a server.", + ) class BrAPIQueryBuilder(BaseQueryBuilder): From 4b3f7cfab681f108509de525d1a14a5b763ab3b2 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Thu, 14 Nov 2024 14:19:46 +0100 Subject: [PATCH 2/3] fix running the benchmark --- benchmark/data/benchmark_api_calling_data.yaml | 11 +++++++---- benchmark/results/api_calling.csv | 1 + benchmark/test_api_calling.py | 8 +++++++- biochatter/api_agent/__init__.py | 1 + 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/benchmark/data/benchmark_api_calling_data.yaml b/benchmark/data/benchmark_api_calling_data.yaml index d955fe8b..ddff278b 100644 --- a/benchmark/data/benchmark_api_calling_data.yaml +++ b/benchmark/data/benchmark_api_calling_data.yaml @@ -76,9 +76,12 @@ api_calling: - case: brapi:germplasm:poc input: prompt: - fuzzy_search: "What germplasms belong to Vitis Species?" + fuzzy_search: "What germplasms belong to the Vitis genus?" expected: parts_of_query: - ["https://urgi.versailles.inrae.fr/faidare/brapi/v1/", "\\germplasm", "Vitis"] - - + [ + "https://urgi.versailles.inrae.fr/faidare/brapi/v1/", + "germplasm", + "\\?genus", + "Vitis", + ] diff --git a/benchmark/results/api_calling.csv b/benchmark/results/api_calling.csv index 20083fc1..fa5d7237 100644 --- a/benchmark/results/api_calling.csv +++ b/benchmark/results/api_calling.csv @@ -19,6 +19,7 @@ gpt-4o-2024-05-13,oncokb:braf:histiocytosis:exact_spelling,4;4;4/4,3,f7e5a41f8fb gpt-4o-2024-05-13,oncokb:braf:melanoma:exact_spelling,4;4;4/4,3,b52eb44672033de81ec2213895507706,2024-07-18 12:27:07,0.4.13 gpt-4o-2024-05-13,oncokb:ros1:lung_adenocarcinoma:exact_spelling,3;3;3/6,3,a453756cc6dd549acb390cdf6108eddf,2024-07-18 12:27:35,0.4.13 gpt-4o-2024-05-13,oncokb:tp53:colon_adenocarcinoma:exact_spelling,2;2;2/3,3,72f23eb51d35737c7a446180582488ab,2024-07-18 12:27:13,0.4.13 +gpt-4o-2024-08-06,brapi:germplasm:poc:fuzzy_search,4/4,1,a906498221d9ee3fddd857529a7f6af8,2024-11-14 14:16:09,0.7.5 
gpt-4o-mini-2024-07-18,oncokb:braf:histiocytosis:descriptive_spelling,3;3;3;3;3/4,5,a52dc4ba2dd21ff2aa53654c3f26b2c5,2024-07-31 00:14:55,0.5.1 gpt-4o-mini-2024-07-18,oncokb:braf:histiocytosis:exact_spelling,4;4;4;4;4/4,5,f7e5a41f8fb5eb520571bc46c8e4916c,2024-07-31 00:14:48,0.5.1 gpt-4o-mini-2024-07-18,oncokb:braf:melanoma:exact_spelling,4;4;4;4;4/4,5,b52eb44672033de81ec2213895507706,2024-07-31 00:14:32,0.5.1 diff --git a/benchmark/test_api_calling.py b/benchmark/test_api_calling.py index c198ebd1..a90c4e15 100644 --- a/benchmark/test_api_calling.py +++ b/benchmark/test_api_calling.py @@ -5,7 +5,11 @@ import pytest from biochatter._misc import ensure_iterable -from biochatter.api_agent import OncoKBQueryBuilder, BioToolsQueryBuilder +from biochatter.api_agent import ( + OncoKBQueryBuilder, + BioToolsQueryBuilder, + BrAPIQueryBuilder, +) from .conftest import calculate_bool_vector_score from .benchmark_utils import ( skip_if_already_run, @@ -36,6 +40,8 @@ def run_test(): builder = OncoKBQueryBuilder() elif "biotools" in yaml_data["case"]: builder = BioToolsQueryBuilder() + elif "brapi" in yaml_data["case"]: + builder = BrAPIQueryBuilder() parameters = builder.parameterise_query( question=yaml_data["input"]["prompt"], conversation=conversation, diff --git a/biochatter/api_agent/__init__.py b/biochatter/api_agent/__init__.py index 4d9084ab..74cf306e 100644 --- a/biochatter/api_agent/__init__.py +++ b/biochatter/api_agent/__init__.py @@ -11,6 +11,7 @@ BioToolsInterpreter, BioToolsQueryBuilder, ) +from .brapi import BrAPIQueryBuilder, BrAPIFetcher, BrAPIInterpreter from .api_agent import APIAgent __all__ = [ From ebdf195c1348f61717ed728d0d834c5c18893a93 Mon Sep 17 00:00:00 2001 From: Cyril Pommier Date: Thu, 14 Nov 2024 15:14:28 +0100 Subject: [PATCH 3/3] Two tests on germplasms --- benchmark/data/benchmark_api_calling_data.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/benchmark/data/benchmark_api_calling_data.yaml 
b/benchmark/data/benchmark_api_calling_data.yaml index ddff278b..c890fca4 100644 --- a/benchmark/data/benchmark_api_calling_data.yaml +++ b/benchmark/data/benchmark_api_calling_data.yaml @@ -85,3 +85,17 @@ api_calling: "\\?genus", "Vitis", ] + - case: brapi:germplasm:genusAndSpecies + input: + prompt: + fuzzy_search: "What germplasms belong to the Vitis genus and species vinifera?" + expected: + parts_of_query: + [ + "https://urgi.versailles.inrae.fr/faidare/brapi/v1/germplasm", + "\\?genus", + "Vitis", + "\\&species", + "vinifera", + ] +