class BaseFetcher(ABC):
    """Abstract base class for fetchers. A fetcher is responsible for submitting
    queries (in systems where submission and fetching are separate) and for
    fetching and saving the results of queries. It has to implement a
    `fetch_results()` method, which can wrap a multi-step procedure to submit
    and retrieve. Should implement a retry method to account for connectivity
    issues or processing times.
    """

    @abstractmethod
    def fetch_results(
        self,
        query_model: BaseModel,
        retries: int | None = 3,
    ):
        """Fetches results by submitting a query. Can implement a multi-step
        procedure if submitting and fetching are distinct processes (e.g., in
        the case of long processing times, as for BLAST).

        Args:
        ----
        query_model: the Pydantic model describing the parameterised query

        """
Parameters:

    query_model (BaseModel): the Pydantic model describing the parameterised
        query. Required.
@abstractmethod
def fetch_results(
    self,
    query_model: BaseModel,
    retries: int | None = 3,
):
    """Fetches results by submitting a query. Can implement a multi-step
    procedure if submitting and fetching are distinct processes (e.g., in
    the case of long processing times, as for BLAST).

    Args:
    ----
    query_model: the Pydantic model describing the parameterised query

    """
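To illustrate the contract, here is a minimal sketch of a concrete subclass for a hypothetical single-step REST API; the `ExampleQueryParameters` model, endpoint, and field names are illustrative and not part of BioChatter:

import requests
from pydantic import BaseModel


class ExampleQueryParameters(BaseModel):
    # Hypothetical query model; real agents define API-specific fields.
    url: str = "https://api.example.org/search"
    term: str = ""


class ExampleFetcher(BaseFetcher):
    def fetch_results(
        self,
        query_model: ExampleQueryParameters,
        retries: int | None = 3,
    ) -> str:
        # Simple retry loop, as suggested by the base class, to absorb
        # transient connectivity issues.
        for attempt in range(retries or 1):
            try:
                response = requests.get(
                    query_model.url, params={"term": query_model.term}
                )
                response.raise_for_status()
                return response.text
            except requests.RequestException:
                if attempt == (retries or 1) - 1:
                    raise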
class BaseInterpreter(ABC):
    """Abstract base class for result interpreters. The interpreter is aware of
    the nature and structure of the results and can extract and summarise
    information from them.
    """

    @abstractmethod
    def summarise_results(
        self,
        question: str,
        conversation_factory: Callable,
        response_text: str,
    ) -> str:
        """Summarises an answer based on the given parameters.

        Args:
        ----
        question (str): The question that was asked.

        conversation_factory (Callable): A function that creates a
            BioChatter conversation.

        response_text (str): The response.text returned from the request.

        Returns:
        -------
        A summary of the answer.

        Todo:
        ----
        Genericise (remove file path and n_lines parameters, and use a
        generic way to get the results). The child classes should manage the
        specifics of the results.

        """
Summarises an answer based on the given parameters.

Parameters:

    question (str): The question that was asked. Required.

    conversation_factory (Callable): A function that creates a BioChatter
        conversation. Required.

    response_text (str): The response.text returned from the request. Required.

Returns:

    str: A summary of the answer.

Todo:

    Genericise (remove file path and n_lines parameters, and use a generic
    way to get the results). The child classes should manage the specifics
    of the results.
@abstractmethod
def summarise_results(
    self,
    question: str,
    conversation_factory: Callable,
    response_text: str,
) -> str:
    """Summarises an answer based on the given parameters.

    Args:
    ----
    question (str): The question that was asked.

    conversation_factory (Callable): A function that creates a
        BioChatter conversation.

    response_text (str): The response.text returned from the request.

    Returns:
    -------
    A summary of the answer.

    Todo:
    ----
    Genericise (remove file path and n_lines parameters, and use a
    generic way to get the results). The child classes should manage the
    specifics of the results.

    """
class BaseQueryBuilder(ABC):
    """An abstract base class for query builders."""

    @property
    def structured_output_prompt(self) -> ChatPromptTemplate:
        """Defines a structured output prompt template. This provides a default
        implementation for an API agent that can be overridden by subclasses to
        return a ChatPromptTemplate-compatible object.
        """
        return ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a world class algorithm for extracting information in structured formats.",
                ),
                (
                    "human",
                    "Use the given format to extract information from the following input: {input}",
                ),
                ("human", "Tip: Make sure to answer in the correct format"),
            ],
        )

    @abstractmethod
    def create_runnable(
        self,
        query_parameters: "BaseModel",
        conversation: "Conversation",
    ) -> Callable:
        """Creates a runnable object for executing queries. Must be implemented by
        subclasses. Should use the LangChain `create_structured_output_runnable`
        method to generate the Callable.

        Args:
        ----
        query_parameters: A Pydantic data model that specifies the fields of
            the API that should be queried.

        conversation: A BioChatter conversation object.

        Returns:
        -------
        A Callable object that can execute the query.

        """

    @abstractmethod
    def parameterise_query(
        self,
        question: str,
        conversation: "Conversation",
    ) -> BaseModel:
        """Parameterises a query object (a Pydantic model with the fields of the
        API) based on the given question using a BioChatter conversation
        instance. Must be implemented by subclasses.

        Args:
        ----
        question (str): The question to be answered.

        conversation: The BioChatter conversation object containing the LLM
            that should parameterise the query.

        Returns:
        -------
        A parameterised instance of the query object (Pydantic BaseModel)

        """
Parameters:

    query_parameters (BaseModel): A Pydantic data model that specifies the
        fields of the API that should be queried. Required.
@abstractmethod
def create_runnable(
    self,
    query_parameters: "BaseModel",
    conversation: "Conversation",
) -> Callable:
    """Creates a runnable object for executing queries. Must be implemented by
    subclasses. Should use the LangChain `create_structured_output_runnable`
    method to generate the Callable.

    Args:
    ----
    query_parameters: A Pydantic data model that specifies the fields of
        the API that should be queried.

    conversation: A BioChatter conversation object.

    Returns:
    -------
    A Callable object that can execute the query.

    """

@abstractmethod
def parameterise_query(
    self,
    question: str,
    conversation: "Conversation",
) -> BaseModel:
    """Parameterises a query object (a Pydantic model with the fields of the
    API) based on the given question using a BioChatter conversation
    instance. Must be implemented by subclasses.

    Args:
    ----
    question (str): The question to be answered.

    conversation: The BioChatter conversation object containing the LLM
        that should parameterise the query.

    Returns:
    -------
    A parameterised instance of the query object (Pydantic BaseModel)

    """
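A concrete builder typically only supplies its own query model and prompt. A minimal sketch, assuming a hypothetical `ExampleQueryParameters` model; the `create_structured_output_runnable` call mirrors the BLAST and OncoKB builders documented below, though its import path varies across LangChain versions:

import uuid

# Import path is an assumption; it differs between LangChain releases.
from langchain.chains.openai_functions import create_structured_output_runnable


class ExampleQueryBuilder(BaseQueryBuilder):
    def create_runnable(
        self,
        query_parameters: "BaseModel",
        conversation: "Conversation",
    ) -> Callable:
        # Bind the query model to the LLM via the default structured
        # output prompt defined on the base class.
        return create_structured_output_runnable(
            output_schema=query_parameters,
            llm=conversation.chat,
            prompt=self.structured_output_prompt,
        )

    def parameterise_query(
        self,
        question: str,
        conversation: "Conversation",
    ) -> BaseModel:
        runnable = self.create_runnable(ExampleQueryParameters, conversation)
        query = runnable.invoke({"input": question})
        query.question_uuid = str(uuid.uuid4())  # assumes the model has this field
        return query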
Source code in biochatter/api_agent/api_agent.py
class APIAgent:
    def __init__(
        self,
        conversation_factory: Callable,
        query_builder: "BaseQueryBuilder",
        fetcher: "BaseFetcher",
        interpreter: "BaseInterpreter",
    ):
        """API agent class to interact with a tool's API for querying and
        fetching results. The query fields have to be defined in a Pydantic
        model (`BaseModel`) and used (i.e., parameterised by the LLM) in the
        query builder. Specific API agents are defined in submodules of this
        directory (`api_agent`). The agent's logic is implemented in the
        `execute` method.

        Attributes
        ----------
        conversation_factory (Callable): A function used to create a
            BioChatter conversation, providing LLM access.

        query_builder (BaseQueryBuilder): An instance of a child of the
            BaseQueryBuilder class.

        result_fetcher (BaseFetcher): An instance of a child of the
            BaseFetcher class.

        result_interpreter (BaseInterpreter): An instance of a child of the
            BaseInterpreter class.

        """
        self.conversation_factory = conversation_factory
        self.query_builder = query_builder
        self.fetcher = fetcher
        self.interpreter = interpreter
        self.final_answer = None

    def parameterise_query(self, question: str) -> BaseModel | None:
        """Use LLM to parameterise a query (a Pydantic model) based on the given
        question using a BioChatter conversation instance.
        """
        try:
            conversation = self.conversation_factory()
            return self.query_builder.parameterise_query(question, conversation)
        except Exception as e:
            print(f"Error generating query: {e}")
            return None

    def fetch_results(self, query_model: str) -> str | None:
        """Fetch the results of the query using the individual API's
        implementation (either single-step or submit-retrieve).

        Args:
        ----
        query_model: the parameterised query Pydantic model

        """
        try:
            return self.fetcher.fetch_results(query_model, 100)
        except Exception as e:
            print(f"Error fetching results: {e}")
            return None

    def summarise_results(
        self,
        question: str,
        response_text: str,
    ) -> str | None:
        """Summarise the retrieved results to extract the answer to the question."""
        try:
            return self.interpreter.summarise_results(
                question=question,
                conversation_factory=self.conversation_factory,
                response_text=response_text,
            )
        except Exception as e:
            print(f"Error extracting answer: {e}")
            return None

    def execute(self, question: str) -> str | None:
        """Wrapper that uses class methods to execute the API agent logic.
        Consists of 1) query generation, 2) query submission, 3) results
        fetching, and 4) answer extraction. The final answer is stored in the
        final_answer attribute.

        Args:
        ----
        question (str): The question to be answered.
        """
        # Generate query
        try:
            ...
        return final_answer

    def get_description(self, tool_name: str, tool_desc: str):
        return f"This API agent interacts with {tool_name}'s API for querying and fetching results. {tool_desc}"
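As a usage sketch, assuming the BLAST components documented below are importable from the `api_agent` submodules and that `conversation_factory` returns a configured BioChatter conversation:

def conversation_factory():
    # Assumed helper: returns a BioChatter conversation providing LLM access.
    ...

agent = APIAgent(
    conversation_factory=conversation_factory,
    query_builder=BlastQueryBuilder(),
    fetcher=BlastFetcher(),
    interpreter=BlastInterpreter(),
)
# execute() chains query generation, submission, fetching, and summarisation;
# the result is also stored on agent.final_answer.
answer = agent.execute("Which organism does the sequence AGCTTAGC... come from?")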
Attributes:

    conversation_factory (Callable): A function used to create a BioChatter
        conversation, providing LLM access.

    query_builder (BaseQueryBuilder): An instance of a child of the
        BaseQueryBuilder class.

    result_fetcher (BaseFetcher): An instance of a child of the BaseFetcher
        class.

    result_interpreter (BaseInterpreter): An instance of a child of the
        BaseInterpreter class.
Source code in biochatter/api_agent/api_agent.py
def __init__(
    self,
    conversation_factory: Callable,
    query_builder: "BaseQueryBuilder",
    fetcher: "BaseFetcher",
    interpreter: "BaseInterpreter",
):
    """API agent class to interact with a tool's API for querying and fetching
    results. The query fields have to be defined in a Pydantic model
    (`BaseModel`) and used (i.e., parameterised by the LLM) in the query
    builder. Specific API agents are defined in submodules of this directory
    (`api_agent`). The agent's logic is implemented in the `execute` method.

    Attributes
    ----------
    conversation_factory (Callable): A function used to create a
        BioChatter conversation, providing LLM access.

    query_builder (BaseQueryBuilder): An instance of a child of the
        BaseQueryBuilder class.

    result_fetcher (BaseFetcher): An instance of a child of the
        BaseFetcher class.

    result_interpreter (BaseInterpreter): An instance of a child of the
        BaseInterpreter class.

    """
    self.conversation_factory = conversation_factory
    self.query_builder = query_builder
    self.fetcher = fetcher
    self.interpreter = interpreter
    self.final_answer = None
Parameters:

    question (str): The question to be answered. Required.
Source code in biochatter/api_agent/api_agent.py
def execute(self, question: str) -> str | None:
    """Wrapper that uses class methods to execute the API agent logic. Consists
    of 1) query generation, 2) query submission, 3) results fetching, and
    4) answer extraction. The final answer is stored in the final_answer
    attribute.

    Args:
    ----
    question (str): The question to be answered.
    """
    # Generate query
    try:
        ...
Parameters:

    query_model (str): the parameterised query Pydantic model. Required.
Source code in biochatter/api_agent/api_agent.py
def fetch_results(self, query_model: str) -> str | None:
    """Fetch the results of the query using the individual API's implementation
    (either single-step or submit-retrieve).

    Args:
    ----
    query_model: the parameterised query Pydantic model

    """
    try:
        return self.fetcher.fetch_results(query_model, 100)
    except Exception as e:
        print(f"Error fetching results: {e}")
        return None
Source code in biochatter/api_agent/api_agent.py
def parameterise_query(self, question: str) -> BaseModel | None:
    """Use LLM to parameterise a query (a Pydantic model) based on the given
    question using a BioChatter conversation instance.
    """
    try:
        conversation = self.conversation_factory()
        return self.query_builder.parameterise_query(question, conversation)
    except Exception as e:
        print(f"Error generating query: {e}")
        return None
Source code in biochatter/api_agent/api_agent.py
class BlastFetcher(BaseFetcher):
    """A class for retrieving API results from BLAST given a parameterised
    BlastQuery.

    TODO add a limit of characters to be returned from the response.text?
    """

    def _submit_query(self, request_data: BlastQueryParameters) -> str:
        """Function to POST the BLAST query and retrieve the RID.
        It submits the structured BlastQuery object and returns the RID.

        Args:
        ----
        request_data: BlastQuery object containing the BLAST query
            parameters.

        Returns:
        -------
        str: The Request ID (RID) for the submitted BLAST query.

        """
        data = {
            "CMD": request_data.cmd,
            "PROGRAM": request_data.program,
            "DATABASE": request_data.database,
            "QUERY": request_data.query,
            "FORMAT_TYPE": request_data.format_type,
            "MEGABLAST": request_data.megablast,
            "HITLIST_SIZE": request_data.max_hits,
        }
        # Include any other_params if provided
        if request_data.other_params:
            data.update(request_data.other_params)
        # Make the API call
        query_string = urlencode(data)
        # Combine base URL with the query string
        full_url = f"{request_data.url}?{query_string}"
        # Print the full URL
        request_data.full_url = full_url
        print("Full URL built by retriever:\n", request_data.full_url)
        response = requests.post(request_data.url, data=data)
        response.raise_for_status()
        # Extract RID from response
        print(response)
        match = re.search(r"RID = (\w+)", response.text)
        if match:
            return match.group(1)
        else:
            raise ValueError("RID not found in BLAST submission response.")

    def _fetch_results(
        self,
        rid: str,
        question_uuid: str,
        retries: int = 10000,
    ):
        """SECOND function to be called for a BLAST query.
        Will look for the RID to fetch the data.
        """
        ###
        ### TODO: Implement logging for all BLAST queries
        ###
        # log_question_uuid_json(request_data.question_uuid, question, file_name, log_file_path, request_data.full_url)
        base_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
        check_status_params = {
            "CMD": "Get",
            "FORMAT_OBJECT": "SearchInfo",
            "RID": rid,
        }
        get_results_params = {
            "CMD": "Get",
            "FORMAT_TYPE": "XML",
            "RID": rid,
        }

        # Check the status of the BLAST job
        for attempt in range(retries):
            status_response = requests.get(base_url, params=check_status_params)
            status_response.raise_for_status()
            status_text = status_response.text
            print("evaluating status")
            if "Status=WAITING" in status_text:
                print(f"{question_uuid} results not ready, waiting...")
                time.sleep(15)
            elif "Status=FAILED" in status_text:
                raise RuntimeError("BLAST query FAILED.")
            elif "Status=UNKNOWN" in status_text:
                raise RuntimeError("BLAST query expired or does not exist.")
            elif "Status=READY" in status_text:
                if "ThereAreHits=yes" in status_text:
                    print(f"{question_uuid} results are ready, retrieving.")
                    results_response = requests.get(
                        base_url,
                        params=get_results_params,
                    )
                    results_response.raise_for_status()
                    # Save the results to a file
                    return results_response.text
                else:
                    return "No hits found"
            if attempt == retries - 1:
                raise TimeoutError(
                    "Maximum attempts reached. Results may not be ready.",
                )

    def fetch_results(
        self,
        query_model: BlastQueryParameters,
        retries: int = 20,
    ) -> str:
        """Submit request and fetch results from the BLAST API. Wraps individual
        submission and retrieval of results.

        Args:
        ----
        query_model: the Pydantic model of the query

        retries: the number of maximum retries

        Returns:
        -------
        str: the result from the BLAST API

        """
        rid = self._submit_query(request_data=query_model)
        return self._fetch_results(
            rid=rid,
            question_uuid=query_model.question_uuid,
            retries=retries,
        )
def fetch_results(
    self,
    query_model: BlastQueryParameters,
    retries: int = 20,
) -> str:
    """Submit request and fetch results from the BLAST API. Wraps individual
    submission and retrieval of results.

    Args:
    ----
    query_model: the Pydantic model of the query

    retries: the number of maximum retries

    Returns:
    -------
    str: the result from the BLAST API

    """
    rid = self._submit_query(request_data=query_model)
    return self._fetch_results(
        rid=rid,
        question_uuid=query_model.question_uuid,
        retries=retries,
    )
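The two-step submit-and-poll flow can also be driven directly; a sketch assuming a parameterised `BlastQueryParameters` instance with a placeholder sequence:

fetcher = BlastFetcher()
params = BlastQueryParameters(query="AGCTTAGCTAGCTACGTAGCTAGC")  # placeholder sequence
# _submit_query POSTs the query and extracts the RID; _fetch_results then
# polls Blast.cgi every 15 seconds until Status=READY (or retries run out).
xml_results = fetcher.fetch_results(query_model=params, retries=20)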
class BlastInterpreter(BaseInterpreter):
    def summarise_results(
        self,
        question: str,
        conversation_factory: Callable,
        response_text: str,
    ) -> str:
        """Function to extract the answer from the BLAST results.

        Args:
        ----
        question (str): The question to be answered.
        conversation_factory: A BioChatter conversation object.
        response_text (str): The response.text returned by NCBI.

        Returns:
        -------
        str: The extracted answer from the BLAST results.

        """
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a world class molecular biologist who knows everything about NCBI and BLAST results.",
                ),
                ("user", "{input}"),
            ],
        )
        summary_prompt = BLAST_SUMMARY_PROMPT.format(
            question=question,
            context=response_text,
        )
        output_parser = StrOutputParser()
        conversation = conversation_factory()
        chain = prompt | conversation.chat | output_parser
        answer = chain.invoke({"input": summary_prompt})
        return answer
Function to extract the answer from the BLAST results.

Parameters:

    question (str): The question to be answered. Required.

    conversation_factory (Callable): A BioChatter conversation object. Required.

    response_text (str): The response.text returned by NCBI. Required.

Returns:

    str: The extracted answer from the BLAST results.
def summarise_results(
    self,
    question: str,
    conversation_factory: Callable,
    response_text: str,
) -> str:
    """Function to extract the answer from the BLAST results.

    Args:
    ----
    question (str): The question to be answered.
    conversation_factory: A BioChatter conversation object.
    response_text (str): The response.text returned by NCBI.

    Returns:
    -------
    str: The extracted answer from the BLAST results.

    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a world class molecular biologist who knows everything about NCBI and BLAST results.",
            ),
            ("user", "{input}"),
        ],
    )
    summary_prompt = BLAST_SUMMARY_PROMPT.format(
        question=question,
        context=response_text,
    )
    output_parser = StrOutputParser()
    conversation = conversation_factory()
    chain = prompt | conversation.chat | output_parser
    answer = chain.invoke({"input": summary_prompt})
    return answer
class BlastQueryBuilder(BaseQueryBuilder):
    """A class for building a BlastQuery object."""

    def create_runnable(
        self,
        query_parameters: "BlastQueryParameters",
        conversation: "Conversation",
    ) -> Callable:
        """Creates a runnable object for executing queries using the LangChain
        `create_structured_output_runnable` method.

        Args:
        ----
        query_parameters: A Pydantic data model that specifies the fields of
            the API that should be queried.

        conversation: A BioChatter conversation object.

        Returns:
        -------
        A Callable object that can execute the query.

        """
        return create_structured_output_runnable(
            output_schema=query_parameters,
            llm=conversation.chat,
            prompt=self.structured_output_prompt,
        )

    def parameterise_query(
        self,
        question: str,
        conversation: "Conversation",
    ) -> BlastQueryParameters:
        """Generates a BlastQuery object based on the given question, prompt, and
        BioChatter conversation. Uses a Pydantic model to define the API fields.
        Creates a runnable that can be invoked on LLMs that are qualified to
        parameterise functions.

        Args:
        ----
        question (str): The question to be answered.

        conversation: The conversation object used for parameterising the
            BlastQuery.

        Returns:
        -------
        BlastQuery: the parameterised query object (Pydantic model)

        """
        runnable = self.create_runnable(
            query_parameters=BlastQueryParameters,
            conversation=conversation,
        )
        blast_call_obj = runnable.invoke(
            {"input": f"Answer:\n{question} based on:\n{BLAST_QUERY_PROMPT}"},
        )
        blast_call_obj.question_uuid = str(uuid.uuid4())
        return blast_call_obj
Parameters:

    query_parameters (BlastQueryParameters): A Pydantic data model that
        specifies the fields of the API that should be queried. Required.

def create_runnable(
    self,
    query_parameters: "BlastQueryParameters",
    conversation: "Conversation",
) -> Callable:
    """Creates a runnable object for executing queries using the LangChain
    `create_structured_output_runnable` method.

    Args:
    ----
    query_parameters: A Pydantic data model that specifies the fields of
        the API that should be queried.

    conversation: A BioChatter conversation object.

    Returns:
    -------
    A Callable object that can execute the query.

    """
    return create_structured_output_runnable(
        output_schema=query_parameters,
        llm=conversation.chat,
        prompt=self.structured_output_prompt,
    )

def parameterise_query(
    self,
    question: str,
    conversation: "Conversation",
) -> BlastQueryParameters:
    """Generates a BlastQuery object based on the given question, prompt, and
    BioChatter conversation. Uses a Pydantic model to define the API fields.
    Creates a runnable that can be invoked on LLMs that are qualified to
    parameterise functions.

    Args:
    ----
    question (str): The question to be answered.

    conversation: The conversation object used for parameterising the
        BlastQuery.

    Returns:
    -------
    BlastQuery: the parameterised query object (Pydantic model)

    """
    runnable = self.create_runnable(
        query_parameters=BlastQueryParameters,
        conversation=conversation,
    )
    blast_call_obj = runnable.invoke(
        {"input": f"Answer:\n{question} based on:\n{BLAST_QUERY_PROMPT}"},
    )
    blast_call_obj.question_uuid = str(uuid.uuid4())
    return blast_call_obj
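In practice the builder is invoked with a natural-language question; a sketch assuming `conversation` is a BioChatter conversation created by a factory:

builder = BlastQueryBuilder()
query = builder.parameterise_query(
    question="Which organism does the sequence AGCTTAGC... come from?",
    conversation=conversation,  # assumed BioChatter conversation object
)
# The LLM fills the BlastQueryParameters fields; the UUID tracks the question.
print(query.program, query.database, query.question_uuid)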
Bases: BaseModel
BlastQuery is a Pydantic model for the parameters of a BLAST query request,
used for configuring and sending a request to the NCBI BLAST query API. The
fields are dynamically configured by the LLM based on the user's question.
class BlastQueryParameters(BaseModel):
    """BlastQuery is a Pydantic model for the parameters of a BLAST query
    request, used for configuring and sending a request to the NCBI BLAST query
    API. The fields are dynamically configured by the LLM based on the user's
    question.

    """

    url: str | None = Field(
        default="https://blast.ncbi.nlm.nih.gov/Blast.cgi?",
        description="ALWAYS USE DEFAULT, DO NOT CHANGE",
    )
    cmd: str | None = Field(
        default="Put",
        description="Command to execute, 'Put' for submitting query, 'Get' for retrieving results.",
    )
    program: str | None = Field(
        default="blastn",
        description="BLAST program to use, e.g., 'blastn' for nucleotide-nucleotide BLAST, 'blastp' for protein-protein BLAST.",
    )
    database: str | None = Field(
        default="nt",
        description="Database to search, e.g., 'nt' for nucleotide database, 'nr' for non-redundant protein database, 'pdb' for the Protein Data Bank database, which is used specifically for protein structures, 'refseq_rna' and 'refseq_genomic': specialized databases for RNA sequences and genomic sequences",
    )
    query: str | None = Field(
        None,
        description="Nucleotide or protein sequence for the BLAST or blat query, make sure to always keep the entire sequence given.",
    )
    format_type: str | None = Field(
        default="Text",
        description="Format of the BLAST results, e.g., 'Text', 'XML'.",
    )
    rid: str | None = Field(
        None,
        description="Request ID for retrieving BLAST results.",
    )
    other_params: dict | None = Field(
        default={"email": "user@example.com"},
        description="Other optional BLAST parameters, including user email.",
    )
    max_hits: int | None = Field(
        default=15,
        description="Maximum number of hits to return in the BLAST results.",
    )
    sort_by: str | None = Field(
        default="score",
        description="Criterion to sort BLAST results by, e.g., 'score', 'evalue'.",
    )
    megablast: str | None = Field(
        default="on",
        description="Set to 'on' for human genome alignments",
    )
    question_uuid: str | None = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the question.",
    )
    full_url: str | None = Field(
        default="TBF",
        description="Full URL to be used to submit the BLAST query",
    )
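For orientation, the model can also be instantiated directly; a sketch with a made-up placeholder sequence (in normal operation the LLM fills these fields via the query builder):

params = BlastQueryParameters(
    program="blastn",
    database="nt",
    query="AGCTTAGCTAGCTACGTAGCTAGC",  # placeholder example sequence
    max_hits=5,
)
print(params.question_uuid)  # auto-generated UUID identifying the question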
class OncoKBFetcher(BaseFetcher):
    """A class for retrieving API results from OncoKB given a parameterized
    OncoKBQuery.
    """

    def __init__(self, api_token="demo"):
        self.headers = {
            "Authorization": f"Bearer {api_token}",
            "Accept": "application/json",
        }
        self.base_url = "https://demo.oncokb.org/api/v1"

    def fetch_results(
        self,
        request_data: OncoKBQueryParameters,
        retries: int | None = 3,
    ) -> str:
        """Function to submit the OncoKB query and fetch the results directly.
        No multi-step procedure, thus no wrapping of submission and retrieval in
        this case.

        Args:
        ----
        request_data: OncoKBQuery object (Pydantic model) containing the
            OncoKB query parameters.

        Returns:
        -------
        str: The results of the OncoKB query.

        """
        # Submit the query and get the URL
        params = request_data.dict(exclude_unset=True)
        endpoint = params.pop("endpoint")
        params.pop("question_uuid")
        full_url = f"{self.base_url}/{endpoint}"
        response = requests.get(full_url, headers=self.headers, params=params)
        response.raise_for_status()

        # Fetch the results from the URL
        results_response = requests.get(response.url, headers=self.headers)
        results_response.raise_for_status()

        return results_response.text
Parameters:

    request_data (OncoKBQueryParameters): OncoKBQuery object (Pydantic model)
        containing the OncoKB query parameters. Required.
def fetch_results(
    self,
    request_data: OncoKBQueryParameters,
    retries: int | None = 3,
) -> str:
    """Function to submit the OncoKB query and fetch the results directly.
    No multi-step procedure, thus no wrapping of submission and retrieval in
    this case.

    Args:
    ----
    request_data: OncoKBQuery object (Pydantic model) containing the
        OncoKB query parameters.

    Returns:
    -------
    str: The results of the OncoKB query.

    """
    # Submit the query and get the URL
    params = request_data.dict(exclude_unset=True)
    endpoint = params.pop("endpoint")
    params.pop("question_uuid")
    full_url = f"{self.base_url}/{endpoint}"
    response = requests.get(full_url, headers=self.headers, params=params)
    response.raise_for_status()

    # Fetch the results from the URL
    results_response = requests.get(response.url, headers=self.headers)
    results_response.raise_for_status()

    return results_response.text
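A direct-use sketch against the demo server; the `endpoint` and annotation fields shown here are hypothetical, since the `OncoKBQueryParameters` model is not reproduced in this section:

fetcher = OncoKBFetcher(api_token="demo")
request = OncoKBQueryParameters(  # hypothetical parameterisation
    endpoint="annotate/mutations/byProteinChange",
    hugoSymbol="BRAF",
    alteration="V600E",
)
print(fetcher.fetch_results(request, retries=3))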
class OncoKBInterpreter(BaseInterpreter):
    def summarise_results(
        self,
        question: str,
        conversation_factory: Callable,
        response_text: str,
    ) -> str:
        """Function to extract the answer from the OncoKB results.

        Args:
        ----
        question (str): The question to be answered.
        conversation_factory: A BioChatter conversation object.
        response_text (str): The response.text returned by OncoKB.

        Returns:
        -------
        str: The extracted answer from the OncoKB results.

        """
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a world class molecular biologist who knows "
                    "everything about OncoKB and cancer genomics. Your task is "
                    "to interpret results from OncoKB API calls and summarise "
                    "them for the user.",
                ),
                ("user", "{input}"),
            ],
        )
        summary_prompt = ONCOKB_SUMMARY_PROMPT.format(
            question=question,
            context=response_text,
        )
        output_parser = StrOutputParser()
        conversation = conversation_factory()
        chain = prompt | conversation.chat | output_parser
        answer = chain.invoke({"input": summary_prompt})
        return answer
Function to extract the answer from the OncoKB results.

Parameters:

    question (str): The question to be answered. Required.

    conversation_factory (Callable): A BioChatter conversation object. Required.

    response_text (str): The response.text returned by OncoKB. Required.

Returns:

    str: The extracted answer from the OncoKB results.
def summarise_results(
    self,
    question: str,
    conversation_factory: Callable,
    response_text: str,
) -> str:
    """Function to extract the answer from the OncoKB results.

    Args:
    ----
    question (str): The question to be answered.
    conversation_factory: A BioChatter conversation object.
    response_text (str): The response.text returned by OncoKB.

    Returns:
    -------
    str: The extracted answer from the OncoKB results.

    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a world class molecular biologist who knows "
                "everything about OncoKB and cancer genomics. Your task is "
                "to interpret results from OncoKB API calls and summarise "
                "them for the user.",
            ),
            ("user", "{input}"),
        ],
    )
    summary_prompt = ONCOKB_SUMMARY_PROMPT.format(
        question=question,
        context=response_text,
    )
    output_parser = StrOutputParser()
    conversation = conversation_factory()
    chain = prompt | conversation.chat | output_parser
    answer = chain.invoke({"input": summary_prompt})
    return answer
class OncoKBQueryBuilder(BaseQueryBuilder):
-    """A class for building an OncoKBQuery object."""
-
-    def create_runnable(
-        self,
-        query_parameters: "OncoKBQueryParameters",
-        conversation: "Conversation",
-    ) -> Callable:
-        """
-        Creates a runnable object for executing queries using the LangChain
-        `create_structured_output_runnable` method.
-
-        Args:
-            query_parameters: A Pydantic data model that specifies the fields of
-                the API that should be queried.
-
-            conversation: A BioChatter conversation object.
-
-        Returns:
-            A Callable object that can execute the query.
-        """
-        return create_structured_output_runnable(
-            output_schema=query_parameters,
-            llm=conversation.chat,
-            prompt=self.structured_output_prompt,
-        )
-
-    def parameterise_query(
-        self,
-        question: str,
-        conversation: "Conversation",
-    ) -> OncoKBQueryParameters:
-        """
-        Generates an OncoKBQuery object based on the given question, prompt, and
-        BioChatter conversation. Uses a Pydantic model to define the API fields.
-        Creates a runnable that can be invoked on LLMs that are qualified to
-        parameterise functions.
-
-        Args:
-            question (str): The question to be answered.
-
-            conversation: The conversation object used for parameterising the
-                OncoKBQuery.
-
-        Returns:
-            OncoKBQueryParameters: the parameterised query object (Pydantic model)
-        """
-        runnable = self.create_runnable(
-            query_parameters=OncoKBQueryParameters,
-            conversation=conversation,
-        )
-        oncokb_call_obj = runnable.invoke(
-            {"input": f"Answer:\n{question} based on:\n{ONCOKB_QUERY_PROMPT}"}
-        )
-        oncokb_call_obj.question_uuid = str(uuid.uuid4())
-        return oncokb_call_obj
class OncoKBQueryBuilder(BaseQueryBuilder):
+    """A class for building an OncoKBQuery object."""
+
+    def create_runnable(
+        self,
+        query_parameters: "OncoKBQueryParameters",
+        conversation: "Conversation",
+    ) -> Callable:
+        """Creates a runnable object for executing queries using the LangChain
+        `create_structured_output_runnable` method.
+
+        Args:
+        ----
+            query_parameters: A Pydantic data model that specifies the fields of
+                the API that should be queried.
+
+            conversation: A BioChatter conversation object.
+
+        Returns:
+        -------
+            A Callable object that can execute the query.
+
+        """
+        return create_structured_output_runnable(
+            output_schema=query_parameters,
+            llm=conversation.chat,
+            prompt=self.structured_output_prompt,
+        )
+
+    def parameterise_query(
+        self,
+        question: str,
+        conversation: "Conversation",
+    ) -> OncoKBQueryParameters:
+        """Generates an OncoKBQuery object based on the given question, prompt, and
+        BioChatter conversation. Uses a Pydantic model to define the API fields.
+        Creates a runnable that can be invoked on LLMs that are qualified to
+        parameterise functions.
+
+        Args:
+        ----
+            question (str): The question to be answered.
+
+            conversation: The conversation object used for parameterising the
+                OncoKBQuery.
+
+        Returns:
+        -------
+            OncoKBQueryParameters: the parameterised query object (Pydantic model)
+
+        """
+        runnable = self.create_runnable(
+            query_parameters=OncoKBQueryParameters,
+            conversation=conversation,
+        )
+        oncokb_call_obj = runnable.invoke(
+            {"input": f"Answer:\n{question} based on:\n{ONCOKB_QUERY_PROMPT}"},
+        )
+        oncokb_call_obj.question_uuid = str(uuid.uuid4())
+        return oncokb_call_obj
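With the builder in place, the three agent roles compose into a linear pipeline: parameterise, fetch, summarise. A sketch under the assumption that concrete `OncoKBFetcher` and `OncoKBInterpreter` classes implement the `BaseFetcher` and `BaseInterpreter` interfaces shown in this document (both class names are assumptions; `make_conversation` is the factory sketched above):

question = "What is known about BRAF V600E?"

builder = OncoKBQueryBuilder()
query_model = builder.parameterise_query(
    question=question,
    conversation=make_conversation(),
)

fetcher = OncoKBFetcher()  # assumed BaseFetcher implementation
response_text = fetcher.fetch_results(query_model, retries=3)

interpreter = OncoKBInterpreter()  # assumed BaseInterpreter implementation
answer = interpreter.summarise_results(
    question=question,
    conversation_factory=make_conversation,
    response_text=response_text,
)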
@@ -5886,83 +5303,19 @@
query_parameters: A Pydantic data model that specifies the fields of
+    the API that should be queried.
+
-Parameters:
-    query_parameters (OncoKBQueryParameters): A Pydantic data model that
-        specifies the fields of the API that should be queried. Required.
def create_runnable(
-    self,
-    query_parameters: "OncoKBQueryParameters",
-    conversation: "Conversation",
-) -> Callable:
-    """
-    Creates a runnable object for executing queries using the LangChain
-    `create_structured_output_runnable` method.
-
-    Args:
-        query_parameters: A Pydantic data model that specifies the fields of
-            the API that should be queried.
-
-        conversation: A BioChatter conversation object.
-
-    Returns:
-        A Callable object that can execute the query.
-    """
-    return create_structured_output_runnable(
-        output_schema=query_parameters,
-        llm=conversation.chat,
-        prompt=self.structured_output_prompt,
-    )
def create_runnable(
+    self,
+    query_parameters: "OncoKBQueryParameters",
+    conversation: "Conversation",
+) -> Callable:
+    """Creates a runnable object for executing queries using the LangChain
+    `create_structured_output_runnable` method.
+
+    Args:
+    ----
+        query_parameters: A Pydantic data model that specifies the fields of
+            the API that should be queried.
+
+        conversation: A BioChatter conversation object.
+
+    Returns:
+    -------
+        A Callable object that can execute the query.
+
+    """
+    return create_structured_output_runnable(
+        output_schema=query_parameters,
+        llm=conversation.chat,
+        prompt=self.structured_output_prompt,
+    )
def parameterise_query(
-    self,
-    question: str,
-    conversation: "Conversation",
-) -> OncoKBQueryParameters:
-    """
-    Generates an OncoKBQuery object based on the given question, prompt, and
-    BioChatter conversation. Uses a Pydantic model to define the API fields.
-    Creates a runnable that can be invoked on LLMs that are qualified to
-    parameterise functions.
-
-    Args:
-        question (str): The question to be answered.
-
-        conversation: The conversation object used for parameterising the
-            OncoKBQuery.
-
-    Returns:
-        OncoKBQueryParameters: the parameterised query object (Pydantic model)
-    """
-    runnable = self.create_runnable(
-        query_parameters=OncoKBQueryParameters,
-        conversation=conversation,
-    )
-    oncokb_call_obj = runnable.invoke(
-        {"input": f"Answer:\n{question} based on:\n{ONCOKB_QUERY_PROMPT}"}
-    )
-    oncokb_call_obj.question_uuid = str(uuid.uuid4())
-    return oncokb_call_obj
def parameterise_query(
+    self,
+    question: str,
+    conversation: "Conversation",
+) -> OncoKBQueryParameters:
+    """Generates an OncoKBQuery object based on the given question, prompt, and
+    BioChatter conversation. Uses a Pydantic model to define the API fields.
+    Creates a runnable that can be invoked on LLMs that are qualified to
+    parameterise functions.
+
+    Args:
+    ----
+        question (str): The question to be answered.
+
+        conversation: The conversation object used for parameterising the
+            OncoKBQuery.
+
+    Returns:
+    -------
+        OncoKBQueryParameters: the parameterised query object (Pydantic model)
+
+    """
+    runnable = self.create_runnable(
+        query_parameters=OncoKBQueryParameters,
+        conversation=conversation,
+    )
+    oncokb_call_obj = runnable.invoke(
+        {"input": f"Answer:\n{question} based on:\n{ONCOKB_QUERY_PROMPT}"},
+    )
+    oncokb_call_obj.question_uuid = str(uuid.uuid4())
+    return oncokb_call_obj
class BioCypherPromptEngine:
-    def __init__(
-        self,
-        schema_config_or_info_path: Optional[str] = None,
-        schema_config_or_info_dict: Optional[dict] = None,
-        model_name: str = "gpt-3.5-turbo",
-        conversation_factory: Optional[Callable] = None,
-    ) -> None:
-        """
-
-        Given a biocypher schema configuration, extract the entities and
-        relationships, and for each extract their mode of representation (node
-        or edge), properties, and identifier namespace. Using these data, allow
-        the generation of prompts for a large language model, informing it of
-        the schema constituents and their properties, to enable the
-        parameterisation of function calls to a knowledge graph.
-
-        Args:
-            schema_config_or_info_path: Path to a biocypher schema configuration
-                file or the extended schema information output generated by
-                BioCypher's `write_schema_info` function (preferred).
-
-            schema_config_or_info_dict: A dictionary containing the schema
-                configuration file or the extended schema information output
-                generated by BioCypher's `write_schema_info` function
-                (preferred).
-
-            model_name: The name of the model to use for the conversation.
-                DEPRECATED: This should now be set in the conversation factory.
-
-            conversation_factory: A function used to create a conversation for
-                creating the KG query. If not provided, a default function is
-                used (creating an OpenAI conversation with the specified model,
-                see `_get_conversation`).
-        """
-
-        if not schema_config_or_info_path and not schema_config_or_info_dict:
-            raise ValueError(
-                "Please provide the schema configuration or schema info as a "
-                "path to a file or as a dictionary."
-            )
-
-        if schema_config_or_info_path and schema_config_or_info_dict:
-            raise ValueError(
-                "Please provide the schema configuration or schema info as a "
-                "path to a file or as a dictionary, not both."
-            )
-
-        # set conversation factory or use default
-        self.conversation_factory = (
-            conversation_factory
-            if conversation_factory is not None
-            else self._get_conversation
-        )
-
-        if schema_config_or_info_path:
-            # read the schema configuration
-            with open(schema_config_or_info_path, "r") as f:
-                schema_config = yaml.safe_load(f)
-        elif schema_config_or_info_dict:
-            schema_config = schema_config_or_info_dict
-
-        # check whether it is the original schema config or the output of
-        # biocypher info
-        is_schema_info = schema_config.get("is_schema_info", False)
-
-        # extract the entities and relationships: each top level key that has
-        # a 'represented_as' key
-        self.entities = {}
-        self.relationships = {}
-        if not is_schema_info:
-            for key, value in schema_config.items():
-                # hacky, better with biocypher output
-                name_indicates_relationship = (
-                    "interaction" in key.lower() or "association" in key.lower()
-                )
-                if "represented_as" in value:
-                    if (
-                        value["represented_as"] == "node"
-                        and not name_indicates_relationship
-                    ):
-                        self.entities[sentencecase_to_pascalcase(key)] = value
-                    elif (
-                        value["represented_as"] == "node"
-                        and name_indicates_relationship
-                    ):
-                        self.relationships[sentencecase_to_pascalcase(key)] = (
-                            value
-                        )
-                    elif value["represented_as"] == "edge":
-                        self.relationships[sentencecase_to_pascalcase(key)] = (
-                            value
-                        )
-        else:
-            for key, value in schema_config.items():
-                if not isinstance(value, dict):
-                    continue
-                if value.get("present_in_knowledge_graph", None) == False:
-                    continue
-                if value.get("is_relationship", None) == False:
-                    self.entities[sentencecase_to_pascalcase(key)] = value
-                elif value.get("is_relationship", None) == True:
-                    value = self._capitalise_source_and_target(value)
-                    self.relationships[sentencecase_to_pascalcase(key)] = value
-
-        self.question = ""
-        self.selected_entities = []
-        self.selected_relationships = []  # used in property selection
-        self.selected_relationship_labels = {}  # copy to deal with labels that
-        # are not the same as the relationship name, used in query generation
-        # dictionary to also include source and target types
-        self.rel_directions = {}
-        self.model_name = model_name
+    def __init__(
+        self,
+        schema_config_or_info_path: str | None = None,
+        schema_config_or_info_dict: dict | None = None,
+        model_name: str = "gpt-3.5-turbo",
+        conversation_factory: Callable | None = None,
+    ) -> None:
+        """Given a biocypher schema configuration, extract the entities and
+        relationships, and for each extract their mode of representation (node
+        or edge), properties, and identifier namespace. Using these data, allow
+        the generation of prompts for a large language model, informing it of
+        the schema constituents and their properties, to enable the
+        parameterisation of function calls to a knowledge graph.
+
+        Args:
+        ----
+            schema_config_or_info_path: Path to a biocypher schema configuration
+                file or the extended schema information output generated by
+                BioCypher's `write_schema_info` function (preferred).
+
+            schema_config_or_info_dict: A dictionary containing the schema
+                configuration file or the extended schema information output
+                generated by BioCypher's `write_schema_info` function
+                (preferred).
+
+            model_name: The name of the model to use for the conversation.
+                DEPRECATED: This should now be set in the conversation factory.
+
+            conversation_factory: A function used to create a conversation for
+                creating the KG query. If not provided, a default function is
+                used (creating an OpenAI conversation with the specified model,
+                see `_get_conversation`).
+
+        """
+        if not schema_config_or_info_path and not schema_config_or_info_dict:
+            raise ValueError(
+                "Please provide the schema configuration or schema info as a path to a file or as a dictionary.",
+            )
+
+        if schema_config_or_info_path and schema_config_or_info_dict:
+            raise ValueError(
+                "Please provide the schema configuration or schema info as a "
+                "path to a file or as a dictionary, not both.",
+            )
+
+        # set conversation factory or use default
+        self.conversation_factory = conversation_factory if conversation_factory is not None else self._get_conversation
+
+        if schema_config_or_info_path:
+            # read the schema configuration
+            with open(schema_config_or_info_path) as f:
+                schema_config = yaml.safe_load(f)
+        elif schema_config_or_info_dict:
+            schema_config = schema_config_or_info_dict
+
+        # check whether it is the original schema config or the output of
+        # biocypher info
+        is_schema_info = schema_config.get("is_schema_info", False)
+
+        # extract the entities and relationships: each top level key that has
+        # a 'represented_as' key
+        self.entities = {}
+        self.relationships = {}
+        if not is_schema_info:
+            for key, value in schema_config.items():
+                # hacky, better with biocypher output
+                name_indicates_relationship = "interaction" in key.lower() or "association" in key.lower()
+                if "represented_as" in value:
+                    if value["represented_as"] == "node" and not name_indicates_relationship:
+                        self.entities[sentencecase_to_pascalcase(key)] = value
+                    elif (value["represented_as"] == "node" and name_indicates_relationship) or value[
+                        "represented_as"
+                    ] == "edge":
+                        self.relationships[sentencecase_to_pascalcase(key)] = value
+        else:
+            for key, value in schema_config.items():
+                if not isinstance(value, dict):
+                    continue
+                if value.get("present_in_knowledge_graph", None) == False:
+                    continue
+                if value.get("is_relationship", None) == False:
+                    self.entities[sentencecase_to_pascalcase(key)] = value
+                elif value.get("is_relationship", None) == True:
+                    value = self._capitalise_source_and_target(value)
+                    self.relationships[sentencecase_to_pascalcase(key)] = value
+
+        self.question = ""
+        self.selected_entities = []
+        self.selected_relationships = []  # used in property selection
+        self.selected_relationship_labels = {}  # copy to deal with labels that
+        # are not the same as the relationship name, used in query generation
+        # dictionary to also include source and target types
+        self.rel_directions = {}
+        self.model_name = model_name
-    def _capitalise_source_and_target(self, relationship: dict) -> dict:
-        """
-        Make sources and targets PascalCase to match the entities. Sources and
-        targets can be strings or lists of strings.
-        """
-        if "source" in relationship:
-            if isinstance(relationship["source"], str):
-                relationship["source"] = sentencecase_to_pascalcase(
-                    relationship["source"]
-                )
-            elif isinstance(relationship["source"], list):
-                relationship["source"] = [
-                    sentencecase_to_pascalcase(s)
-                    for s in relationship["source"]
-                ]
-        if "target" in relationship:
-            if isinstance(relationship["target"], str):
-                relationship["target"] = sentencecase_to_pascalcase(
-                    relationship["target"]
-                )
-            elif isinstance(relationship["target"], list):
-                relationship["target"] = [
-                    sentencecase_to_pascalcase(t)
-                    for t in relationship["target"]
-                ]
-        return relationship
+    def _capitalise_source_and_target(self, relationship: dict) -> dict:
+        """Make sources and targets PascalCase to match the entities. Sources and
+        targets can be strings or lists of strings.
+        """
+        if "source" in relationship:
+            if isinstance(relationship["source"], str):
+                relationship["source"] = sentencecase_to_pascalcase(
+                    relationship["source"],
+                )
+            elif isinstance(relationship["source"], list):
+                relationship["source"] = [sentencecase_to_pascalcase(s) for s in relationship["source"]]
+        if "target" in relationship:
+            if isinstance(relationship["target"], str):
+                relationship["target"] = sentencecase_to_pascalcase(
+                    relationship["target"],
+                )
+            elif isinstance(relationship["target"], list):
+                relationship["target"] = [sentencecase_to_pascalcase(t) for t in relationship["target"]]
+        return relationship
-    def _select_graph_entities_from_question(
-        self, question: str, conversation: Conversation
-    ) -> str:
-        conversation.reset()
-        success1 = self._select_entities(
-            question=question, conversation=conversation
-        )
-        if not success1:
-            raise ValueError(
-                "Entity selection failed. Please try again with a different "
-                "question."
-            )
-        conversation.reset()
-        success2 = self._select_relationships(conversation=conversation)
-        if not success2:
-            raise ValueError(
-                "Relationship selection failed. Please try again with a "
-                "different question."
-            )
-        conversation.reset()
-        success3 = self._select_properties(conversation=conversation)
-        if not success3:
-            raise ValueError(
-                "Property selection failed. Please try again with a different "
-                "question."
-            )
+    def _select_graph_entities_from_question(
+        self,
+        question: str,
+        conversation: Conversation,
+    ) -> str:
+        conversation.reset()
+        success1 = self._select_entities(
+            question=question,
+            conversation=conversation,
+        )
+        if not success1:
+            raise ValueError(
+                "Entity selection failed. Please try again with a different question.",
+            )
+        conversation.reset()
+        success2 = self._select_relationships(conversation=conversation)
+        if not success2:
+            raise ValueError(
+                "Relationship selection failed. Please try again with a different question.",
+            )
+        conversation.reset()
+        success3 = self._select_properties(conversation=conversation)
+        if not success3:
+            raise ValueError(
+                "Property selection failed. Please try again with a different question.",
+            )
-    def _generate_query_prompt(
-        self,
-        entities: list,
-        relationships: dict,
-        properties: dict,
-        query_language: Optional[str] = "Cypher",
-    ) -> str:
-        """
-        Generate a prompt for a large language model to generate a database
-        query based on the selected entities, relationships, and properties.
-
-        Args:
-            entities: A list of entities that are relevant to the question.
-
-            relationships: A list of relationships that are relevant to the
-                question.
-
-            properties: A dictionary of properties that are relevant to the
-                question.
-
-            query_language: The language of the query to generate.
-
-        Returns:
-            A prompt for a large language model to generate a database query.
-        """
-        msg = (
-            f"Generate a database query in {query_language} that answers "
-            f"the user's question. "
-            f"You can use the following entities: {entities}, "
-            f"relationships: {list(relationships.keys())}, and "
-            f"properties: {properties}. "
-        )
-
-        for relationship, values in relationships.items():
-            self._expand_pairs(relationship, values)
-
-        if self.rel_directions:
-            msg += "Given the following valid combinations of source, relationship, and target: "
-            for key, value in self.rel_directions.items():
-                for pair in value:
-                    msg += f"'(:{pair[0]})-(:{key})->(:{pair[1]})', "
-            msg += f"generate a {query_language} query using one of these combinations. "
-
-        msg += "Only return the query, without any additional text, symbols or characters --- just the query statement."
-        return msg
+    def _generate_query_prompt(
+        self,
+        entities: list,
+        relationships: dict,
+        properties: dict,
+        query_language: str | None = "Cypher",
+    ) -> str:
+        """Generate a prompt for a large language model to generate a database
+        query based on the selected entities, relationships, and properties.
+
+        Args:
+        ----
+            entities: A list of entities that are relevant to the question.
+
+            relationships: A list of relationships that are relevant to the
+                question.
+
+            properties: A dictionary of properties that are relevant to the
+                question.
+
+            query_language: The language of the query to generate.
+
+        Returns:
+        -------
+            A prompt for a large language model to generate a database query.
+
+        """
+        msg = (
+            f"Generate a database query in {query_language} that answers "
+            f"the user's question. "
+            f"You can use the following entities: {entities}, "
+            f"relationships: {list(relationships.keys())}, and "
+            f"properties: {properties}. "
+        )
+
+        for relationship, values in relationships.items():
+            self._expand_pairs(relationship, values)
+
+        if self.rel_directions:
+            msg += "Given the following valid combinations of source, relationship, and target: "
+            for key, value in self.rel_directions.items():
+                for pair in value:
+                    msg += f"'(:{pair[0]})-(:{key})->(:{pair[1]})', "
+            msg += f"generate a {query_language} query using one of these combinations. "
+
+        msg += "Only return the query, without any additional text, symbols or characters --- just the query statement."
+        return msg
-    def generate_query_prompt(
-        self, question: str, query_language: Optional[str] = "Cypher"
-    ) -> str:
-        """
-        Generate a prompt for a large language model to generate a database
-        query based on the user's question and class attributes informing about
-        the schema.
-
-        Args:
-            question: A user's question.
-
-            query_language: The language of the query to generate.
-
-        Returns:
-            A prompt for a large language model to generate a database query.
-        """
-        self._select_graph_entities_from_question(
-            question, self.conversation_factory()
-        )
-        msg = self._generate_query_prompt(
-            self.selected_entities,
-            self.selected_relationship_labels,
-            self.selected_properties,
-            query_language,
-        )
-        return msg
+    def generate_query_prompt(
+        self,
+        question: str,
+        query_language: str | None = "Cypher",
+    ) -> str:
+        """Generate a prompt for a large language model to generate a database
+        query based on the user's question and class attributes informing about
+        the schema.
+
+        Args:
+        ----
+            question: A user's question.
+
+            query_language: The language of the query to generate.
+
+        Returns:
+        -------
+            A prompt for a large language model to generate a database query.
+
+        """
+        self._select_graph_entities_from_question(
+            question,
+            self.conversation_factory(),
+        )
+        msg = self._generate_query_prompt(
+            self.selected_entities,
+            self.selected_relationship_labels,
+            self.selected_properties,
+            query_language,
+        )
+        return msg
-    def generate_query(
-        self, question: str, query_language: Optional[str] = "Cypher"
-    ) -> str:
-        """
-        Wrap entity and property selection and query generation; return the
-        generated query.
-
-        Args:
-            question: A user's question.
-
-            query_language: The language of the query to generate.
-
-        Returns:
-            A database query that could answer the user's question.
-        """
-
-        self._select_graph_entities_from_question(
-            question, self.conversation_factory()
-        )
-
-        return self._generate_query(
-            question=question,
-            entities=self.selected_entities,
-            relationships=self.selected_relationship_labels,
-            properties=self.selected_properties,
-            query_language=query_language,
-            conversation=self.conversation_factory(),
-        )
+    def generate_query(
+        self,
+        question: str,
+        query_language: str | None = "Cypher",
+    ) -> str:
+        """Wrap entity and property selection and query generation; return the
+        generated query.
+
+        Args:
+        ----
+            question: A user's question.
+
+            query_language: The language of the query to generate.
+
+        Returns:
+        -------
+            A database query that could answer the user's question.
+
+        """
+        self._select_graph_entities_from_question(
+            question,
+            self.conversation_factory(),
+        )
+
+        return self._generate_query(
+            question=question,
+            entities=self.selected_entities,
+            relationships=self.selected_relationship_labels,
+            properties=self.selected_properties,
+            query_language=query_language,
+            conversation=self.conversation_factory(),
+        )
-    def _get_conversation(
-        self, model_name: Optional[str] = None
-    ) -> "Conversation":
-        """
-        Create a conversation object given a model name.
-
-        Args:
-            model_name: The name of the model to use for the conversation.
-
-        Returns:
-            A BioChatter Conversation object for connecting to the LLM.
-
-        Todo:
-            Genericise to models outside of OpenAI.
-        """
-
-        conversation = GptConversation(
-            model_name=model_name or self.model_name,
-            prompts={},
-            correct=False,
-        )
-        conversation.set_api_key(
-            api_key=os.getenv("OPENAI_API_KEY"), user="test_user"
-        )
-        return conversation
+    def _get_conversation(
+        self,
+        model_name: str | None = None,
+    ) -> "Conversation":
+        """Create a conversation object given a model name.
+
+        Args:
+        ----
+            model_name: The name of the model to use for the conversation.
+
+        Returns:
+        -------
+            A BioChatter Conversation object for connecting to the LLM.
+
+        Todo:
+        ----
+            Genericise to models outside of OpenAI.
+
+        """
+        conversation = GptConversation(
+            model_name=model_name or self.model_name,
+            prompts={},
+            correct=False,
+        )
+        conversation.set_api_key(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            user="test_user",
+        )
+        return conversation
-    def _select_entities(
-        self, question: str, conversation: "Conversation"
-    ) -> bool:
-        """
-        Given a question, select the entities that are relevant to the question
-        and store them in `selected_entities` and `selected_relationships`. Use
-        LLM conversation to do this.
-
-        Args:
-            question: A user's question.
-
-            conversation: A BioChatter Conversation object for connecting to the
-                LLM.
-
-        Returns:
-            True if at least one entity was selected, False otherwise.
-
-        """
-
-        self.question = question
-
-        conversation.append_system_message(
-            (
-                "You have access to a knowledge graph that contains "
-                f"these entity types: {', '.join(self.entities)}. Your task is "
-                "to select the entity types that are relevant to the user's question "
-                "for subsequent use in a query. Only return the entity types, "
-                "comma-separated, without any additional text. Do not return "
-                "entity names, relationships, or properties."
-            )
-        )
-
-        msg, token_usage, correction = conversation.query(question)
-
-        result = msg.split(",") if msg else []
-        # TODO: do we go back and retry if no entities were selected? or ask for
-        # a reason? offer visual selection of entities and relationships by the
-        # user?
-
-        if result:
-            for entity in result:
-                entity = entity.strip()
-                if entity in self.entities:
-                    self.selected_entities.append(entity)
-
-        return bool(result)
+    def _select_entities(
+        self,
+        question: str,
+        conversation: "Conversation",
+    ) -> bool:
+        """Given a question, select the entities that are relevant to the question
+        and store them in `selected_entities` and `selected_relationships`. Use
+        LLM conversation to do this.
+
+        Args:
+        ----
+            question: A user's question.
+
+            conversation: A BioChatter Conversation object for connecting to the
+                LLM.
+
+        Returns:
+        -------
+            True if at least one entity was selected, False otherwise.
+
+        """
+        self.question = question
+
+        conversation.append_system_message(
+            "You have access to a knowledge graph that contains "
+            f"these entity types: {', '.join(self.entities)}. Your task is "
+            "to select the entity types that are relevant to the user's question "
+            "for subsequent use in a query. Only return the entity types, "
+            "comma-separated, without any additional text. Do not return "
+            "entity names, relationships, or properties.",
+        )
+
+        msg, token_usage, correction = conversation.query(question)
+
+        result = msg.split(",") if msg else []
+        # TODO: do we go back and retry if no entities were selected? or ask for
+        # a reason? offer visual selection of entities and relationships by the
+        # user?
+
+        if result:
+            for entity in result:
+                entity = entity.strip()
+                if entity in self.entities:
+                    self.selected_entities.append(entity)
+
+        return bool(result)
-    def _select_relationships(self, conversation: "Conversation") -> bool:
-        """
-        Given a question and the preselected entities, select relationships for
-        the query.
-
-        Args:
-            conversation: A BioChatter Conversation object for connecting to the
-                LLM.
-
-        Returns:
-            True if at least one relationship was selected, False otherwise.
-
-        Todo:
-            Now we have the problem that we discard all relationships that do
-            not have a source and target, if at least one relationship has a
-            source and target. At least communicate this all-or-nothing
-            behaviour to the user.
-        """
-
-        if not self.question:
-            raise ValueError(
-                "No question found. Please make sure to run entity selection "
-                "first."
-            )
-
-        if not self.selected_entities:
-            raise ValueError(
-                "No entities found. Please run the entity selection step first."
-            )
-
-        rels = {}
-        source_and_target_present = False
-        for key, value in self.relationships.items():
-            if "source" in value and "target" in value:
-                # if source or target is a list, expand to single pairs
-                source = ensure_iterable(value["source"])
-                target = ensure_iterable(value["target"])
-                pairs = []
-                for s in source:
-                    for t in target:
-                        pairs.append(
-                            (
-                                sentencecase_to_pascalcase(s),
-                                sentencecase_to_pascalcase(t),
-                            )
-                        )
-                rels[key] = pairs
-                source_and_target_present = True
-            else:
-                rels[key] = {}
-
-        # prioritise relationships that have source and target, and discard
-        # relationships that do not have both source and target, if at least one
-        # relationship has both source and target. keep relationships that have
-        # either source or target, if none of the relationships have both source
-        # and target.
-
-        if source_and_target_present:
-            # First, separate the relationships into two groups: those with both
-            # source and target in the selected entities, and those with either
-            # source or target but not both.
-
-            rels_with_both = {}
-            rels_with_either = {}
-            for key, value in rels.items():
-                for pair in value:
-                    if pair[0] in self.selected_entities:
-                        if pair[1] in self.selected_entities:
-                            rels_with_both[key] = value
-                        else:
-                            rels_with_either[key] = value
-                    elif pair[1] in self.selected_entities:
-                        rels_with_either[key] = value
-
-            # If there are any relationships with both source and target,
-            # discard the others.
-
-            if rels_with_both:
-                rels = rels_with_both
-            else:
-                rels = rels_with_either
-
-            selected_rels = []
-            for key, value in rels.items():
-                if not value:
-                    continue
-
-                for pair in value:
-                    if (
-                        pair[0] in self.selected_entities
-                        or pair[1] in self.selected_entities
-                    ):
-                        selected_rels.append((key, pair))
-
-            rels = json.dumps(selected_rels)
-        else:
-            rels = json.dumps(self.relationships)
-
-        msg = (
-            "You have access to a knowledge graph that contains "
-            f"these entities: {', '.join(self.selected_entities)}. "
-            "Your task is to select the relationships that are relevant "
-            "to the user's question for subsequent use in a query. Only "
-            "return the relationships without their sources or targets, "
-            "comma-separated, and without any additional text. Here are the "
-            "possible relationships and their source and target entities: "
-            f"{rels}."
-        )
-
-        conversation.append_system_message(msg)
-
-        res, token_usage, correction = conversation.query(self.question)
-
-        result = res.split(",") if msg else []
-
-        if result:
-            for relationship in result:
-                relationship = relationship.strip()
-                if relationship in self.relationships:
-                    self.selected_relationships.append(relationship)
-                    rel_dict = self.relationships[relationship]
-                    label = rel_dict.get("label_as_edge", relationship)
-                    if "source" in rel_dict and "target" in rel_dict:
-                        self.selected_relationship_labels[label] = {
-                            "source": rel_dict["source"],
-                            "target": rel_dict["target"],
-                        }
-                    else:
-                        self.selected_relationship_labels[label] = {
-                            "source": None,
-                            "target": None,
-                        }
-
-        # if we selected relationships that have either source or target which
-        # is not in the selected entities, we add those entities to the selected
-        # entities.
-
-        if self.selected_relationship_labels:
-            for key, value in self.selected_relationship_labels.items():
-                sources = ensure_iterable(value["source"])
-                targets = ensure_iterable(value["target"])
-                for source in sources:
-                    if source is None:
-                        continue
-                    if source not in self.selected_entities:
-                        self.selected_entities.append(
-                            sentencecase_to_pascalcase(source)
-                        )
-                for target in targets:
-                    if target is None:
-                        continue
-                    if target not in self.selected_entities:
-                        self.selected_entities.append(
-                            sentencecase_to_pascalcase(target)
-                        )
-
-        return bool(result)
+    def _select_relationships(self, conversation: "Conversation") -> bool:
+        """Given a question and the preselected entities, select relationships for
+        the query.
+
+        Args:
+        ----
+            conversation: A BioChatter Conversation object for connecting to the
+                LLM.
+
+        Returns:
+        -------
+            True if at least one relationship was selected, False otherwise.
+
+        Todo:
+        ----
+            Now we have the problem that we discard all relationships that do
+            not have a source and target, if at least one relationship has a
+            source and target. At least communicate this all-or-nothing
+            behaviour to the user.
+
+        """
+        if not self.question:
+            raise ValueError(
+                "No question found. Please make sure to run entity selection first.",
+            )
+
+        if not self.selected_entities:
+            raise ValueError(
+                "No entities found. Please run the entity selection step first.",
+            )
+
+        rels = {}
+        source_and_target_present = False
+        for key, value in self.relationships.items():
+            if "source" in value and "target" in value:
+                # if source or target is a list, expand to single pairs
+                source = ensure_iterable(value["source"])
+                target = ensure_iterable(value["target"])
+                pairs = []
+                for s in source:
+                    for t in target:
+                        pairs.append(
+                            (
+                                sentencecase_to_pascalcase(s),
+                                sentencecase_to_pascalcase(t),
+                            ),
+                        )
+                rels[key] = pairs
+                source_and_target_present = True
+            else:
+                rels[key] = {}
+
+        # prioritise relationships that have source and target, and discard
+        # relationships that do not have both source and target, if at least one
+        # relationship has both source and target. keep relationships that have
+        # either source or target, if none of the relationships have both source
+        # and target.
+
+        if source_and_target_present:
+            # First, separate the relationships into two groups: those with both
+            # source and target in the selected entities, and those with either
+            # source or target but not both.
+
+            rels_with_both = {}
+            rels_with_either = {}
+            for key, value in rels.items():
+                for pair in value:
+                    if pair[0] in self.selected_entities:
+                        if pair[1] in self.selected_entities:
+                            rels_with_both[key] = value
+                        else:
+                            rels_with_either[key] = value
+                    elif pair[1] in self.selected_entities:
+                        rels_with_either[key] = value
+
+            # If there are any relationships with both source and target,
+            # discard the others.
+
+            if rels_with_both:
+                rels = rels_with_both
+            else:
+                rels = rels_with_either
+
+            selected_rels = []
+            for key, value in rels.items():
+                if not value:
+                    continue
+
+                for pair in value:
+                    if pair[0] in self.selected_entities or pair[1] in self.selected_entities:
+                        selected_rels.append((key, pair))
+
+            rels = json.dumps(selected_rels)
+        else:
+            rels = json.dumps(self.relationships)
+
+        msg = (
+            "You have access to a knowledge graph that contains "
+            f"these entities: {', '.join(self.selected_entities)}. "
+            "Your task is to select the relationships that are relevant "
+            "to the user's question for subsequent use in a query. Only "
+            "return the relationships without their sources or targets, "
+            "comma-separated, and without any additional text. Here are the "
+            "possible relationships and their source and target entities: "
+            f"{rels}."
+        )
+
+        conversation.append_system_message(msg)
+
+        res, token_usage, correction = conversation.query(self.question)
+
+        result = res.split(",") if msg else []
+
+        if result:
+            for relationship in result:
+                relationship = relationship.strip()
+                if relationship in self.relationships:
+                    self.selected_relationships.append(relationship)
+                    rel_dict = self.relationships[relationship]
+                    label = rel_dict.get("label_as_edge", relationship)
+                    if "source" in rel_dict and "target" in rel_dict:
+                        self.selected_relationship_labels[label] = {
+                            "source": rel_dict["source"],
+                            "target": rel_dict["target"],
+                        }
+                    else:
+                        self.selected_relationship_labels[label] = {
+                            "source": None,
+                            "target": None,
+                        }
+
+        # if we selected relationships that have either source or target which
+        # is not in the selected entities, we add those entities to the selected
+        # entities.
+
+        if self.selected_relationship_labels:
+            for key, value in self.selected_relationship_labels.items():
+                sources = ensure_iterable(value["source"])
+                targets = ensure_iterable(value["target"])
+                for source in sources:
+                    if source is None:
+                        continue
+                    if source not in self.selected_entities:
+                        self.selected_entities.append(
+                            sentencecase_to_pascalcase(source),
+                        )
+                for target in targets:
+                    if target is None:
+                        continue
+                    if target not in self.selected_entities:
+                        self.selected_entities.append(
+                            sentencecase_to_pascalcase(target),
+                        )
+
+        return bool(result)
-    @staticmethod
-    def _validate_json_str(json_str: str):
-        json_str = json_str.strip()
-        if json_str.startswith("```json"):
-            json_str = json_str[7:]
-        if json_str.endswith("```"):
-            json_str = json_str[:-3]
-        return json_str.strip()
+    @staticmethod
+    def _validate_json_str(json_str: str):
+        json_str = json_str.strip()
+        if json_str.startswith("```json"):
+            json_str = json_str[7:]
+        if json_str.endswith("```"):
+            json_str = json_str[:-3]
+        return json_str.strip()
-    def _select_properties(self, conversation: "Conversation") -> bool:
-        """
-
-        Given a question (optionally provided, but in the standard use case
-        reused from the entity selection step) and the selected entities, select
-        the properties that are relevant to the question and store them in
-        the dictionary `selected_properties`.
-
-        Returns:
-            True if at least one property was selected, False otherwise.
-
-        """
-
-        if not self.question:
-            raise ValueError(
-                "No question found. Please make sure to run entity and "
-                "relationship selection first."
-            )
-
-        if not self.selected_entities and not self.selected_relationships:
-            raise ValueError(
-                "No entities or relationships provided, and none available "
-                "from entity selection step. Please provide "
-                "entities/relationships or run the entity selection "
-                "(`select_entities()`) step first."
-            )
-
-        e_props = {}
-        for entity in self.selected_entities:
-            if self.entities[entity].get("properties"):
-                e_props[entity] = list(
-                    self.entities[entity]["properties"].keys()
-                )
-
-        r_props = {}
-        for relationship in self.selected_relationships:
-            if self.relationships[relationship].get("properties"):
-                r_props[relationship] = list(
-                    self.relationships[relationship]["properties"].keys()
-                )
-
-        msg = (
-            "You have access to a knowledge graph that contains entities and "
-            "relationships. They have the following properties. Entities:"
-            f"{e_props}, Relationships: {r_props}. "
-            "Your task is to select the properties that are relevant to the "
-            "user's question for subsequent use in a query. Only return the "
-            "entities and relationships with their relevant properties in compact "
-            "JSON format, without any additional text. Return the "
-            "entities/relationships as top-level dictionary keys, and their "
-            "properties as dictionary values. "
-            "Do not return properties that are not relevant to the question."
-        )
-
-        conversation.append_system_message(msg)
-
-        msg, token_usage, correction = conversation.query(self.question)
-        msg = BioCypherPromptEngine._validate_json_str(msg)
-
-        try:
-            self.selected_properties = json.loads(msg) if msg else {}
-        except json.decoder.JSONDecodeError:
-            self.selected_properties = {}
-
-        return bool(self.selected_properties)
+    def _select_properties(self, conversation: "Conversation") -> bool:
+        """Given a question (optionally provided, but in the standard use case
+        reused from the entity selection step) and the selected entities, select
+        the properties that are relevant to the question and store them in
+        the dictionary `selected_properties`.
+
+        Returns
+        -------
+            True if at least one property was selected, False otherwise.
+
+        """
+        if not self.question:
+            raise ValueError(
+                "No question found. Please make sure to run entity and relationship selection first.",
+            )
+
+        if not self.selected_entities and not self.selected_relationships:
+            raise ValueError(
+                "No entities or relationships provided, and none available "
+                "from entity selection step. Please provide "
+                "entities/relationships or run the entity selection "
+                "(`select_entities()`) step first.",
+            )
+
+        e_props = {}
+        for entity in self.selected_entities:
+            if self.entities[entity].get("properties"):
+                e_props[entity] = list(
+                    self.entities[entity]["properties"].keys(),
+                )
+
+        r_props = {}
+        for relationship in self.selected_relationships:
+            if self.relationships[relationship].get("properties"):
+                r_props[relationship] = list(
+                    self.relationships[relationship]["properties"].keys(),
+                )
+
+        msg = (
+            "You have access to a knowledge graph that contains entities and "
+            "relationships. They have the following properties. Entities:"
+            f"{e_props}, Relationships: {r_props}. "
+            "Your task is to select the properties that are relevant to the "
+            "user's question for subsequent use in a query. Only return the "
+            "entities and relationships with their relevant properties in compact "
+            "JSON format, without any additional text. Return the "
+            "entities/relationships as top-level dictionary keys, and their "
+            "properties as dictionary values. "
+            "Do not return properties that are not relevant to the question."
+        )
+
+        conversation.append_system_message(msg)
+
+        msg, token_usage, correction = conversation.query(self.question)
+        msg = BioCypherPromptEngine._validate_json_str(msg)
+
+        try:
+            self.selected_properties = json.loads(msg) if msg else {}
+        except json.decoder.JSONDecodeError:
+            self.selected_properties = {}
+
+        return bool(self.selected_properties)
-    def _generate_query(
-        self,
-        question: str,
-        entities: list,
-        relationships: dict,
-        properties: dict,
-        query_language: str,
-        conversation: "Conversation",
-    ) -> str:
-        """
-        Generate a query in the specified query language that answers the user's
-        question.
-
-        Args:
-            question: A user's question.
-
-            entities: A list of entities that are relevant to the question.
-
-            relationships: A list of relationships that are relevant to the
-                question.
-
-            properties: A dictionary of properties that are relevant to the
-                question.
-
-            query_language: The language of the query to generate.
-
-            conversation: A BioChatter Conversation object for connecting to the
-                LLM.
-
-        Returns:
-            A database query that could answer the user's question.
-        """
-        msg = self._generate_query_prompt(
-            entities,
-            relationships,
-            properties,
-            query_language,
-        )
-
-        conversation.append_system_message(msg)
-
-        out_msg, token_usage, correction = conversation.query(question)
-
-        return out_msg.strip()
+    def _generate_query(
+        self,
+        question: str,
+        entities: list,
+        relationships: dict,
+        properties: dict,
+        query_language: str,
+        conversation: "Conversation",
+    ) -> str:
+        """Generate a query in the specified query language that answers the user's
+        question.
+
+        Args:
+        ----
+            question: A user's question.
+
+            entities: A list of entities that are relevant to the question.
+
+            relationships: A list of relationships that are relevant to the
+                question.
+
+            properties: A dictionary of properties that are relevant to the
+                question.
+
+            query_language: The language of the query to generate.
+
+            conversation: A BioChatter Conversation object for connecting to the
+                LLM.
+
+        Returns:
+        -------
+            A database query that could answer the user's question.
+
+        """
+        msg = self._generate_query_prompt(
+            entities,
+            relationships,
+            properties,
+            query_language,
+        )
+
+        conversation.append_system_message(msg)
+
+        out_msg, token_usage, correction = conversation.query(question)
+
+        return out_msg.strip()
-    def _expand_pairs(self, relationship, values) -> None:
-        if not self.rel_directions.get(relationship):
-            self.rel_directions[relationship] = []
-        if isinstance(values["source"], list):
-            for source in values["source"]:
-                if isinstance(values["target"], list):
-                    for target in values["target"]:
-                        self.rel_directions[relationship].append(
-                            (source, target)
-                        )
-                else:
-                    self.rel_directions[relationship].append(
-                        (source, values["target"])
-                    )
-        elif isinstance(values["target"], list):
-            for target in values["target"]:
-                self.rel_directions[relationship].append(
-                    (values["source"], target)
-                )
-        else:
-            self.rel_directions[relationship].append(
-                (values["source"], values["target"])
-            )
+    def _expand_pairs(self, relationship, values) -> None:
+        if not self.rel_directions.get(relationship):
+            self.rel_directions[relationship] = []
+        if isinstance(values["source"], list):
+            for source in values["source"]:
+                if isinstance(values["target"], list):
+                    for target in values["target"]:
+                        self.rel_directions[relationship].append(
+                            (source, target),
+                        )
+                else:
+                    self.rel_directions[relationship].append(
+                        (source, values["target"]),
+                    )
+        elif isinstance(values["target"], list):
+            for target in values["target"]:
+                self.rel_directions[relationship].append(
+                    (values["source"], target),
+                )
+        else:
+            self.rel_directions[relationship].append(
+                (values["source"], values["target"]),
+            )
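Taken together, the prompt engine can be driven end to end from a plain schema dictionary. A minimal sketch using a hypothetical toy schema (sentence-case keys, as expected by `sentencecase_to_pascalcase`); it relies on the default conversation factory, so an `OPENAI_API_KEY` must be set:

toy_schema = {
    "gene": {
        "represented_as": "node",
        "properties": {"symbol": "str"},
    },
    "protein": {
        "represented_as": "node",
        "properties": {"name": "str"},
    },
    "gene to protein association": {
        "represented_as": "edge",
        "source": "gene",
        "target": "protein",
    },
}

engine = BioCypherPromptEngine(schema_config_or_info_dict=toy_schema)

# runs entity, relationship, and property selection, then assembles the prompt
prompt = engine.generate_query_prompt(
    "Which proteins are encoded by the gene TP53?",
    query_language="Cypher",
)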
@@ -2814,98 +2786,29 @@
schema_config_or_info_path: Path to a biocypher schema configuration
+    file or the extended schema information output generated by
+    BioCypher's `write_schema_info` function (preferred).
+
+schema_config_or_info_dict: A dictionary containing the schema
+    configuration file or the extended schema information output
+    generated by BioCypher's `write_schema_info` function
+    (preferred).
+
+model_name: The name of the model to use for the conversation.
+    DEPRECATED: This should now be set in the conversation factory.
+
+conversation_factory: A function used to create a conversation for
+    creating the KG query. If not provided, a default function is
+    used (creating an OpenAI conversation with the specified model,
+    see `_get_conversation`).
+
-Parameters:
-    schema_config_or_info_path (Optional[str]): Path to a biocypher schema
-        configuration file or the extended schema information output generated
-        by BioCypher's write_schema_info function (preferred). Default: None.
-    schema_config_or_info_dict (Optional[dict]): A dictionary containing the
-        schema configuration file or the extended schema information output
-        generated by BioCypher's write_schema_info function (preferred).
-        Default: None.
-    model_name (str): The name of the model to use for the conversation.
-        DEPRECATED: This should now be set in the conversation factory.
-        Default: 'gpt-3.5-turbo'.
-    conversation_factory (Optional[Callable]): A function used to create a
-        conversation for creating the KG query. If not provided, a default
-        function is used (creating an OpenAI conversation with the specified
-        model, see _get_conversation). Default: None.
def __init__(
-    self,
-    schema_config_or_info_path: Optional[str] = None,
-    schema_config_or_info_dict: Optional[dict] = None,
-    model_name: str = "gpt-3.5-turbo",
-    conversation_factory: Optional[Callable] = None,
-) -> None:
-    """
-
-    Given a biocypher schema configuration, extract the entities and
-    relationships, and for each extract their mode of representation (node
-    or edge), properties, and identifier namespace. Using these data, allow
-    the generation of prompts for a large language model, informing it of
-    the schema constituents and their properties, to enable the
-    parameterisation of function calls to a knowledge graph.
-
-    Args:
-        schema_config_or_info_path: Path to a biocypher schema configuration
-            file or the extended schema information output generated by
-            BioCypher's `write_schema_info` function (preferred).
-
-        schema_config_or_info_dict: A dictionary containing the schema
-            configuration file or the extended schema information output
-            generated by BioCypher's `write_schema_info` function
-            (preferred).
-
-        model_name: The name of the model to use for the conversation.
-            DEPRECATED: This should now be set in the conversation factory.
-
-        conversation_factory: A function used to create a conversation for
-            creating the KG query. If not provided, a default function is
-            used (creating an OpenAI conversation with the specified model,
-            see `_get_conversation`).
-    """
-
-    if not schema_config_or_info_path and not schema_config_or_info_dict:
-        raise ValueError(
-            "Please provide the schema configuration or schema info as a "
-            "path to a file or as a dictionary."
-        )
-
-    if schema_config_or_info_path and schema_config_or_info_dict:
-        raise ValueError(
-            "Please provide the schema configuration or schema info as a "
-            "path to a file or as a dictionary, not both."
-        )
-
-    # set conversation factory or use default
-    self.conversation_factory = (
-        conversation_factory
-        if conversation_factory is not None
-        else self._get_conversation
-    )
-
-    if schema_config_or_info_path:
-        # read the schema configuration
-        with open(schema_config_or_info_path, "r") as f:
-            schema_config = yaml.safe_load(f)
-    elif schema_config_or_info_dict:
-        schema_config = schema_config_or_info_dict
-
-    # check whether it is the original schema config or the output of
-    # biocypher info
-    is_schema_info = schema_config.get("is_schema_info", False)
-
-    # extract the entities and relationships: each top level key that has
-    # a 'represented_as' key
-    self.entities = {}
-    self.relationships = {}
-    if not is_schema_info:
-        for key, value in schema_config.items():
-            # hacky, better with biocypher output
-            name_indicates_relationship = (
-                "interaction" in key.lower() or "association" in key.lower()
-            )
-            if "represented_as" in value:
-                if (
-                    value["represented_as"] == "node"
-                    and not name_indicates_relationship
-                ):
-                    self.entities[sentencecase_to_pascalcase(key)] = value
-                elif (
-                    value["represented_as"] == "node"
-                    and name_indicates_relationship
-                ):
-                    self.relationships[sentencecase_to_pascalcase(key)] = (
-                        value
-                    )
-                elif value["represented_as"] == "edge":
-                    self.relationships[sentencecase_to_pascalcase(key)] = (
-                        value
-                    )
-    else:
-        for key, value in schema_config.items():
-            if not isinstance(value, dict):
-                continue
-            if value.get("present_in_knowledge_graph", None) == False:
-                continue
-            if value.get("is_relationship", None) == False:
-                self.entities[sentencecase_to_pascalcase(key)] = value
-            elif value.get("is_relationship", None) == True:
-                value = self._capitalise_source_and_target(value)
-                self.relationships[sentencecase_to_pascalcase(key)] = value
-
-    self.question = ""
-    self.selected_entities = []
-    self.selected_relationships = []  # used in property selection
-    self.selected_relationship_labels = {}  # copy to deal with labels that
-    # are not the same as the relationship name, used in query generation
-    # dictionary to also include source and target types
-    self.rel_directions = {}
-    self.model_name = model_name
def __init__(
+    self,
+    schema_config_or_info_path: str | None = None,
+    schema_config_or_info_dict: dict | None = None,
+    model_name: str = "gpt-3.5-turbo",
+    conversation_factory: Callable | None = None,
+) -> None:
+    """Given a biocypher schema configuration, extract the entities and
+    relationships, and for each extract their mode of representation (node
+    or edge), properties, and identifier namespace. Using these data, allow
+    the generation of prompts for a large language model, informing it of
+    the schema constituents and their properties, to enable the
+    parameterisation of function calls to a knowledge graph.
+
+    Args:
+    ----
+        schema_config_or_info_path: Path to a biocypher schema configuration
+            file or the extended schema information output generated by
+            BioCypher's `write_schema_info` function (preferred).
+
+        schema_config_or_info_dict: A dictionary containing the schema
+            configuration file or the extended schema information output
+            generated by BioCypher's `write_schema_info` function
+            (preferred).
+
+        model_name: The name of the model to use for the conversation.
+            DEPRECATED: This should now be set in the conversation factory.
+
+        conversation_factory: A function used to create a conversation for
+            creating the KG query. If not provided, a default function is
+            used (creating an OpenAI conversation with the specified model,
+            see `_get_conversation`).
+
+    """
+    if not schema_config_or_info_path and not schema_config_or_info_dict:
+        raise ValueError(
+            "Please provide the schema configuration or schema info as a path to a file or as a dictionary.",
+        )
+
+    if schema_config_or_info_path and schema_config_or_info_dict:
+        raise ValueError(
+            "Please provide the schema configuration or schema info as a "
+            "path to a file or as a dictionary, not both.",
+        )
+
+    # set conversation factory or use default
+    self.conversation_factory = conversation_factory if conversation_factory is not None else self._get_conversation
+
+    if schema_config_or_info_path:
+        # read the schema configuration
+        with open(schema_config_or_info_path) as f:
+            schema_config = yaml.safe_load(f)
+    elif schema_config_or_info_dict:
+        schema_config = schema_config_or_info_dict
+
+    # check whether it is the original schema config or the output of
+    # biocypher info
+    is_schema_info = schema_config.get("is_schema_info", False)
+
+    # extract the entities and relationships: each top level key that has
+    # a 'represented_as' key
+    self.entities = {}
+    self.relationships = {}
+    if not is_schema_info:
+        for key, value in schema_config.items():
+            # hacky, better with biocypher output
+            name_indicates_relationship = "interaction" in key.lower() or "association" in key.lower()
+            if "represented_as" in value:
+                if value["represented_as"] == "node" and not name_indicates_relationship:
+                    self.entities[sentencecase_to_pascalcase(key)] = value
+                elif (value["represented_as"] == "node" and name_indicates_relationship) or value[
+                    "represented_as"
+                ] == "edge":
+                    self.relationships[sentencecase_to_pascalcase(key)] = value
+    else:
+        for key, value in schema_config.items():
+            if not isinstance(value, dict):
+                continue
+            if value.get("present_in_knowledge_graph", None) == False:
+                continue
+            if value.get("is_relationship", None) == False:
+                self.entities[sentencecase_to_pascalcase(key)] = value
+            elif value.get("is_relationship", None) == True:
+                value = self._capitalise_source_and_target(value)
+                self.relationships[sentencecase_to_pascalcase(key)] = value
+
+    self.question = ""
+    self.selected_entities = []
+    self.selected_relationships = []  # used in property selection
+    self.selected_relationship_labels = {}  # copy to deal with labels that
+    # are not the same as the relationship name, used in query generation
+    # dictionary to also include source and target types
+    self.rel_directions = {}
+    self.model_name = model_name
@@ -3147,80 +3013,37 @@
question: A user's question.
+
+query_language: The language of the query to generate.
+
-Parameters:
-    question (str): A user's question. Required.
-    query_language (Optional[str]): The language of the query to generate.
-        Default: 'Cypher'.
-
-Returns:
-    str: A database query that could answer the user's question.
+
+Returns:
+    A database query that could answer the user's question.
+
def generate_query(
-    self, question: str, query_language: Optional[str] = "Cypher"
-) -> str:
-    """
-    Wrap entity and property selection and query generation; return the
-    generated query.
-
-    Args:
-        question: A user's question.
-
-        query_language: The language of the query to generate.
-
-    Returns:
-        A database query that could answer the user's question.
-    """
-
-    self._select_graph_entities_from_question(
-        question, self.conversation_factory()
-    )
-
-    return self._generate_query(
-        question=question,
-        entities=self.selected_entities,
-        relationships=self.selected_relationship_labels,
-        properties=self.selected_properties,
-        query_language=query_language,
-        conversation=self.conversation_factory(),
-    )
def generate_query(
+    self,
+    question: str,
+    query_language: str | None = "Cypher",
+) -> str:
+    """Wrap entity and property selection and query generation; return the
+    generated query.
+
+    Args:
+    ----
+        question: A user's question.
+
+        query_language: The language of the query to generate.
+
+    Returns:
+    -------
+        A database query that could answer the user's question.
+
+    """
+    self._select_graph_entities_from_question(
+        question,
+        self.conversation_factory(),
+    )
+
+    return self._generate_query(
+        question=question,
+        entities=self.selected_entities,
+        relationships=self.selected_relationship_labels,
+        properties=self.selected_properties,
+        query_language=query_language,
+        conversation=self.conversation_factory(),
+    )
@@ -3295,130 +3107,78 @@
question: A user's question.
+
+query_language: The language of the query to generate.
+
-Parameters:
-    question (str): A user's question. Required.
-    query_language (Optional[str]): The language of the query to generate.
-        Default: 'Cypher'.
-
-Returns:
-    str: A prompt for a large language model to generate a database query.
+
+Returns:
+    A prompt for a large language model to generate a database query.
+
def generate_query_prompt(
-    self, question: str, query_language: Optional[str] = "Cypher"
-) -> str:
-    """
-    Generate a prompt for a large language model to generate a database
-    query based on the user's question and class attributes informing about
-    the schema.
-
-    Args:
-        question: A user's question.
-
-        query_language: The language of the query to generate.
-
-    Returns:
-        A prompt for a large language model to generate a database query.
-    """
-    self._select_graph_entities_from_question(
-        question, self.conversation_factory()
-    )
-    msg = self._generate_query_prompt(
-        self.selected_entities,
-        self.selected_relationship_labels,
-        self.selected_properties,
-        query_language,
-    )
-    return msg
def generate_query_prompt(
+    self,
+    question: str,
+    query_language: str | None = "Cypher",
+) -> str:
+    """Generate a prompt for a large language model to generate a database
+    query based on the user's question and class attributes informing about
+    the schema.
+
+    Args:
+    ----
+        question: A user's question.
+
+        query_language: The language of the query to generate.
+
+    Returns:
+    -------
+        A prompt for a large language model to generate a database query.
+
+    """
+    self._select_graph_entities_from_question(
+        question,
+        self.conversation_factory(),
+    )
+    msg = self._generate_query_prompt(
+        self.selected_entities,
+        self.selected_relationship_labels,
+        self.selected_properties,
+        query_language,
+    )
+    return msg
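For a schema like the toy example above, the message assembled by `_generate_query_prompt` would read roughly as follows (wording taken from the message-building code; the entity, relationship, and property lists are illustrative):

Generate a database query in Cypher that answers the user's question. You can use the following entities: ['Gene', 'Protein'], relationships: ['GeneToProteinAssociation'], and properties: {'Protein': ['name']}. Given the following valid combinations of source, relationship, and target: '(:Gene)-(:GeneToProteinAssociation)->(:Protein)', generate a Cypher query using one of these combinations. Only return the query, without any additional text, symbols or characters --- just the query statement.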
class DatabaseAgent:
-    def __init__(
-        self,
-        model_name: str,
-        connection_args: dict,
-        schema_config_or_info_dict: dict,
-        conversation_factory: Callable,
-        use_reflexion: bool,
-    ) -> None:
-        """
-        Create a DatabaseAgent analogous to the VectorDatabaseAgentMilvus class,
-        which can return results from a database using a query engine. Currently
-        limited to Neo4j for development.
-
-        Args:
-            connection_args (dict): A dictionary of arguments to connect to the
-                database. Contains database name, URI, user, and password.
-
-            conversation_factory (Callable): A function to create a conversation
-                for creating the KG query.
-
-            use_reflexion (bool): Whether to use the ReflexionAgent to generate
-                the query.
class DatabaseAgent:
+    def __init__(
+        self,
+        model_name: str,
+        connection_args: dict,
+        schema_config_or_info_dict: dict,
+        conversation_factory: Callable,
+        use_reflexion: bool,
+    ) -> None:
+        """Create a DatabaseAgent analogous to the VectorDatabaseAgentMilvus class,
+        which can return results from a database using a query engine. Currently
+        limited to Neo4j for development.
+
+        Args:
+        ----
+            connection_args (dict): A dictionary of arguments to connect to the
+                database. Contains database name, URI, user, and password.
+
+            conversation_factory (Callable): A function to create a conversation
+                for creating the KG query.
+
+            use_reflexion (bool): Whether to use the ReflexionAgent to generate
+                the query.
+
+        """
        self.conversation_factory = conversation_factory
        self.prompt_engine = BioCypherPromptEngine(
@@ -3692,142 +3447,135 @@
        self.use_reflexion = use_reflexion

    def connect(self) -> None:
-        """
-        Connect to the database and authenticate.
-        """
-        db_name = self.connection_args.get("db_name")
-        uri = f"{self.connection_args.get('host')}:{self.connection_args.get('port')}"
-        uri = uri if uri.startswith("bolt://") else "bolt://" + uri
-        user = self.connection_args.get("user")
-        password = self.connection_args.get("password")
-        self.driver = nu.Driver(
-            db_name=db_name or "neo4j",
-            db_uri=uri,
-            user=user,
-            password=password,
-        )
-
-    def is_connected(self) -> bool:
-        return not self.driver is None
-
-    def _generate_query(self, query: str):
-        if self.use_reflexion:
-            agent = KGQueryReflexionAgent(
-                self.conversation_factory,
-                self.connection_args,
-            )
-            query_prompt = self.prompt_engine.generate_query_prompt(query)
-            agent_result = agent.execute(query, query_prompt)
-            tool_result = (
-                [agent_result.tool_result]
-                if agent_result.tool_result is not None
-                else None
-            )
-            return agent_result.answer, tool_result
-        else:
-            query = self.prompt_engine.generate_query(query)
-            results = self.driver.query(query=query)
-            return query, results
-
-    def _build_response(
-        self,
-        results: List[Dict],
-        cypher_query: str,
-        results_num: Optional[int] = 3,
-    ) -> List[Document]:
-        if len(results) == 0:
-            return [
-                Document(
-                    page_content=(
-                        "I didn't find any result in knowledge graph, "
-                        f"but here is the query I used: {cypher_query}. "
-                        "You can ask user to refine the question. "
-                        "Note: please ensure to include the query in a code "
-                        "block in your response so that the user can refine "
-                        "their question effectively."
-                    ),
-                    metadata={"cypher_query": cypher_query},
-                )
-            ]
-
-        clipped_results = results[:results_num] if results_num > 0 else results
-        results_dump = json.dumps(clipped_results)
-
-        return [
-            Document(
-                page_content=(
-                    "The results retrieved from knowledge graph are: "
-                    f"{results_dump}. "
-                    f"The query used is: {cypher_query}. "
-                    "Note: please ensure to include the query in a code block "
-                    "in your response so that the user can refine "
-                    "their question effectively."
-                ),
-                metadata={"cypher_query": cypher_query},
-            )
-        ]
-
-    def get_query_results(self, query: str, k: int = 3) -> list[Document]:
-        """
-        Generate a query using the prompt engine and return the results.
-        Replicates vector database similarity search API. Results are returned
-        as a list of Document objects to align with the vector database agent.
-
-        Args:
-            query (str): A query string.
-
-            k (int): The number of results to return.
-
-        Returns:
-            List[Document]: A list of Document objects. The page content values
-                are the literal dictionaries returned by the query, the metadata
-                values are the cypher query used to generate the results, for
-                now.
-        """
-        (cypher_query, tool_result) = self._generate_query(
-            query
-        )  # self.prompt_engine.generate_query(query)
-        # TODO some logic if it fails?
-        if tool_result is not None:
-            # If _generate_query() already returned tool_result, we won't connect
-            # to graph database to query result any more
-            results = tool_result
-        else:
-            results = self.driver.query(query=cypher_query)
-
-        # return first k results
-        # returned nodes can have any formatting, and can also be empty or fewer
-        # than k
-        if results is None or len(results) == 0 or results[0] is None:
-            return []
-        return self._build_response(
-            results=results[0], cypher_query=cypher_query, results_num=k
-        )
-
-    def get_description(self):
-        result = self.driver.query("MATCH (n:Schema_info) RETURN n LIMIT 1")
-
-        if result[0]:
-            schema_info_node = result[0][0]["n"]
-            schema_dict_content = schema_info_node["schema_info"][
-                :MAX_AGENT_DESC_LENGTH
-            ]  # limit to 1000 characters
-            return (
-                f"the graph database contains the following nodes and edges: \n\n"
-                f"{schema_dict_content}"
-            )
-
-        # schema_info is not found in database
-        nodes_query = "MATCH (n) RETURN DISTINCT labels(n) LIMIT 300"
-        node_results = self.driver.query(query=nodes_query)
-        edges_query = "MATCH (n) RETURN DISTINCT type(n) LIMIT 300"
-        edge_results = self.driver.query(query=edges_query)
-        desc = (
-            f"The graph database contains the following nodes and edges: \n"
-            f"nodes: \n{node_results}"
-            f"edges: \n{edge_results}"
-        )
-        return desc[:MAX_AGENT_DESC_LENGTH]
+"""Connect to the database and authenticate."""
+db_name=self.connection_args.get("db_name")
+uri=f"{self.connection_args.get('host')}:{self.connection_args.get('port')}"
+uri=uriifuri.startswith("bolt://")else"bolt://"+uri
+user=self.connection_args.get("user")
+password=self.connection_args.get("password")
+self.driver=nu.Driver(
+db_name=db_nameor"neo4j",
+db_uri=uri,
+user=user,
+password=password,
+)
+
+defis_connected(self)->bool:
+returnself.driverisnotNone
+
+def_generate_query(self,query:str):
+ifself.use_reflexion:
+agent=KGQueryReflexionAgent(
+self.conversation_factory,
+self.connection_args,
+)
+query_prompt=self.prompt_engine.generate_query_prompt(query)
+agent_result=agent.execute(query,query_prompt)
+tool_result=[agent_result.tool_result]ifagent_result.tool_resultisnotNoneelseNone
+returnagent_result.answer,tool_result
+else:
+query=self.prompt_engine.generate_query(query)
+results=self.driver.query(query=query)
+returnquery,results
+
+def_build_response(
+self,
+results:list[dict],
+cypher_query:str,
+results_num:int|None=3,
+)->list[Document]:
+iflen(results)==0:
+return[
+Document(
+page_content=(
+"I didn't find any result in knowledge graph, "
+f"but here is the query I used: {cypher_query}. "
+"You can ask user to refine the question. "
+"Note: please ensure to include the query in a code "
+"block in your response so that the user can refine "
+"their question effectively."
+),
+metadata={"cypher_query":cypher_query},
+),
+]
+
+clipped_results=results[:results_num]ifresults_num>0elseresults
+results_dump=json.dumps(clipped_results)
+
+return[
+Document(
+page_content=(
+"The results retrieved from knowledge graph are: "
+f"{results_dump}. "
+f"The query used is: {cypher_query}. "
+"Note: please ensure to include the query in a code block "
+"in your response so that the user can refine "
+"their question effectively."
+),
+metadata={"cypher_query":cypher_query},
+),
+]
+
+defget_query_results(self,query:str,k:int=3)->list[Document]:
+"""Generate a query using the prompt engine and return the results.
+ Replicates vector database similarity search API. Results are returned
+ as a list of Document objects to align with the vector database agent.
+
+ Args:
+ ----
+ query (str): A query string.
+
+ k (int): The number of results to return.
+
+ Returns:
+ -------
+ List[Document]: A list of Document objects. The page content values
+ are the literal dictionaries returned by the query, the metadata
+ values are the cypher query used to generate the results, for
+ now.
+
+ """
+(cypher_query,tool_result)=self._generate_query(
+query,
+)# self.prompt_engine.generate_query(query)
+# TODO some logic if it fails?
+iftool_resultisnotNone:
+# If _generate_query() already returned tool_result, we won't connect
+# to graph database to query result any more
+results=tool_result
+else:
+results=self.driver.query(query=cypher_query)
+
+# return first k results
+# returned nodes can have any formatting, and can also be empty or fewer
+# than k
+ifresultsisNoneorlen(results)==0orresults[0]isNone:
+return[]
+returnself._build_response(
+results=results[0],
+cypher_query=cypher_query,
+results_num=k,
+)
+
+defget_description(self):
+result=self.driver.query("MATCH (n:Schema_info) RETURN n LIMIT 1")
+
+ifresult[0]:
+schema_info_node=result[0][0]["n"]
+schema_dict_content=schema_info_node["schema_info"][:MAX_AGENT_DESC_LENGTH]# limit to 1000 characters
+returnf"the graph database contains the following nodes and edges: \n\n{schema_dict_content}"
+
+# schema_info is not found in database
+nodes_query="MATCH (n) RETURN DISTINCT labels(n) LIMIT 300"
+node_results=self.driver.query(query=nodes_query)
+edges_query="MATCH (n) RETURN DISTINCT type(n) LIMIT 300"
+edge_results=self.driver.query(query=edges_query)
+desc=(
+f"The graph database contains the following nodes and edges: \n"
+f"nodes: \n{node_results}"
+f"edges: \n{edge_results}"
+)
+returndesc[:MAX_AGENT_DESC_LENGTH]
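A hedged end-to-end sketch of the `DatabaseAgent` surface shown above. The constructor and method signatures come from the source; the `connection_args` keys follow the docstring, while `schema_dict` and `make_conversation` stand in for application-specific objects.

agent = DatabaseAgent(
    model_name="gpt-3.5-turbo",           # illustrative model choice
    connection_args={
        "host": "localhost",
        "port": "7687",
        "user": "neo4j",
        "password": "password",
        "db_name": "neo4j",
    },
    schema_config_or_info_dict=schema_dict,   # assumed available
    conversation_factory=make_conversation,   # assumed available
    use_reflexion=False,
)
agent.connect()
if agent.is_connected():
    # mirrors the vector database similarity search API
    docs = agent.get_query_results("Which genes interact with TP53?", k=3)
    for doc in docs:
        print(doc.page_content)               # result dump plus the query used
        print(doc.metadata["cypher_query"])   # the generated Cypher itself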
@@ -3857,76 +3605,21 @@
connection_args (dict): A dictionary of arguments to connect to the
+    database. Contains database name, URI, user, and password.
+conversation_factory (Callable): A function to create a conversation
+    for creating the KG query.
-
Parameters:
-    connection_args (dict): A dictionary of arguments to connect to the
-        database. Contains database name, URI, user, and password. [required]
-    conversation_factory (Callable): A function to create a conversation
-        for creating the KG query. [required]
-    use_reflexion (bool): Whether to use the ReflexionAgent to generate
-        the query. [required]
+use_reflexion (bool): Whether to use the ReflexionAgent to generate
+    the query.
+
def __init__(
-    self,
-    model_name: str,
-    connection_args: dict,
-    schema_config_or_info_dict: dict,
-    conversation_factory: Callable,
-    use_reflexion: bool,
-) -> None:
-    """
-    Create a DatabaseAgent analogous to the VectorDatabaseAgentMilvus class,
-    which can return results from a database using a query engine. Currently
-    limited to Neo4j for development.
-
-    Args:
-        connection_args (dict): A dictionary of arguments to connect to the
-            database. Contains database name, URI, user, and password.
-
-        conversation_factory (Callable): A function to create a conversation
-            for creating the KG query.
-
-        use_reflexion (bool): Whether to use the ReflexionAgent to generate
-            the query.
def __init__(
+    self,
+    model_name: str,
+    connection_args: dict,
+    schema_config_or_info_dict: dict,
+    conversation_factory: Callable,
+    use_reflexion: bool,
+) -> None:
+    """Create a DatabaseAgent analogous to the VectorDatabaseAgentMilvus class,
+    which can return results from a database using a query engine. Currently
+    limited to Neo4j for development.
+
+    Args:
+    ----
+    connection_args (dict): A dictionary of arguments to connect to the
+        database. Contains database name, URI, user, and password.
+
+    conversation_factory (Callable): A function to create a conversation
+        for creating the KG query.
+
+    use_reflexion (bool): Whether to use the ReflexionAgent to generate
+        the query.
+    """
+    self.conversation_factory = conversation_factory
+    self.prompt_engine = BioCypherPromptEngine(
@@ -4022,23 +3716,19 @@
def connect(self) -> None:
-    """
-    Connect to the database and authenticate.
-    """
-    db_name = self.connection_args.get("db_name")
-    uri = f"{self.connection_args.get('host')}:{self.connection_args.get('port')}"
-    uri = uri if uri.startswith("bolt://") else "bolt://" + uri
-    user = self.connection_args.get("user")
-    password = self.connection_args.get("password")
-    self.driver = nu.Driver(
-        db_name=db_name or "neo4j",
-        db_uri=uri,
-        user=user,
-        password=password,
-    )
def connect(self) -> None:
+    """Connect to the database and authenticate."""
+    db_name = self.connection_args.get("db_name")
+    uri = f"{self.connection_args.get('host')}:{self.connection_args.get('port')}"
+    uri = uri if uri.startswith("bolt://") else "bolt://" + uri
+    user = self.connection_args.get("user")
+    password = self.connection_args.get("password")
+    self.driver = nu.Driver(
+        db_name=db_name or "neo4j",
+        db_uri=uri,
+        user=user,
+        password=password,
+    )
@@ -4059,83 +3749,27 @@
Parameters:
-    query (str): A query string. [required]
-    k (int): The number of results to return. [default: 3]
Returns:
-    list[Document]: A list of Document objects. The page content values
-        are the literal dictionaries returned by the query, the metadata
-        values are the cypher query used to generate the results, for now.
+
+
query (str): A query string.
+
+k (int): The number of results to return.
+
+
+
List[Document]: A list of Document objects. The page content values
+    are the literal dictionaries returned by the query, the metadata
+    values are the cypher query used to generate the results, for
+    now.
+
def get_query_results(self, query: str, k: int = 3) -> list[Document]:
-    """
-    Generate a query using the prompt engine and return the results.
-    Replicates vector database similarity search API. Results are returned
-    as a list of Document objects to align with the vector database agent.
-
-    Args:
-        query (str): A query string.
-
-        k (int): The number of results to return.
-
-    Returns:
-        List[Document]: A list of Document objects. The page content values
-            are the literal dictionaries returned by the query, the metadata
-            values are the cypher query used to generate the results, for
-            now.
-    """
-    (cypher_query, tool_result) = self._generate_query(
-        query
-    )  # self.prompt_engine.generate_query(query)
-    # TODO some logic if it fails?
-    if tool_result is not None:
-        # If _generate_query() already returned tool_result, we won't connect
-        # to graph database to query result any more
-        results = tool_result
-    else:
-        results = self.driver.query(query=cypher_query)
-
-    # return first k results
-    # returned nodes can have any formatting, and can also be empty or fewer
-    # than k
-    if results is None or len(results) == 0 or results[0] is None:
-        return []
-    return self._build_response(
-        results=results[0], cypher_query=cypher_query, results_num=k
-    )
def get_query_results(self, query: str, k: int = 3) -> list[Document]:
+    """Generate a query using the prompt engine and return the results.
+    Replicates vector database similarity search API. Results are returned
+    as a list of Document objects to align with the vector database agent.
+
+    Args:
+    ----
+    query (str): A query string.
+
+    k (int): The number of results to return.
+
+    Returns:
+    -------
+    List[Document]: A list of Document objects. The page content values
+        are the literal dictionaries returned by the query, the metadata
+        values are the cypher query used to generate the results, for
+        now.
+
+    """
+    (cypher_query, tool_result) = self._generate_query(
+        query,
+    )  # self.prompt_engine.generate_query(query)
+    # TODO some logic if it fails?
+    if tool_result is not None:
+        # If _generate_query() already returned tool_result, we won't connect
+        # to graph database to query result any more
+        results = tool_result
+    else:
+        results = self.driver.query(query=cypher_query)
+
+    # return first k results
+    # returned nodes can have any formatting, and can also be empty or fewer
+    # than k
+    if results is None or len(results) == 0 or results[0] is None:
+        return []
+    return self._build_response(
+        results=results[0],
+        cypher_query=cypher_query,
+        results_num=k,
+    )
class AnthropicConversation(Conversation):
-    def __init__(
-        self,
-        model_name: str,
-        prompts: dict,
-        correct: bool = False,
-        split_correction: bool = False,
-    ):
-        """
-        Connect to Anthropic's API and set up a conversation with the user.
-        Also initialise a second conversational agent to provide corrections to
-        the model output, if necessary.
class AnthropicConversation(Conversation):
+    def __init__(
+        self,
+        model_name: str,
+        prompts: dict,
+        correct: bool = False,
+        split_correction: bool = False,
+    ):
+        """Connect to Anthropic's API and set up a conversation with the user.
+        Also initialise a second conversational agent to provide corrections to
+        the model output, if necessary.
+
+        Args:
+        ----
+        model_name (str): The name of the model to use.
+
+        prompts (dict): A dictionary of prompts to use for the conversation.
+
+        split_correction (bool): Whether to correct the model output by
+            splitting the output into sentences and correcting each
+            sentence individually.
+
+        """
+        super().__init__(
+            model_name=model_name,
+            prompts=prompts,
+            correct=correct,
+            split_correction=split_correction,
+        )
+
+        self.ca_model_name = "claude-3-5-sonnet-20240620"
+        # TODO make accessible by drop-down
-        Args:
-            model_name (str): The name of the model to use.
-
-            prompts (dict): A dictionary of prompts to use for the conversation.
-
-            split_correction (bool): Whether to correct the model output by
-                splitting the output into sentences and correcting each
-                sentence individually.
-        """
-        super().__init__(
-            model_name=model_name,
-            prompts=prompts,
-            correct=correct,
-            split_correction=split_correction,
-        )
-
-        self.ca_model_name = "claude-3-5-sonnet-20240620"
-        # TODO make accessible by drop-down
-
-    def set_api_key(self, api_key: str, user: str) -> bool:
-        """
-        Set the API key for the Anthropic API. If the key is valid, initialise the
-        conversational agent. Set the user for usage statistics.
-
-        Args:
-            api_key (str): The API key for the Anthropic API.
-
-            user (str): The user for usage statistics.
-
-        Returns:
-            bool: True if the API key is valid, False otherwise.
-        """
-        client = anthropic.Anthropic(
-            api_key=api_key,
-        )
-        self.user = user
+    def set_api_key(self, api_key: str, user: str) -> bool:
+        """Set the API key for the Anthropic API. If the key is valid, initialise the
+        conversational agent. Set the user for usage statistics.
+
+        Args:
+        ----
+        api_key (str): The API key for the Anthropic API.
+
+        user (str): The user for usage statistics.
+
+        Returns:
+        -------
+        bool: True if the API key is valid, False otherwise.
+
+        """
+        client = anthropic.Anthropic(
+            api_key=api_key,
+        )
+        self.user = user
+
+        try:
+            client.count_tokens("Test connection")
+            self.chat = ChatAnthropic(
+                model_name=self.model_name,
+                temperature=0,
+                api_key=api_key,
+            )
+            self.ca_chat = ChatAnthropic(
+                model_name=self.ca_model_name,
+                temperature=0,
+                api_key=api_key,
+            )
+            if user == "community":
+                self.usage_stats = get_stats(user=user)
+
+            return True
-        try:
-            client.count_tokens("Test connection")
-            self.chat = ChatAnthropic(
-                model_name=self.model_name,
-                temperature=0,
-                api_key=api_key,
-            )
-            self.ca_chat = ChatAnthropic(
-                model_name=self.ca_model_name,
-                temperature=0,
-                api_key=api_key,
-            )
-            if user == "community":
-                self.usage_stats = get_stats(user=user)
-
-            return True
-
-        except anthropic._exceptions.AuthenticationError as e:
-            return False
-
-    def _primary_query(self):
-        """
-        Query the Anthropic API with the user's message and return the response
-        using the message history (flattery system messages, prior conversation)
-        as context. Correct the response if necessary.
-
-        Returns:
-            tuple: A tuple containing the response from the Anthropic API and
-                the token usage.
-        """
-        try:
-            history = self._create_history()
-            response = self.chat.generate([history])
-        except (
-            anthropic._exceptions.APIError,
-            anthropic._exceptions.AnthropicError,
-            anthropic._exceptions.ConflictError,
-            anthropic._exceptions.NotFoundError,
-            anthropic._exceptions.APIStatusError,
-            anthropic._exceptions.RateLimitError,
-            anthropic._exceptions.APITimeoutError,
-            anthropic._exceptions.BadRequestError,
-            anthropic._exceptions.APIConnectionError,
-            anthropic._exceptions.AuthenticationError,
-            anthropic._exceptions.InternalServerError,
-            anthropic._exceptions.PermissionDeniedError,
-            anthropic._exceptions.UnprocessableEntityError,
-            anthropic._exceptions.APIResponseValidationError,
-        ) as e:
-            return str(e), None
-
-        msg = response.generations[0][0].text
-        token_usage = response.llm_output.get("token_usage")
-
-        self.append_ai_message(msg)
-
-        return msg, token_usage
+        except anthropic._exceptions.AuthenticationError:
+            return False
+
+    def _primary_query(self):
+        """Query the Anthropic API with the user's message and return the response
+        using the message history (flattery system messages, prior conversation)
+        as context. Correct the response if necessary.
+
+        Returns
+        -------
+        tuple: A tuple containing the response from the Anthropic API and
+            the token usage.
+
+        """
+        try:
+            history = self._create_history()
+            response = self.chat.generate([history])
+        except (
+            anthropic._exceptions.APIError,
+            anthropic._exceptions.AnthropicError,
+            anthropic._exceptions.ConflictError,
+            anthropic._exceptions.NotFoundError,
+            anthropic._exceptions.APIStatusError,
+            anthropic._exceptions.RateLimitError,
+            anthropic._exceptions.APITimeoutError,
+            anthropic._exceptions.BadRequestError,
+            anthropic._exceptions.APIConnectionError,
+            anthropic._exceptions.AuthenticationError,
+            anthropic._exceptions.InternalServerError,
+            anthropic._exceptions.PermissionDeniedError,
+            anthropic._exceptions.UnprocessableEntityError,
+            anthropic._exceptions.APIResponseValidationError,
+        ) as e:
+            return str(e), None
+
+        msg = response.generations[0][0].text
+        token_usage = response.llm_output.get("token_usage")
+
+        self.append_ai_message(msg)
+
+        return msg, token_usage
+
+    def _create_history(self):
+        history = []
+        # extract text components from message contents
+        msg_texts = [m.content[0]["text"] if isinstance(m.content, list) else m.content for m in self.messages]
+
+        # check if last message is an image message
+        is_image_message = False
+        if isinstance(self.messages[-1].content, list):
+            is_image_message = self.messages[-1].content[1]["type"] == "image_url"
+
+        # find location of last AI message (if any)
+        last_ai_message = None
+        for i, m in enumerate(self.messages):
+            if isinstance(m, AIMessage):
+                last_ai_message = i
-    def _create_history(self):
-        history = []
-        # extract text components from message contents
-        msg_texts = [
-            m.content[0]["text"] if isinstance(m.content, list) else m.content
-            for m in self.messages
-        ]
-
-        # check if last message is an image message
-        is_image_message = False
-        if isinstance(self.messages[-1].content, list):
-            is_image_message = (
-                self.messages[-1].content[1]["type"] == "image_url"
+        # Aggregate system messages into one message at the beginning
+        system_messages = [m.content for m in self.messages if isinstance(m, SystemMessage)]
+        if system_messages:
+            history.append(
+                SystemMessage(content="\n".join(system_messages)),
+            )
+
+        # concatenate all messages before the last AI message into one message
+        if last_ai_message is not None:
+            history.append(
+                HumanMessage(
+                    content="\n".join([m for m in msg_texts[:last_ai_message]]),
+                ),
+            )
-
-        # find location of last AI message (if any)
-        last_ai_message = None
-        for i, m in enumerate(self.messages):
-            if isinstance(m, AIMessage):
-                last_ai_message = i
+            # then append the last AI message
+            history.append(
+                AIMessage(
+                    content=msg_texts[last_ai_message],
+                ),
+            )
-        # Aggregate system messages into one message at the beginning
-        system_messages = [
-            m.content for m in self.messages if isinstance(m, SystemMessage)
-        ]
-        if system_messages:
-            history.append(
-                SystemMessage(content="\n".join(system_messages)),
-            )
-
-        # concatenate all messages before the last AI message into one message
-        if last_ai_message is not None:
-            history.append(
-                HumanMessage(
-                    content="\n".join([m for m in msg_texts[:last_ai_message]]),
-                ),
-            )
-            # then append the last AI message
-            history.append(
-                AIMessage(
-                    content=msg_texts[last_ai_message],
-                ),
-            )
-
-            # then concatenate all messages after that
-            # into one HumanMessage
-            history.append(
-                HumanMessage(
-                    content="\n".join(
-                        [m for m in msg_texts[last_ai_message + 1:]]
-                    ),
-                ),
-            )
-
-        # else add human message to history (without system messages)
-        else:
-            last_system_message = None
-            for i, m in enumerate(self.messages):
-                if isinstance(m, SystemMessage):
-                    last_system_message = i
-            history.append(
-                HumanMessage(
-                    content="\n".join(
-                        [m for m in msg_texts[last_system_message + 1:]]
-                    ),
-                ),
-            )
-
-        # if the last message is an image message, add the image to the history
-        if is_image_message:
-            history[-1]["content"] = [
-                {"type": "text", "text": history[-1]["content"]},
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": self.messages[-1].content[1]["image_url"]["url"]
-                    },
-                },
-            ]
-        return history
-
-    def _correct_response(self, msg: str):
-        """
-        Correct the response from the Anthropic API by sending it to a secondary
-        language model. Optionally split the response into single sentences and
-        correct each sentence individually. Update usage stats.
-
-        Args:
-            msg (str): The response from the Anthropic API.
-
-        Returns:
-            str: The corrected response (or OK if no correction necessary).
-        """
-        ca_messages = self.ca_messages.copy()
-        ca_messages.append(
-            HumanMessage(
-                content=msg,
-            ),
-        )
-        ca_messages.append(
-            SystemMessage(
-                content="If there is nothing to correct, please respond "
-                "with just 'OK', and nothing else!",
-            ),
-        )
-
-        response = self.ca_chat.generate([ca_messages])
-
-        correction = response.generations[0][0].text
-        token_usage = response.llm_output.get("token_usage")
-
-        return correction
+            # then concatenate all messages after that
+            # into one HumanMessage
+            history.append(
+                HumanMessage(
+                    content="\n".join(
+                        [m for m in msg_texts[last_ai_message + 1:]],
+                    ),
+                ),
+            )
+
+        # else add human message to history (without system messages)
+        else:
+            last_system_message = None
+            for i, m in enumerate(self.messages):
+                if isinstance(m, SystemMessage):
+                    last_system_message = i
+            history.append(
+                HumanMessage(
+                    content="\n".join(
+                        [m for m in msg_texts[last_system_message + 1:]],
+                    ),
+                ),
+            )
+
+        # if the last message is an image message, add the image to the history
+        if is_image_message:
+            history[-1]["content"] = [
+                {"type": "text", "text": history[-1]["content"]},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": self.messages[-1].content[1]["image_url"]["url"],
+                    },
+                },
+            ]
+        return history
+
+    def _correct_response(self, msg: str):
+        """Correct the response from the Anthropic API by sending it to a secondary
+        language model. Optionally split the response into single sentences and
+        correct each sentence individually. Update usage stats.
+
+        Args:
+        ----
+        msg (str): The response from the Anthropic API.
+
+        Returns:
+        -------
+        str: The corrected response (or OK if no correction necessary).
+
+        """
+        ca_messages = self.ca_messages.copy()
+        ca_messages.append(
+            HumanMessage(
+                content=msg,
+            ),
+        )
+        ca_messages.append(
+            SystemMessage(
+                content="If there is nothing to correct, please respond with just 'OK', and nothing else!",
+            ),
+        )
+
+        response = self.ca_chat.generate([ca_messages])
+
+        correction = response.generations[0][0].text
+        token_usage = response.llm_output.get("token_usage")
+
+        return correction
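A usage sketch for the class above. The `prompts` keys mirror those consumed by `Conversation.setup()` further down; the prompt strings and API key are placeholders, not values from the source.

convo = AnthropicConversation(
    model_name="claude-3-5-sonnet-20240620",
    prompts={
        "primary_model_prompts": ["You are a helpful assistant."],   # placeholder
        "correcting_agent_prompts": ["You check factual statements."],  # placeholder
        "rag_agent_prompts": [],
        "tool_prompts": {},
    },
    correct=False,
)
if convo.set_api_key(api_key="sk-ant-...", user="community"):   # placeholder key
    msg, token_usage, correction = convo.query("What is a knowledge graph?")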
@@ -2483,75 +2569,39 @@
model_name (str): The name of the model to use.
+prompts (dict): A dictionary of prompts to use for the conversation.
-
Parameters:
-    model_name (str): The name of the model to use. [required]
-    prompts (dict): A dictionary of prompts to use for the conversation. [required]
-    split_correction (bool): Whether to correct the model output by
-        splitting the output into sentences and correcting each
-        sentence individually. [default: False]
+split_correction (bool): Whether to correct the model output by
+    splitting the output into sentences and correcting each
+    sentence individually.
+
def __init__(
-    self,
-    model_name: str,
-    prompts: dict,
-    correct: bool = False,
-    split_correction: bool = False,
-):
-    """
-    Connect to Anthropic's API and set up a conversation with the user.
-    Also initialise a second conversational agent to provide corrections to
-    the model output, if necessary.
-
-    Args:
-        model_name (str): The name of the model to use.
-
-        prompts (dict): A dictionary of prompts to use for the conversation.
-
-        split_correction (bool): Whether to correct the model output by
-            splitting the output into sentences and correcting each
-            sentence individually.
-    """
-    super().__init__(
-        model_name=model_name,
-        prompts=prompts,
-        correct=correct,
-        split_correction=split_correction,
-    )
-
-    self.ca_model_name = "claude-3-5-sonnet-20240620"
def __init__(
+    self,
+    model_name: str,
+    prompts: dict,
+    correct: bool = False,
+    split_correction: bool = False,
+):
+    """Connect to Anthropic's API and set up a conversation with the user.
+    Also initialise a second conversational agent to provide corrections to
+    the model output, if necessary.
+
+    Args:
+    ----
+    model_name (str): The name of the model to use.
+
+    prompts (dict): A dictionary of prompts to use for the conversation.
+
+    split_correction (bool): Whether to correct the model output by
+        splitting the output into sentences and correcting each
+        sentence individually.
+
+    """
+    super().__init__(
+        model_name=model_name,
+        prompts=prompts,
+        correct=correct,
+        split_correction=split_correction,
+    )
+
+    self.ca_model_name = "claude-3-5-sonnet-20240620"
@@ -2627,80 +2659,37 @@
api_key (str): The API key for the Anthropic API.
-
Parameters:
-    api_key (str): The API key for the Anthropic API. [required]
-    user (str): The user for usage statistics. [required]
Returns:
-    bool: True if the API key is valid, False otherwise.
+user (str): The user for usage statistics.
+
+
+
bool: True if the API key is valid, False otherwise.
+
def set_api_key(self, api_key: str, user: str) -> bool:
-    """
-    Set the API key for the Anthropic API. If the key is valid, initialise the
-    conversational agent. Set the user for usage statistics.
-
-    Args:
-        api_key (str): The API key for the Anthropic API.
-
-        user (str): The user for usage statistics.
-
-    Returns:
-        bool: True if the API key is valid, False otherwise.
-    """
-    client = anthropic.Anthropic(
-        api_key=api_key,
-    )
-    self.user = user
def set_api_key(self, api_key: str, user: str) -> bool:
+    """Set the API key for the Anthropic API. If the key is valid, initialise the
+    conversational agent. Set the user for usage statistics.
+
+    Args:
+    ----
+    api_key (str): The API key for the Anthropic API.
+
+    user (str): The user for usage statistics.
+
+    Returns:
+    -------
+    bool: True if the API key is valid, False otherwise.
+
+    """
+    client = anthropic.Anthropic(
+        api_key=api_key,
+    )
+    self.user = user
+
+    try:
+        client.count_tokens("Test connection")
+        self.chat = ChatAnthropic(
+            model_name=self.model_name,
+            temperature=0,
+            api_key=api_key,
+        )
+        self.ca_chat = ChatAnthropic(
+            model_name=self.ca_model_name,
+            temperature=0,
+            api_key=api_key,
+        )
+        if user == "community":
+            self.usage_stats = get_stats(user=user)
+
+        return True
-    try:
-        client.count_tokens("Test connection")
-        self.chat = ChatAnthropic(
-            model_name=self.model_name,
-            temperature=0,
-            api_key=api_key,
-        )
-        self.ca_chat = ChatAnthropic(
-            model_name=self.ca_model_name,
-            temperature=0,
-            api_key=api_key,
-        )
-        if user == "community":
-            self.usage_stats = get_stats(user=user)
-
-        return True
-
-    except anthropic._exceptions.AuthenticationError as e:
-        return False
+    except anthropic._exceptions.AuthenticationError:
+        return False
class AzureGptConversation(GptConversation):
-    def __init__(
-        self,
-        deployment_name: str,
-        model_name: str,
-        prompts: dict,
-        correct: bool = False,
-        split_correction: bool = False,
-        version: Optional[str] = None,
-        base_url: Optional[str] = None,
-    ):
-        """
-        Connect to Azure's GPT API and set up a conversation with the user.
-        Extends GptConversation.
-
-        Args:
-            deployment_name (str): The name of the Azure deployment to use.
class AzureGptConversation(GptConversation):
+    def __init__(
+        self,
+        deployment_name: str,
+        model_name: str,
+        prompts: dict,
+        correct: bool = False,
+        split_correction: bool = False,
+        version: str | None = None,
+        base_url: str | None = None,
+    ):
+        """Connect to Azure's GPT API and set up a conversation with the user.
+
+        Extends GptConversation.
+
+        Args:
+        ----
+        deployment_name (str): The name of the Azure deployment to use.
+
+        model_name (str): The name of the model to use. This is distinct
+            from the deployment name.
+
+        prompts (dict): A dictionary of prompts to use for the conversation.
+
+        correct (bool): Whether to correct the model output.
+
+        split_correction (bool): Whether to correct the model output by
+            splitting the output into sentences and correcting each
+            sentence individually.
+
+        version (str): The version of the Azure API to use.
+
+        base_url (str): The base URL of the Azure API to use.
-            model_name (str): The name of the model to use. This is distinct
-                from the deployment name.
-
-            prompts (dict): A dictionary of prompts to use for the conversation.
-
-            correct (bool): Whether to correct the model output.
-
-            split_correction (bool): Whether to correct the model output by
-                splitting the output into sentences and correcting each
-                sentence individually.
-
-            version (str): The version of the Azure API to use.
-
-            base_url (str): The base URL of the Azure API to use.
-        """
-        super().__init__(
-            model_name=model_name,
-            prompts=prompts,
-            correct=correct,
-            split_correction=split_correction,
-        )
+        """
+        super().__init__(
+            model_name=model_name,
+            prompts=prompts,
+            correct=correct,
+            split_correction=split_correction,
+        )
+
+        self.version = version
+        self.base_url = base_url
+        self.deployment_name = deployment_name
+
+    def set_api_key(self, api_key: str) -> bool:
+        """Set the API key for the Azure API.
+
+        If the key is valid, initialise the conversational agent. No user stats
+        on Azure.
+
+        Args:
+        ----
+        api_key (str): The API key for the Azure API.
-        self.version = version
-        self.base_url = base_url
-        self.deployment_name = deployment_name
+        Returns:
+        -------
+        bool: True if the API key is valid, False otherwise.
-    def set_api_key(self, api_key: str, user: Optional[str] = None) -> bool:
-        """
-        Set the API key for the Azure API. If the key is valid, initialise the
-        conversational agent. No user stats on Azure.
-
-        Args:
-            api_key (str): The API key for the Azure API.
-
-        Returns:
-            bool: True if the API key is valid, False otherwise.
-        """
-
-        try:
-            self.chat = AzureChatOpenAI(
-                deployment_name=self.deployment_name,
-                model_name=self.model_name,
-                openai_api_version=self.version,
-                azure_endpoint=self.base_url,
-                openai_api_key=api_key,
-                temperature=0,
-            )
-            # TODO this is the same model as the primary one; refactor to be
-            # able to use any model for correction
-            self.ca_chat = AzureChatOpenAI(
-                deployment_name=self.deployment_name,
-                model_name=self.model_name,
-                openai_api_version=self.version,
-                azure_endpoint=self.base_url,
-                openai_api_key=api_key,
-                temperature=0,
-            )
-
-            test = self.chat.generate([[HumanMessage(content="Hello")]])
-
-            return True
-
-        except openai._exceptions.AuthenticationError as e:
-            return False
-
-    def _update_usage_stats(self, model: str, token_usage: dict):
-        """
-        We do not track usage stats for Azure.
-        """
-        return
+        """
+        try:
+            self.chat = AzureChatOpenAI(
+                deployment_name=self.deployment_name,
+                model_name=self.model_name,
+                openai_api_version=self.version,
+                azure_endpoint=self.base_url,
+                openai_api_key=api_key,
+                temperature=0,
+            )
+            # TODO this is the same model as the primary one; refactor to be
+            # able to use any model for correction
+            self.ca_chat = AzureChatOpenAI(
+                deployment_name=self.deployment_name,
+                model_name=self.model_name,
+                openai_api_version=self.version,
+                azure_endpoint=self.base_url,
+                openai_api_key=api_key,
+                temperature=0,
+            )
+
+            test = self.chat.generate([[HumanMessage(content="Hello")]])
+
+            return True
+
+        except openai._exceptions.AuthenticationError:
+            return False
+
+    def _update_usage_stats(self, model: str, token_usage: dict):
+        """We do not track usage stats for Azure."""
+        return
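A construction sketch under stated assumptions: the deployment name, API version string, and endpoint URL are placeholders, and `prompts` is assumed to be prepared as for the other conversation classes.

convo = AzureGptConversation(
    deployment_name="my-gpt-deployment",              # placeholder deployment
    model_name="gpt-4",                               # distinct from the deployment name
    prompts=prompts,                                  # assumed prepared elsewhere
    version="2023-05-15",                             # placeholder API version
    base_url="https://my-resource.openai.azure.com",  # placeholder endpoint
)
if convo.set_api_key(api_key=azure_key):              # key assumed available
    msg, token_usage, correction = convo.query("Hello")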
@@ -3011,142 +2991,46 @@
-
Connect to Azure's GPT API and set up a conversation with the user.
-Extends GptConversation.
-
Parameters:
-    deployment_name (str): The name of the Azure deployment to use. [required]
-    model_name (str): The name of the model to use. This is distinct
-        from the deployment name. [required]
-    prompts (dict): A dictionary of prompts to use for the conversation. [required]
-    correct (bool): Whether to correct the model output. [default: False]
-    split_correction (bool): Whether to correct the model output by
-        splitting the output into sentences and correcting each
-        sentence individually. [default: False]
-    version (str): The version of the Azure API to use. [default: None]
-    base_url (str): The base URL of the Azure API to use. [default: None]
+
Connect to Azure's GPT API and set up a conversation with the user.
+
Extends GptConversation.
+
+
deployment_name (str): The name of the Azure deployment to use.
+
+model_name (str): The name of the model to use. This is distinct
+    from the deployment name.
+
+prompts (dict): A dictionary of prompts to use for the conversation.
+
+correct (bool): Whether to correct the model output.
+
+split_correction (bool): Whether to correct the model output by
+    splitting the output into sentences and correcting each
+    sentence individually.
+
+version (str): The version of the Azure API to use.
+
+base_url (str): The base URL of the Azure API to use.
+
def __init__(
-    self,
-    deployment_name: str,
-    model_name: str,
-    prompts: dict,
-    correct: bool = False,
-    split_correction: bool = False,
-    version: Optional[str] = None,
-    base_url: Optional[str] = None,
-):
-    """
-    Connect to Azure's GPT API and set up a conversation with the user.
-    Extends GptConversation.
-
-    Args:
-        deployment_name (str): The name of the Azure deployment to use.
def __init__(
+    self,
+    deployment_name: str,
+    model_name: str,
+    prompts: dict,
+    correct: bool = False,
+    split_correction: bool = False,
+    version: str | None = None,
+    base_url: str | None = None,
+):
+    """Connect to Azure's GPT API and set up a conversation with the user.
+
+    Extends GptConversation.
+
+    Args:
+    ----
+    deployment_name (str): The name of the Azure deployment to use.
+
+    model_name (str): The name of the model to use. This is distinct
+        from the deployment name.
+
+    prompts (dict): A dictionary of prompts to use for the conversation.
+
+    correct (bool): Whether to correct the model output.
+
+    split_correction (bool): Whether to correct the model output by
+        splitting the output into sentences and correcting each
+        sentence individually.
+
+    version (str): The version of the Azure API to use.
+
+    base_url (str): The base URL of the Azure API to use.
-    model_name (str): The name of the model to use. This is distinct
-        from the deployment name.
-
-    prompts (dict): A dictionary of prompts to use for the conversation.
-
-    correct (bool): Whether to correct the model output.
-
-    split_correction (bool): Whether to correct the model output by
-        splitting the output into sentences and correcting each
-        sentence individually.
-
-    version (str): The version of the Azure API to use.
-
-    base_url (str): The base URL of the Azure API to use.
-    """
-    super().__init__(
-        model_name=model_name,
-        prompts=prompts,
-        correct=correct,
-        split_correction=split_correction,
-    )
-
-    self.version = version
-    self.base_url = base_url
-    self.deployment_name = deployment_name
+    """
+    super().__init__(
+        model_name=model_name,
+        prompts=prompts,
+        correct=correct,
+        split_correction=split_correction,
+    )
+
+    self.version = version
+    self.base_url = base_url
+    self.deployment_name = deployment_name
def set_api_key(self, api_key: str, user: Optional[str] = None) -> bool:
-    """
-    Set the API key for the Azure API. If the key is valid, initialise the
-    conversational agent. No user stats on Azure.
-
-    Args:
-        api_key (str): The API key for the Azure API.
-
-    Returns:
-        bool: True if the API key is valid, False otherwise.
-    """
-
-    try:
-        self.chat = AzureChatOpenAI(
-            deployment_name=self.deployment_name,
-            model_name=self.model_name,
-            openai_api_version=self.version,
-            azure_endpoint=self.base_url,
-            openai_api_key=api_key,
-            temperature=0,
-        )
-        # TODO this is the same model as the primary one; refactor to be
-        # able to use any model for correction
-        self.ca_chat = AzureChatOpenAI(
-            deployment_name=self.deployment_name,
-            model_name=self.model_name,
-            openai_api_version=self.version,
-            azure_endpoint=self.base_url,
-            openai_api_key=api_key,
-            temperature=0,
-        )
-
-        test = self.chat.generate([[HumanMessage(content="Hello")]])
-
-        return True
-
-    except openai._exceptions.AuthenticationError as e:
-        return False
def set_api_key(self, api_key: str) -> bool:
+    """Set the API key for the Azure API.
+
+    If the key is valid, initialise the conversational agent. No user stats
+    on Azure.
+
+    Args:
+    ----
+    api_key (str): The API key for the Azure API.
+
+    Returns:
+    -------
+    bool: True if the API key is valid, False otherwise.
+
+    """
+    try:
+        self.chat = AzureChatOpenAI(
+            deployment_name=self.deployment_name,
+            model_name=self.model_name,
+            openai_api_version=self.version,
+            azure_endpoint=self.base_url,
+            openai_api_key=api_key,
+            temperature=0,
+        )
+        # TODO this is the same model as the primary one; refactor to be
+        # able to use any model for correction
+        self.ca_chat = AzureChatOpenAI(
+            deployment_name=self.deployment_name,
+            model_name=self.model_name,
+            openai_api_version=self.version,
+            azure_endpoint=self.base_url,
+            openai_api_key=api_key,
+            temperature=0,
+        )
+
+        test = self.chat.generate([[HumanMessage(content="Hello")]])
+
+        return True
+
+    except openai._exceptions.AuthenticationError:
+        return False
class Conversation(ABC):
-    """
-    Use this class to set up a connection to an LLM API. Can be used to set the
-    user name and API key, append specific messages for system, user, and AI
-    roles (if available), set up the general context as well as manual and
-    tool-based data inputs, and finally to query the API with prompts made by
-    the user.
class Conversation(ABC):
+    """Use this class to set up a connection to an LLM API. Can be used to set the
+    user name and API key, append specific messages for system, user, and AI
+    roles (if available), set up the general context as well as manual and
+    tool-based data inputs, and finally to query the API with prompts made by
+    the user.
+
+    The conversation class is expected to have a `messages` attribute to store
+    the conversation, and a `history` attribute, which is a list of messages in
+    a specific format for logging / printing.
-    The conversation class is expected to have a `messages` attribute to store
-    the conversation, and a `history` attribute, which is a list of messages in
-    a specific format for logging / printing.
-
-    """
-
-    def __init__(
-        self,
-        model_name: str,
-        prompts: dict,
-        correct: bool = False,
-        split_correction: bool = False,
-        use_ragagent_selector: bool = False,
-    ):
-        super().__init__()
-        self.model_name = model_name
-        self.prompts = prompts
-        self.correct = correct
-        self.split_correction = split_correction
-        self.rag_agents: list[RagAgent] = []
-        self.history = []
-        self.messages = []
-        self.ca_messages = []
-        self.current_statements = []
-        self._use_ragagent_selector = use_ragagent_selector
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        prompts: dict,
+        correct: bool = False,
+        split_correction: bool = False,
+        use_ragagent_selector: bool = False,
+    ):
+        super().__init__()
+        self.model_name = model_name
+        self.prompts = prompts
+        self.correct = correct
+        self.split_correction = split_correction
+        self.rag_agents: list[RagAgent] = []
+        self.history = []
+        self.messages = []
+        self.ca_messages = []
+        self.current_statements = []
+        self._use_ragagent_selector = use_ragagent_selector
+
+    @property
+    def use_ragagent_selector(self):
+        return self._use_ragagent_selector
-    @property
-    def use_ragagent_selector(self):
-        return self._use_ragagent_selector
+    @use_ragagent_selector.setter
+    def use_ragagent_selector(self, val: bool):
+        self._use_ragagent_selector = val
-    @use_ragagent_selector.setter
-    def use_ragagent_selector(self, val: bool):
-        self._use_ragagent_selector = val
-
-    def set_user_name(self, user_name: str):
-        self.user_name = user_name
-
-    def set_rag_agent(self, agent: RagAgent):
-        """
-        Update or insert rag_agent: if the rag_agent with the same mode already
-        exists, it will be updated. Otherwise, the new rag_agent will be inserted.
-        """
-        i, _ = self.find_rag_agent(agent.mode)
-        if i < 0:
-            # insert
-            self.rag_agents.append(agent)
-        else:
-            # update
-            self.rag_agents[i] = agent
-
-    def find_rag_agent(self, mode: str) -> tuple[int, RagAgent]:
-        for i, val in enumerate(self.rag_agents):
-            if val.mode == mode:
-                return i, val
-        return -1, None
-
-    @abstractmethod
-    def set_api_key(self, api_key: str, user: Optional[str] = None):
-        pass
-
-    def get_prompts(self):
-        return self.prompts
-
-    def set_prompts(self, prompts: dict):
-        self.prompts = prompts
-
-    def append_ai_message(self, message: str) -> None:
-        """
-        Add a message from the AI to the conversation.
-
-        Args:
-            message (str): The message from the AI.
-        """
-        self.messages.append(
-            AIMessage(
-                content=message,
-            ),
-        )
-
-    def append_system_message(self, message: str) -> None:
-        """
-        Add a system message to the conversation.
-
-        Args:
-            message (str): The system message.
-        """
-        self.messages.append(
-            SystemMessage(
-                content=message,
-            ),
-        )
+    def set_user_name(self, user_name: str):
+        self.user_name = user_name
+
+    def set_rag_agent(self, agent: RagAgent):
+        """Update or insert rag_agent: if the rag_agent with the same mode already
+        exists, it will be updated. Otherwise, the new rag_agent will be inserted.
+        """
+        i, _ = self.find_rag_agent(agent.mode)
+        if i < 0:
+            # insert
+            self.rag_agents.append(agent)
+        else:
+            # update
+            self.rag_agents[i] = agent
+
+    def find_rag_agent(self, mode: str) -> tuple[int, RagAgent]:
+        for i, val in enumerate(self.rag_agents):
+            if val.mode == mode:
+                return i, val
+        return -1, None
+
+    @abstractmethod
+    def set_api_key(self, api_key: str, user: str | None = None):
+        pass
+
+    def get_prompts(self):
+        return self.prompts
+
+    def set_prompts(self, prompts: dict):
+        self.prompts = prompts
+
+    def append_ai_message(self, message: str) -> None:
+        """Add a message from the AI to the conversation.
+
+        Args:
+        ----
+        message (str): The message from the AI.
+
+        """
+        self.messages.append(
+            AIMessage(
+                content=message,
+            ),
+        )
+
+    def append_system_message(self, message: str) -> None:
+        """Add a system message to the conversation.
+
+        Args:
+        ----
+        message (str): The system message.
+
+        """
+        self.messages.append(
+            SystemMessage(
+                content=message,
+            ),
+        )
+
+    def append_ca_message(self, message: str) -> None:
+        """Add a message to the correcting agent conversation.
-    def append_ca_message(self, message: str) -> None:
-        """
-        Add a message to the correcting agent conversation.
+        Args:
+        ----
+        message (str): The message to the correcting agent.
-        Args:
-            message (str): The message to the correcting agent.
-        """
-        self.ca_messages.append(
-            SystemMessage(
-                content=message,
-            ),
-        )
-
-    def append_user_message(self, message: str) -> None:
-        """
-        Add a message from the user to the conversation.
-
-        Args:
-            message (str): The message from the user.
-        """
-        self.messages.append(
-            HumanMessage(
-                content=message,
-            ),
-        )
-
-    def append_image_message(
-        self, message: str, image_url: str, local: bool = False
-    ) -> None:
-        """
-        Add a user message with an image to the conversation. Also checks, in
-        addition to the `local` flag, if the image URL is a local file path.
-        If it is local, the image will be encoded as a base64 string to be
-        passed to the LLM.
-
-        Args:
-            message (str): The message from the user.
-            image_url (str): The URL of the image.
-            local (bool): Whether the image is local or not. If local, it will
-                be encoded as a base64 string to be passed to the LLM.
-        """
-        parsed_url = urllib.parse.urlparse(image_url)
-        if local or not parsed_url.netloc:
-            image_url = f"data:image/jpeg;base64,{encode_image(image_url)}"
-        else:
-            image_url = (
-                f"data:image/jpeg;base64,{encode_image_from_url(image_url)}"
-            )
-
-        self.messages.append(
-            HumanMessage(
-                content=[
-                    {"type": "text", "text": message},
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                ],
-            ),
-        )
-
-    def setup(self, context: str):
-        """
-        Set up the conversation with general prompts and a context.
-        """
-        for msg in self.prompts["primary_model_prompts"]:
-            if msg:
-                self.append_system_message(msg)
-
-        for msg in self.prompts["correcting_agent_prompts"]:
-            if msg:
-                self.append_ca_message(msg)
-
-        self.context = context
-        msg = f"The topic of the research is {context}."
-        self.append_system_message(msg)
-
-    def setup_data_input_manual(self, data_input: str):
-        self.data_input = data_input
-        msg = f"The user has given information on the data input: {data_input}."
-        self.append_system_message(msg)
-
-    def setup_data_input_tool(self, df, input_file_name: str):
-        self.data_input_tool = df
-
-        for tool_name in self.prompts["tool_prompts"]:
-            if tool_name in input_file_name:
-                msg = self.prompts["tool_prompts"][tool_name].format(df=df)
-                self.append_system_message(msg)
-
-    def query(self, text: str, image_url: str = None) -> tuple[str, dict, str]:
-        """
-        The main workflow for querying the LLM API. Appends the most recent
-        query to the conversation, optionally injects context from the RAG
-        agent, and runs the primary query method of the child class.
-
-        Args:
-            text (str): The user query.
-
-            image_url (str): The URL of an image to include in the conversation.
-                Optional and only supported for models with vision capabilities.
-
-        Returns:
+ """
+self.ca_messages.append(
+SystemMessage(
+content=message,
+),
+)
+
+defappend_user_message(self,message:str)->None:
+"""Add a message from the user to the conversation.
+
+ Args:
+ ----
+ message (str): The message from the user.
+
+ """
+self.messages.append(
+HumanMessage(
+content=message,
+),
+)
+
+defappend_image_message(
+self,
+message:str,
+image_url:str,
+local:bool=False,
+)->None:
+"""Add a user message with an image to the conversation. Also checks, in
+ addition to the `local` flag, if the image URL is a local file path.
+ If it is local, the image will be encoded as a base64 string to be
+ passed to the LLM.
+
+ Args:
+ ----
+ message (str): The message from the user.
+ image_url (str): The URL of the image.
+ local (bool): Whether the image is local or not. If local, it will
+ be encoded as a base64 string to be passed to the LLM.
+
+ """
+parsed_url=urllib.parse.urlparse(image_url)
+iflocalornotparsed_url.netloc:
+image_url=f"data:image/jpeg;base64,{encode_image(image_url)}"
+else:
+image_url=f"data:image/jpeg;base64,{encode_image_from_url(image_url)}"
+
+self.messages.append(
+HumanMessage(
+content=[
+{"type":"text","text":message},
+{"type":"image_url","image_url":{"url":image_url}},
+],
+),
+)
+
+defsetup(self,context:str):
+"""Set up the conversation with general prompts and a context."""
+formsginself.prompts["primary_model_prompts"]:
+ifmsg:
+self.append_system_message(msg)
+
+formsginself.prompts["correcting_agent_prompts"]:
+ifmsg:
+self.append_ca_message(msg)
+
+self.context=context
+msg=f"The topic of the research is {context}."
+self.append_system_message(msg)
+
+defsetup_data_input_manual(self,data_input:str):
+self.data_input=data_input
+msg=f"The user has given information on the data input: {data_input}."
+self.append_system_message(msg)
+
+defsetup_data_input_tool(self,df,input_file_name:str):
+self.data_input_tool=df
+
+fortool_nameinself.prompts["tool_prompts"]:
+iftool_nameininput_file_name:
+msg=self.prompts["tool_prompts"][tool_name].format(df=df)
+self.append_system_message(msg)
+
+defquery(self,text:str,image_url:str=None)->tuple[str,dict,str]:
+"""The main workflow for querying the LLM API. Appends the most recent
+ query to the conversation, optionally injects context from the RAG
+ agent, and runs the primary query method of the child class.
+
+ Args:
+ ----
+ text (str): The user query.
+
+ image_url (str): The URL of an image to include in the conversation.
+ Optional and only supported for models with vision capabilities.
+
+ Returns:
+ ------- tuple: A tuple containing the response from the API, the token usage information, and the correction if necessary/desired.
- """
-
+
+ """ifnotimage_url:self.append_user_message(text)else:
@@ -4266,178 +4081,170 @@
        if not self.correct:
            return (msg, token_usage, None)
-        cor_msg = (
-            "Correcting (using single sentences) ..."
-            if self.split_correction
-            else "Correcting ..."
-        )
-
-        if st:
-            with st.spinner(cor_msg):
-                corrections = self._correct_query(text)
-        else:
-            corrections = self._correct_query(text)
-
-        if not corrections:
-            return (msg, token_usage, None)
-
-        correction = "\n".join(corrections)
-        return (msg, token_usage, correction)
-
-    def _correct_query(self, msg: str):
-        corrections = []
-        if self.split_correction:
-            nltk.download("punkt")
-            tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")
-            sentences = tokenizer.tokenize(msg)
-            for sentence in sentences:
-                correction = self._correct_response(sentence)
-
-                if not str(correction).lower() in ["ok", "ok."]:
-                    corrections.append(correction)
-        else:
-            correction = self._correct_response(msg)
-
-            if not str(correction).lower() in ["ok", "ok."]:
-                corrections.append(correction)
-
-        return corrections
+        cor_msg = "Correcting (using single sentences) ..." if self.split_correction else "Correcting ..."
+
+        if st:
+            with st.spinner(cor_msg):
+                corrections = self._correct_query(text)
+        else:
+            corrections = self._correct_query(text)
+
+        if not corrections:
+            return (msg, token_usage, None)
+
+        correction = "\n".join(corrections)
+        return (msg, token_usage, correction)
+
+    def _correct_query(self, msg: str):
+        corrections = []
+        if self.split_correction:
+            nltk.download("punkt")
+            tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")
+            sentences = tokenizer.tokenize(msg)
+            for sentence in sentences:
+                correction = self._correct_response(sentence)
+
+                if str(correction).lower() not in ["ok", "ok."]:
+                    corrections.append(correction)
+        else:
+            correction = self._correct_response(msg)
+
+            if str(correction).lower() not in ["ok", "ok."]:
+                corrections.append(correction)
+
+        return corrections
+
+    @abstractmethod
+    def _primary_query(self, text: str):
+        pass
+
+    @abstractmethod
-    def _primary_query(self, text: str):
+    def _correct_response(self, msg: str):
+        pass
-    @abstractmethod
-    def _correct_response(self, msg: str):
-        pass
-
-    def _inject_context_by_ragagent_selector(self, text: str):
-        """
-        Inject the context generated by RagAgentSelector, which will choose appropriate
-        rag agent to generate context according to user's question
-
-        Args:
-            text (str): The user query to be used for choosing rag agent
-        """
-        rag_agents: list[RagAgent] = [
-            agent for agent in self.rag_agents if agent.use_prompt
-        ]
-        decider_agent = RagAgentSelector(
-            rag_agents=rag_agents,
-            conversation_factory=lambda: self,
-        )
-        result = decider_agent.execute(text)
-        if result.tool_result is not None and len(result.tool_result) > 0:
-            return result.tool_result
-        # find rag agent selected
-        rag_agent = next(
-            [agent for agent in rag_agents if agent.mode == result.answer], None
-        )
-        if rag_agent is None:
-            return None
-        return rag_agent.generate_responses(text)
-
-    def _inject_context(self, text: str):
-        """
-        Inject the context received from the RAG agent into the prompt. The RAG
-        agent will find the most similar n text fragments and add them to the
-        message history object for usage in the next prompt. Uses the document
-        summarisation prompt set to inject the context. The ultimate prompt
-        should include the placeholder for the statements, `{statements}` (used
-        for formatting the string).
-
-        Args:
-            text (str): The user query to be used for similarity search.
-        """
-
-        sim_msg = f"Performing similarity search to inject fragments ..."
-
-        if st:
-            with st.spinner(sim_msg):
-                statements = []
-                if self.use_ragagent_selector:
-                    statements = self._inject_context_by_ragagent_selector(text)
-                else:
-                    for agent in self.rag_agents:
-                        try:
-                            docs = agent.generate_responses(text)
-                            statements = statements + [doc[0] for doc in docs]
-                        except ValueError as e:
-                            logger.warning(e)
-
-        else:
-            statements = []
-            if self.use_ragagent_selector:
-                statements = self._inject_context_by_ragagent_selector(text)
-            else:
-                for agent in self.rag_agents:
-                    try:
-                        docs = agent.generate_responses(text)
-                        statements = statements + [doc[0] for doc in docs]
-                    except ValueError as e:
-                        logger.warning(e)
-
-        if statements and len(statements) > 0:
-            prompts = self.prompts["rag_agent_prompts"]
-            self.current_statements = statements
-            for i, prompt in enumerate(prompts):
-                # if last prompt, format the statements into the prompt
-                if i == len(prompts) - 1:
-                    self.append_system_message(
-                        prompt.format(statements=statements)
-                    )
-                else:
-                    self.append_system_message(prompt)
-
-    def get_last_injected_context(self) -> list[dict]:
-        """
-        Get a formatted list of the last context injected into the
-        conversation. Contains one dictionary for each RAG mode.
-
-        Returns:
-            List[dict]: A list of dictionaries containing the mode and context
-                for each RAG agent.
-        """
-        last_context = []
-        for agent in self.rag_agents:
-            last_context.append(
-                {"mode": agent.mode, "context": agent.last_response}
-            )
-        return last_context
-
-    def get_msg_json(self) -> str:
-        """
-        Return a JSON representation (of a list of dicts) of the messages in
-        the conversation. The keys of the dicts are the roles, the values are
-        the messages.
-
-        Returns:
-            str: A JSON representation of the messages in the conversation.
-        """
-        d = []
-        for msg in self.messages:
-            if isinstance(msg, SystemMessage):
-                role = "system"
-            elif isinstance(msg, HumanMessage):
-                role = "user"
-            elif isinstance(msg, AIMessage):
-                role = "ai"
-            else:
-                raise ValueError(f"Unknown message type: {type(msg)}")
-
-            d.append({role: msg.content})
-
-        return json.dumps(d)
-
-    def reset(self):
-        """
-        Resets the conversation to the initial state.
-        """
-
-        self.history = []
-        self.messages = []
-        self.ca_messages = []
-        self.current_statements = []
+    def _inject_context_by_ragagent_selector(self, text: str):
+        """Inject the context generated by RagAgentSelector, which will choose appropriate
+        rag agent to generate context according to user's question
+        Args:
+        text (str): The user query to be used for choosing rag agent
+        """
+        rag_agents: list[RagAgent] = [agent for agent in self.rag_agents if agent.use_prompt]
+        decider_agent = RagAgentSelector(
+            rag_agents=rag_agents,
+            conversation_factory=lambda: self,
+        )
+        result = decider_agent.execute(text)
+        if result.tool_result is not None and len(result.tool_result) > 0:
+            return result.tool_result
+        # find rag agent selected
+        rag_agent = next(
+            [agent for agent in rag_agents if agent.mode == result.answer],
+            None,
+        )
+        if rag_agent is None:
+            return None
+        return rag_agent.generate_responses(text)
+
+    def _inject_context(self, text: str):
+        """Inject the context received from the RAG agent into the prompt. The RAG
+        agent will find the most similar n text fragments and add them to the
+        message history object for usage in the next prompt. Uses the document
+        summarisation prompt set to inject the context. The ultimate prompt
+        should include the placeholder for the statements, `{statements}` (used
+        for formatting the string).
+
+        Args:
+        ----
+        text (str): The user query to be used for similarity search.
+
+        """
+        sim_msg = "Performing similarity search to inject fragments ..."
+
+        if st:
+            with st.spinner(sim_msg):
+                statements = []
+                if self.use_ragagent_selector:
+                    statements = self._inject_context_by_ragagent_selector(text)
+                else:
+                    for agent in self.rag_agents:
+                        try:
+                            docs = agent.generate_responses(text)
+                            statements = statements + [doc[0] for doc in docs]
+                        except ValueError as e:
+                            logger.warning(e)
+
+        else:
+            statements = []
+            if self.use_ragagent_selector:
+                statements = self._inject_context_by_ragagent_selector(text)
+            else:
+                for agent in self.rag_agents:
+                    try:
+                        docs = agent.generate_responses(text)
+                        statements = statements + [doc[0] for doc in docs]
+                    except ValueError as e:
+                        logger.warning(e)
+
+        if statements and len(statements) > 0:
+            prompts = self.prompts["rag_agent_prompts"]
+            self.current_statements = statements
+            for i, prompt in enumerate(prompts):
+                # if last prompt, format the statements into the prompt
+                if i == len(prompts) - 1:
+                    self.append_system_message(
+                        prompt.format(statements=statements),
+                    )
+                else:
+                    self.append_system_message(prompt)
+
+    def get_last_injected_context(self) -> list[dict]:
+        """Get a formatted list of the last context injected into the
+        conversation. Contains one dictionary for each RAG mode.
+
+        Returns
+        -------
+        List[dict]: A list of dictionaries containing the mode and context
+            for each RAG agent.
+
+        """
+        last_context = []
+        for agent in self.rag_agents:
+            last_context.append(
+                {"mode": agent.mode, "context": agent.last_response},
+            )
+        return last_context
+
+    def get_msg_json(self) -> str:
+        """Return a JSON representation (of a list of dicts) of the messages in
+        the conversation. The keys of the dicts are the roles, the values are
+        the messages.
+
+        Returns
+        -------
+        str: A JSON representation of the messages in the conversation.
+
+        """
+        d = []
+        for msg in self.messages:
+            if isinstance(msg, SystemMessage):
+                role = "system"
+            elif isinstance(msg, HumanMessage):
+                role = "user"
+            elif isinstance(msg, AIMessage):
+                role = "ai"
+            else:
+                raise ValueError(f"Unknown message type: {type(msg)}")
+
+            d.append({role: msg.content})
+
+        return json.dumps(d)
+
+    def reset(self):
+        """Resets the conversation to the initial state."""
+        self.history = []
+        self.messages = []
+        self.ca_messages = []
+        self.current_statements = []
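To illustrate the abstract surface, a toy subclass that satisfies the three abstract hooks without calling any external API. This is purely illustrative and not part of the library; a real backend would talk to an LLM in `_primary_query` and to a correcting model in `_correct_response`.

class EchoConversation(Conversation):
    """Minimal sketch of a Conversation backend; echoes the user input."""

    def set_api_key(self, api_key: str, user: str | None = None):
        return True  # nothing to validate for a local echo backend

    def _primary_query(self, text: str):
        msg = f"Echo: {text}"
        self.append_ai_message(msg)
        return msg, None  # (response, token_usage)

    def _correct_response(self, msg: str):
        return "OK"  # never suggests corrections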