From e66f34f3fb55f8a2183d4e9b08d314bbf4564a32 Mon Sep 17 00:00:00 2001 From: sreyakumar <121137643+sreyakumar@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:37:26 -0800 Subject: [PATCH] improved efficiency --- README.md | 4 +- app.py | 39 ++ src/metadata_chatbot/agents/agentic_graph.py | 54 +- src/metadata_chatbot/agents/async_workflow.py | 195 ++++-- .../agents/docdb_retriever.py | 39 +- src/metadata_chatbot/agents/react_agent.py | 62 +- src/metadata_chatbot/agents/workflow.py | 2 +- .../bedrock_model/Metamorph.py | 43 -- .../bedrock_model/__init__.py | 3 - src/metadata_chatbot/bedrock_model/chat.py | 272 -------- src/metadata_chatbot/bedrock_model/config.py | 65 -- .../bedrock_model/ref/acquisition_schema.json | 215 ------- .../ref/data_description_schema.json | 250 -------- .../bedrock_model/ref/instrument_schema.json | 597 ------------------ .../bedrock_model/ref/metadata.json | 177 ------ .../bedrock_model/ref/procedures_schema.json | 77 --- .../bedrock_model/ref/processing_schema.json | 53 -- .../bedrock_model/ref/rig_schema.json | 409 ------------ .../bedrock_model/ref/session_schema.json | 245 ------- .../ref/subject_609281_metadata.json | 59 -- .../bedrock_model/ref/subject_schema.json | 212 ------- .../bedrock_model/system_prompt.py | 142 ----- src/metadata_chatbot/bedrock_model/tools.py | 112 ---- 23 files changed, 248 insertions(+), 3078 deletions(-) create mode 100644 app.py delete mode 100644 src/metadata_chatbot/bedrock_model/Metamorph.py delete mode 100644 src/metadata_chatbot/bedrock_model/__init__.py delete mode 100644 src/metadata_chatbot/bedrock_model/chat.py delete mode 100644 src/metadata_chatbot/bedrock_model/config.py delete mode 100644 src/metadata_chatbot/bedrock_model/ref/acquisition_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/data_description_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/instrument_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/metadata.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/procedures_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/processing_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/rig_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/session_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/subject_609281_metadata.json delete mode 100644 src/metadata_chatbot/bedrock_model/ref/subject_schema.json delete mode 100644 src/metadata_chatbot/bedrock_model/system_prompt.py delete mode 100644 src/metadata_chatbot/bedrock_model/tools.py diff --git a/README.md b/README.md index adf11a9..76be489 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# GAMER: Generative Analysis for Metadata Retrieval +# GAMER: Generative Analysis for Metadata Retrieval [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE) ![Code Style](https://img.shields.io/badge/code%20style-black-black) @@ -29,6 +29,8 @@ Install the chatbot package -- ensure virtual environment is running. pip install metadata-chatbot ``` +Create a folder called huggingface_cache in the directory in which you are running the model. + ## Usage To call the model, diff --git a/app.py b/app.py new file mode 100644 index 0000000..5c3f20b --- /dev/null +++ b/app.py @@ -0,0 +1,39 @@ +# Import the Streamlit library +import streamlit as st +from metadata_chatbot.agents.GAMER import GAMER +import asyncio + +#run on terminal with streamlit run [ARGUMENTS] + +async def main(): +# Write a simple message to the app's webpage + llm = GAMER() + message = st.chat_message("assistant") + message.write("Hello!") + + prompt = st.chat_input("Ask a question about the AIND Metadata!") + + if "messages" not in st.session_state: + st.session_state.messages = [] + + for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + if prompt: + # Display user message in chat message container + with st.chat_message("user"): + st.markdown(prompt) + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": prompt}) + response = await llm.ainvoke(prompt) + + with st.chat_message("assistant"): + st.markdown(response) + + # Add assistant response to chat history + st.session_state.messages.append({"role": "assistant", "content": response}) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/src/metadata_chatbot/agents/agentic_graph.py b/src/metadata_chatbot/agents/agentic_graph.py index d941abb..9896129 100644 --- a/src/metadata_chatbot/agents/agentic_graph.py +++ b/src/metadata_chatbot/agents/agentic_graph.py @@ -9,19 +9,21 @@ from langgraph.prebuilt import create_react_agent MODEL_ID_SONNET_3 = "anthropic.claude-3-sonnet-20240229-v1:0" -MODEL_ID_SONNET_3_5 = "anthropic.claude-3-sonnet-20240229-v1:0" +MODEL_ID_SONNET_3_5 = "anthropic.claude-3-5-sonnet-20240620-v1:0" SONNET_3_LLM = ChatBedrock( model_id= MODEL_ID_SONNET_3, model_kwargs= { "temperature": 0 - } + }, + streaming = True ) SONNET_3_5_LLM = ChatBedrock( model_id= MODEL_ID_SONNET_3_5, model_kwargs= { "temperature": 0 - } + }, + streaming = True ) # Determining if entire database needs to be surveyed @@ -35,50 +37,6 @@ class RouteQuery(TypedDict): router_prompt = hub.pull("eden19/query_rerouter") datasource_router = router_prompt | structured_llm_router -# Tool to survey entire database -API_GATEWAY_HOST = "api.allenneuraldynamics.org" -DATABASE = "metadata_index" -COLLECTION = "data_assets" - -docdb_api_client = MetadataDbClient( - host=API_GATEWAY_HOST, - database=DATABASE, - collection=COLLECTION, -) - -@tool -def aggregation_retrieval(agg_pipeline: list) -> list: - """Given a MongoDB query and list of projections, this function retrieves and returns the - relevant information in the documents. - Use a project stage as the first stage to minimize the size of the queries before proceeding with the remaining steps. - The input to $map must be an array not a string, avoid using it in the $project stage. - - Parameters - ---------- - agg_pipeline - MongoDB aggregation pipeline - - Returns - ------- - list - List of retrieved documents - """ - - result = docdb_api_client.aggregate_docdb_records( - pipeline=agg_pipeline - ) - return result - -tools = [aggregation_retrieval] -tool_model = SONNET_3_5_LLM.bind_tools(tools) - -db_prompt = hub.pull("eden19/entire_db_retrieval") -langgraph_agent_executor = create_react_agent(SONNET_3_LLM, tools=tools, state_modifier= db_prompt) - -db_surveyor_agent = create_tool_calling_agent(SONNET_3_LLM, tools, db_prompt) -query_retriever = AgentExecutor(agent=db_surveyor_agent, tools=tools, return_intermediate_steps = True, verbose=False) - - # Generating appropriate filter class FilterGenerator(TypedDict): """MongoDB filter to be applied before vector retrieval""" @@ -95,7 +53,7 @@ class FilterGenerator(TypedDict): class RetrievalGrader(TypedDict): """Relevant material in the retrieved document + Binary score to check relevance to the question""" - relevant_context:Annotated[str, ..., "Relevant context extracted from document that helps directly answer the question"] + #relevant_context:Annotated[str, ..., "Relevant context extracted from document that helps directly answer the question"] binary_score: Annotated[Literal["yes", "no"], ..., "Retrieved documents are relevant to the query, 'yes' or 'no'"] retrieval_grader = SONNET_3_5_LLM.with_structured_output(RetrievalGrader) diff --git a/src/metadata_chatbot/agents/async_workflow.py b/src/metadata_chatbot/agents/async_workflow.py index 6e24004..881163d 100644 --- a/src/metadata_chatbot/agents/async_workflow.py +++ b/src/metadata_chatbot/agents/async_workflow.py @@ -1,14 +1,27 @@ import asyncio -from typing import List, Optional +from typing import List, Optional, Annotated from typing_extensions import TypedDict +from pprint import pprint +import uuid from langchain_core.documents import Document from langgraph.graph import END, StateGraph, START -from langchain_core.messages.ai import AIMessage +from langchain_core.messages import AIMessage, HumanMessage from metadata_chatbot.agents.docdb_retriever import DocDBRetriever from metadata_chatbot.agents.react_agent import react_agent -from metadata_chatbot.agents.agentic_graph import datasource_router, filter_generation_chain, doc_grader, rag_chain, db_rag_chain +from metadata_chatbot.agents.agentic_graph import datasource_router, filter_generation_chain, doc_grader, rag_chain + +# from docdb_retriever import DocDBRetriever +# from react_agent import react_agent +# from agentic_graph import datasource_router, filter_generation_chain, doc_grader, rag_chain, db_rag_chain + +from langgraph.checkpoint.memory import MemorySaver +from langchain_core.messages import AnyMessage +from langgraph.graph.message import add_messages + +import warnings +warnings.filterwarnings('ignore') class GraphState(TypedDict): @@ -20,9 +33,8 @@ class GraphState(TypedDict): generation: LLM generation documents: list of documents """ - - query: str - generation: str + messages: Annotated[list[AnyMessage], add_messages] + generation: Optional[str] documents: Optional[List[str]] filter: Optional[dict] top_k: Optional[int] @@ -36,7 +48,7 @@ async def route_question_async(state): Returns: str: Next node to call """ - query = state["query"] + query = state['messages'][0].content source = await datasource_router.ainvoke({"query": query}) @@ -71,28 +83,33 @@ async def retrieve_DB_async(state): state (dict): New key may be added to state, generation, which contains the answer for query asked """ - query = state["query"] + query = state['messages'][0].content inputs = {"messages": [("user", query)]} - generation = print_stream(react_agent.stream(inputs, stream_mode="values")) - - # generation = react_agent.invoke(inputs) - # AIMessage_list = [] - # for message in generation['messages']: - # if isinstance(message, AIMessage): - # AIMessage_list.append(message) - - # final_answer = AIMessage_list[-1].content - - # document_dict = dict() - # retrieved_dict = await query_retriever.ainvoke({'query': query, 'chat_history': [], 'agent_scratchpad' : []}) - # document_dict['mongodb_query'] = retrieved_dict['intermediate_steps'][0][0].tool_input['agg_pipeline'] - # document_dict['retrieved_output'] = retrieved_dict['intermediate_steps'][0][1] - # documents = await asyncio.to_thread(json.dumps, document_dict) + try: + message_list = [] + async for s in react_agent.astream(inputs, stream_mode="values"): + message = s["messages"][-1] + state['messages'] = state.get('messages', []) + [message] + message_list.append(message.content) + return {"messages": state.get("messages", []), + "generation": state['messages'][-1].content} + + #print(message_list) + #generation = print_stream(react_agent.stream(inputs, stream_mode="values")) + answer = '' + except: + answer = "An error has occured with the retrieval from DocDB, try structuring your query another way!" - return {"query": query, "generation": ''} + return {"messages": [ + AIMessage(answer) + ], + "generation": answer, + "filter": None, + "documents": None, + "top_k": None} async def filter_generator_async(state): """ @@ -104,13 +121,23 @@ async def filter_generator_async(state): Returns: state (dict): New key may be added to state, filter, which contains the MongoDB query that will be applied before retrieval """ - query = state["query"] - - result = await filter_generation_chain.ainvoke({"query": query}) - filter = result['filter_query'] - top_k = result['top_k'] + query = state['messages'][0].content + + try: + result = await filter_generation_chain.ainvoke({"query": query}) + filter = result['filter_query'] + top_k = result['top_k'] + except: + filter = None + top_k = None - return {"filter": filter, "top_k": top_k, "query": query} + return {"filter": filter, + "top_k": top_k, + "messages": [ + AIMessage(str(result)) + ], + "documents": None, + "generation": None} async def retrieve_VI_async(state): """ @@ -122,20 +149,37 @@ async def retrieve_VI_async(state): Returns: state (dict): New key added to state, documents, that contains retrieved documents """ - query = state["query"] + query = state['messages'][0].content filter = state["filter"] top_k = state["top_k"] - retriever = DocDBRetriever(k = top_k) - documents = await retriever.aget_relevant_documents(query = query, query_filter = filter) - return {"documents": documents, "query": query} + try: + retriever = DocDBRetriever(k = top_k) + + + #print("Retrieving relevant documents from vector index...") + documents = await retriever.aget_relevant_documents(query = query, query_filter = filter) + + except: + documents = "No documents were returned" + + return {"documents": documents, + "filter": state.get("filter", None), + "top_k": state.get("top_k", None), + "messages": [AIMessage("Retrieving relevant documents from vector index...")], + "generation" : None} async def grade_doc_async(query, doc: Document): score = await doc_grader.ainvoke({"query": query, "document": doc.page_content}) grade = score['binary_score'] - if grade == "yes": - return doc.page_content + try: + if grade == "yes": + return doc.page_content + else: + return None + except: + return "There was an error processing this document." async def grade_documents_async(state): @@ -148,29 +192,20 @@ async def grade_documents_async(state): Returns: state (dict): Updates documents key with only filtered relevant documents """ - query = state["query"] + query = state['messages'][0].content documents = state["documents"] - filtered_docs = await asyncio.gather(*[grade_doc_async(query, doc) for doc in documents]) - filtered_docs = [doc for doc in filtered_docs if doc is not None] - return {"documents": filtered_docs, "query": query} - -async def generate_DB_async(state): - """ - Generate answer - - Args: - state (dict): The current graph state + #print("Checking document relevancy to your query...") - Returns: - state (dict): New key added to state, generation, that contains LLM generation - """ - query = state["query"] - documents = state["documents"] - - # RAG generation - generation = await db_rag_chain.ainvoke({"documents": documents, "query": query}) - return {"documents": documents, "query": query, "generation": generation, "filter": state.get("filter", None)} + filtered_docs = await asyncio.gather( + *[grade_doc_async(query, doc) for doc in documents], + return_exceptions = True) + filtered_docs = [doc for doc in filtered_docs if doc is not None] + return {"documents": filtered_docs, + "top_k": state.get("top_k", None), + "filter": state.get("filter", None), + "messages": [AIMessage("Checking document relevancy to your query...")], + "generation" : None} async def generate_VI_async(state): """ @@ -182,12 +217,19 @@ async def generate_VI_async(state): Returns: state (dict): New key added to state, generation, that contains LLM generation """ - query = state["query"] + query = state['messages'][0].content documents = state["documents"] - # RAG generation - generation = await rag_chain.ainvoke({"documents": documents, "query": query}) - return {"documents": documents, "query": query, "generation": generation, "filter": state.get("filter", None)} + try: + generation = await rag_chain.ainvoke({"documents": documents, "query": query}) + except: + generation = "Apologies, would you mind reframing the query in another way?" + + return {"documents": documents, + "messages": [AIMessage(str(generation))], + "generation": generation, + "top_k": state.get("top_k", None), + "filter": state.get("filter", None)} async_workflow = StateGraph(GraphState) async_workflow.add_node("database_query", retrieve_DB_async) @@ -212,13 +254,34 @@ async def generate_VI_async(state): async_workflow.add_edge("document_grading","generate_vi") async_workflow.add_edge("generate_vi", END) -async_app = async_workflow.compile() +memory = MemorySaver() +async_app = async_workflow.compile(checkpointer=memory) +unique_id = str(uuid.uuid4()) +config = {"configurable":{"thread_id": unique_id}} + +outputs = [] async def main(): - query = "How many records are in the dataset?" - inputs = {"query": query} - answer = await async_app.ainvoke(inputs) - return answer['generation'] + query = "Give me a list of sessions for subject 740955?" + #query = "What is the mongod query to find How many records are in the database?" + inputs = { + "messages": [HumanMessage(query)], + } + + async for output in async_app.astream(inputs, config): + for key, value in output.items(): + if "generation" in value: # Check if 'generation' exists in the value + yield value["generation"] + +async def collect_main(): + result = [] + async for item in main(): # main is an async generator + result.append(item) # or do something with each item + return result[-1] + + + + # result = await async_app.ainvoke(inputs) #Run the async function -# print(asyncio.run(main())) +print(asyncio.run(collect_main())) diff --git a/src/metadata_chatbot/agents/docdb_retriever.py b/src/metadata_chatbot/agents/docdb_retriever.py index a1f75f7..e655106 100644 --- a/src/metadata_chatbot/agents/docdb_retriever.py +++ b/src/metadata_chatbot/agents/docdb_retriever.py @@ -9,17 +9,20 @@ from langchain_aws import BedrockEmbeddings from langchain_huggingface import HuggingFaceEmbeddings +import time +from sentence_transformers import SentenceTransformer -model_name = "dunzhang/stella_en_1.5B_v5" -model_kwargs = {'device': 'cpu'} -encode_kwargs = {'normalize_embeddings': False} -hf = HuggingFaceEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs, - cache_folder="/scratch/huggingface_cache" -) + +# model_name = "dunzhang/stella_en_1.5B_v5" +# model_kwargs = {'device': 'cpu'} +# encode_kwargs = {'normalize_embeddings': False} +# hf = HuggingFaceEmbeddings( +# model_name=model_name, +# model_kwargs=model_kwargs, +# encode_kwargs=encode_kwargs, +# cache_folder="/scratch/huggingface_cache" +# ) BEDROCK_CLIENT = boto3.client( service_name="bedrock-runtime", @@ -30,7 +33,7 @@ API_GATEWAY_HOST = "api.allenneuraldynamics-test.org" DATABASE = "metadata_vector_index" -COLLECTION = "STELLA_4096_all" +COLLECTION = "TITAN_4096_all" docdb_api_client = MetadataDbClient( host=API_GATEWAY_HOST, @@ -38,6 +41,8 @@ collection=COLLECTION, ) + + #print("Using collection:", COLLECTION) @@ -55,7 +60,14 @@ def _get_relevant_documents( ) -> List[Document]: #Embed query - embedded_query = BEDROCK_EMBEDDINGS.embed_query(query) + query_to_embed = [query] + query_prompt_name = "s2p_query" + model = SentenceTransformer("dunzhang/stella_en_1.5B_v5", trust_remote_code=True) + embedded_query = model.encode(query_to_embed, prompt_name=query_prompt_name)[0] + + + #embedded_query = BEDROCK_EMBEDDINGS.embed_query(query) + #Construct aggregation pipeline vector_search = { @@ -79,6 +91,7 @@ def _get_relevant_documents( try: result = docdb_api_client.aggregate_docdb_records(pipeline=pipeline) + except Exception as e: print(f"Error during aggregation: {e}") return [] @@ -111,7 +124,8 @@ async def _aget_relevant_documents( ) -> List[Document]: #Embed query - embedded_query = await hf.aembed_query(query) + + embedded_query = await BEDROCK_EMBEDDINGS.aembed_query(query) #Construct aggregation pipeline vector_search = { @@ -137,6 +151,7 @@ async def _aget_relevant_documents( if query_filter: pipeline.insert(0, query_filter) + result = docdb_api_client.aggregate_docdb_records(pipeline=pipeline) #Transform retrieved docs to langchain Documents diff --git a/src/metadata_chatbot/agents/react_agent.py b/src/metadata_chatbot/agents/react_agent.py index 44d04bd..38b102a 100644 --- a/src/metadata_chatbot/agents/react_agent.py +++ b/src/metadata_chatbot/agents/react_agent.py @@ -10,10 +10,17 @@ from langchain_core.messages import ToolMessage, SystemMessage from langchain_core.runnables import RunnableConfig from langgraph.graph import StateGraph, END -from metadata_chatbot.agents.react_agent_prompt import system_prompt +from langchain_core.runnables import RunnableSequence, RunnableLambda -MODEL_ID_SONNET_3_5 = "anthropic.claude-3-sonnet-20240229-v1:0" +# from metadata_chatbot.agents.react_agent_prompt import system_prompt +# from react_agent_prompt import system_prompt +from langchain_core.prompts import ChatPromptTemplate +from langchain import hub +import asyncio + + +MODEL_ID_SONNET_3_5 = "anthropic.claude-3-5-sonnet-20240620-v1:0" SONNET_3_5_LLM = ChatBedrock( model_id= MODEL_ID_SONNET_3_5, @@ -61,6 +68,10 @@ def aggregation_retrieval(agg_pipeline: list) -> list: tools = [aggregation_retrieval] model = SONNET_3_5_LLM.bind_tools(tools) +template = hub.pull("eden19/entire_db_retrieval") +#system_prompt = SystemMessage(system_rompt) +retrieval_agent_chain = template | model + class AgentState(TypedDict): """The state of the agent.""" @@ -68,10 +79,10 @@ class AgentState(TypedDict): tools_by_name = {tool.name: tool for tool in tools} -def tool_node(state: AgentState): +async def tool_node(state: AgentState): outputs = [] for tool_call in state["messages"][-1].tool_calls: - tool_result = tools_by_name[tool_call["name"]].invoke(tool_call["args"]) + tool_result = await tools_by_name[tool_call["name"]].ainvoke(tool_call["args"]) outputs.append( ToolMessage( content=json.dumps(tool_result), @@ -81,19 +92,27 @@ def tool_node(state: AgentState): ) return {"messages": outputs} -def call_model( - state: AgentState, - config: RunnableConfig, +async def call_model( + state: AgentState ): + if ToolMessage in state['messages']: # this is similar to customizing the create_react_agent with state_modifier, but is a lot more flexible - system_prompt_ = SystemMessage(system_prompt) - response = model.invoke([system_prompt_] + state["messages"], config) + response = await SONNET_3_5_LLM.ainvoke(state["messages"]) + else: + response = await retrieval_agent_chain.ainvoke(state["messages"]) # We return a list, because this will get added to the existing list return {"messages": [response]} +# async def summarizer( +# state: AgentState +# ): +# response = await SONNET_3_5_LLM.ainvoke(f"Summarize {state["messages"][-1]}") +# # We return a list, because this will get added to the existing list +# return {"messages": [response]} + # Define the conditional edge that determines whether to continue or not -def should_continue(state: AgentState): +async def should_continue(state: AgentState): messages = state["messages"] last_message = messages[-1] # If there is no function call, then we finish @@ -122,14 +141,19 @@ def should_continue(state: AgentState): react_agent = workflow.compile() -# def print_stream(stream): -# for s in stream: -# message = s["messages"][-1] -# if isinstance(message, tuple): -# print(message) -# else: -# message.pretty_print() +async def print_stream(stream): + async for s in stream: + message = s["messages"][-1] + if isinstance(message, tuple): + print(message) + else: + message.pretty_print() + +# async def main(): +# inputs = {"messages": [("user", "What is the total number of record in the database?")]} +# answer = await print_stream(react_agent.astream(inputs, stream_mode="values")) +# return answer -# inputs = {"messages": [("user", "What is the mongo db query to find the unique injection sites and counts of each in smartspim experiments?")]} -# print_stream(react_agent.stream(inputs, stream_mode="values")) \ No newline at end of file +# if __name__ == "__main__": +# asyncio.run(main()) \ No newline at end of file diff --git a/src/metadata_chatbot/agents/workflow.py b/src/metadata_chatbot/agents/workflow.py index 1908d38..f56c9cf 100644 --- a/src/metadata_chatbot/agents/workflow.py +++ b/src/metadata_chatbot/agents/workflow.py @@ -4,7 +4,7 @@ from langgraph.graph import END, StateGraph, START from metadata_chatbot.agents.docdb_retriever import DocDBRetriever -from metadata_chatbot.agents.agentic_graph import datasource_router, query_retriever, filter_generation_chain, doc_grader, rag_chain, db_rag_chain +from metadata_chatbot.agents.agentic_graph import datasource_router, filter_generation_chain, doc_grader, rag_chain, db_rag_chain logging.basicConfig(filename='workflow.log', level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode="w") diff --git a/src/metadata_chatbot/bedrock_model/Metamorph.py b/src/metadata_chatbot/bedrock_model/Metamorph.py deleted file mode 100644 index d0786f7..0000000 --- a/src/metadata_chatbot/bedrock_model/Metamorph.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Any, Dict, Iterator, List, Mapping, Optional - -from langchain_core.callbacks.manager import CallbackManagerForLLMRun -from langchain_core.language_models.llms import LLM -from langchain_core.outputs import GenerationChunk - -import boto3, json, os -from chat import get_completion -from botocore.exceptions import ClientError - -bedrock = boto3.client( - service_name="bedrock-runtime", - region_name = 'us-west-2' -) - -#model_id = "anthropic.claude-3-sonnet-20240229-v1:0" - -class Metamorph(LLM): - - def _call(self, - prompt: str, - bedrock_client = bedrock, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None - )-> str: - answer = get_completion(prompt, bedrock_client) - return answer - - - @property - def _identifying_params(self) -> Dict[str, Any]: - """Return a dictionary of identifying parameters.""" - return {"model_name" : "Metamorph"} - - @property - def _llm_type(self) -> str: - """Get the type of language model used by this chat model. Used for logging purposes only.""" - return "Claude 3 Sonnet" - -if __name__ == '__main__': - llm = Metamorph() - prompt = "Give me the count of genotypes in the ecephys modality in the database." - llm.invoke(prompt) diff --git a/src/metadata_chatbot/bedrock_model/__init__.py b/src/metadata_chatbot/bedrock_model/__init__.py deleted file mode 100644 index a41897c..0000000 --- a/src/metadata_chatbot/bedrock_model/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Init package""" -__version__ = "0.0.12" - diff --git a/src/metadata_chatbot/bedrock_model/chat.py b/src/metadata_chatbot/bedrock_model/chat.py deleted file mode 100644 index 7113996..0000000 --- a/src/metadata_chatbot/bedrock_model/chat.py +++ /dev/null @@ -1,272 +0,0 @@ -import boto3, json, os, logging -from tools import doc_retrieval, projection_retrieval, aggregation_retrieval, tool_call -from system_prompt import system_prompt, summary_system_prompt -from config import toolConfig -from botocore.exceptions import ClientError - -logging.basicConfig(filename='error.log', level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - -#Connecting to bedrock - -bedrock = boto3.client( - service_name="bedrock-runtime", - region_name = 'us-west-2' -) - -model_id = "anthropic.claude-3-sonnet-20240229-v1:0" - -def get_completion(prompt: str, bedrock_client, system_prompt=system_prompt) -> str: - - """Given a prompt, this function returns a reply to the question. - - Parameters - ---------- - prompt: str - Query given by the user to the chatbot - bedrock_client: variable - Initialization of boto3 bedrock client - system_prompt: str - Commands to be given to the model, will determine model response - prefill: str - Formatted prefill words to start Claude's reply - - Returns - ------- - str - Model's reply to prompt - """ - - messages = [{"role": "user", "content": [{"text": prompt}]}] - - inference_config = { - "temperature": 0, - "maxTokens": 4096 - } - converse_api_params = { - "modelId": model_id, - "messages" : messages, - "inferenceConfig": inference_config, - "toolConfig": toolConfig, - "system" : [{"text": system_prompt}] - } - - try: - response = bedrock_client.converse(**converse_api_params) - print(response) - response_content_blocks = response['output']['message']['content'] - - #Printing Claude's initial response to query - print(response_content_blocks[0]['text']) - - #Assistant reply including tool use - messages.append({"role": "assistant", "content": response_content_blocks}) - - if response['stopReason'] == "tool_use": - tool_use = response_content_blocks[-1]['toolUse'] - tool_id = tool_use['toolUseId'] - tool_name = tool_use['name'] - tool_inputs = tool_use['input'] - - logging.info(f"Using tool {tool_name}") - - retrieved_info = tool_call(tool_name, tool_inputs) - - tool_response = { - "role": "user", - "content": [ - { - "toolResult": { - "toolUseId": tool_id, - "content": [ - { - "text": retrieved_info - } - ], - 'status':'success' - } - } - ] - } - - messages.append(tool_response) - logging.info("Successful information retrieval") - - converse_api_params = { - "modelId": model_id, - "messages": messages, - "inferenceConfig": inference_config, - "toolConfig": toolConfig - } - - logging.info("Generating response...") - final_response = bedrock_client.converse(**converse_api_params) - final_response_text = final_response['output']['message']['content'][0]['text'] - return(final_response_text) - - except ClientError as e: - logging.error("A client exception occurred: %s", str(e), exc_info=True) - - -def get_summary(prompt, bedrock_client = bedrock, system_prompt=summary_system_prompt): - - messages = [{"role": "user", "content": [{"text": f"Summarize the record with id {prompt}"}]}] - - inference_config = { - "temperature": 0, - "maxTokens": 2000 - } - converse_api_params = { - "modelId": model_id, - "messages" : messages, - "inferenceConfig": inference_config, - "toolConfig": toolConfig - } - - if system_prompt: - converse_api_params["system"] = [{"text": system_prompt}] - - try: - response = bedrock_client.converse(**converse_api_params) - - response_message = response['output']['message'] - - response_content_blocks = response_message['content'] - - messages.append({"role": "assistant", "content": response_content_blocks}) - - for content_block in response_content_blocks: - if 'toolUse' in content_block: - - tool_use = response_content_blocks[-1] - tool_id = tool_use['toolUse']['toolUseId'] - tool_name = tool_use['toolUse']['name'] - tool_inputs = tool_use['toolUse']['input'] - - if tool_name == 'doc_retrieval': - filter_query_s = tool_inputs['filter'] # filter query stored as a string instead of dictionary - filter_query = json.loads(filter_query_s) - retrieved_info_list = doc_retrieval(filter_query) #retrieved info type, dictionary - retrieved_info = " ".join(map(str, retrieved_info_list)) - - tool_response = { - "role": "user", - "content": [ - { - "toolResult": { - "toolUseId": tool_id, - "content": [ - { - "text": retrieved_info - } - ], - 'status':'success' - } - } - ] - } - - messages.append(tool_response) - - converse_api_params = { - "modelId": model_id, - "messages": messages, - "inferenceConfig": inference_config, - "toolConfig": toolConfig - } - - final_response = bedrock_client.converse(**converse_api_params) - final_response_text = final_response['output']['message']['content'][0]['text'] - return(final_response_text) - - except ClientError as err: - message = err.response['Error']['Message'] - print(f"A client error occured: {message}") - - -def simple_chat(bedrock_client = bedrock, system_prompt = system_prompt): - - """This function is able to demonstrate back and forth conversation given user input. - - Parameters - ---------- - bedrock_client: variable - Initialization of boto3 bedrock client - system_prompt: str - Commands to be given to the model, will determine model response - - Returns - ------- - str - Model's reply to prompt - """ - - model_id = "anthropic.claude-3-sonnet-20240229-v1:0" - - user_message = input("\nUser: ") - messages = [{"role": "user", "content": [{"text": user_message}]}] - - inference_config = { - "temperature": 0, - "maxTokens": 4000 - } - - while True: - #If the last message is from the assistant, get another input from the user - if messages[-1].get("role") == "assistant": - user_message = input("\nUser: ") - messages.append({"role": "user", "content": [{"text": user_message}]}) - - converse_api_params = { - "modelId": model_id, - "messages": messages, - "inferenceConfig": inference_config, - "toolConfig":toolConfig, - } - if system_prompt: - converse_api_params["system"] = [{"text": system_prompt}] - - response = bedrock_client.converse(**converse_api_params) - print(response) - - messages.append({"role": "assistant", "content": response['output']['message']['content']}) - - #If Claude stops because it wants to use a tool: - if response['stopReason'] == "tool_use": - tool_use = response['output']['message']['content'][-1] #Naive approach assumes only 1 tool is called at a time - tool_id = tool_use['toolUse']['toolUseId'] - tool_name = tool_use['toolUse']['name'] - tool_inputs = tool_use['toolUse']['input'] - - print(f"Using the {tool_name} tool...") - print(f"Tool Input:") - print(json.dumps(tool_inputs, indent=2)) - - retrieved_info = tool_call(tool_name, tool_inputs) - - - messages.append({ - "role": "user", - "content": [ - { - "toolResult": { - "toolUseId": tool_id, - "content": [ - { - "text": retrieved_info - } - ], - - } - } - ] - }) - - else: - print("\nClaude:" + f"{response['output']['message']['content'][0]['text']}") - #print("\nClaude: Is there anything else I can help you with?") - -if __name__ == '__main__': - #simple_chat(bedrock) - prompt = "What is the experimental history for subject 664956" - response = get_completion(prompt, bedrock) - print(response) \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/config.py b/src/metadata_chatbot/bedrock_model/config.py deleted file mode 100644 index 219b935..0000000 --- a/src/metadata_chatbot/bedrock_model/config.py +++ /dev/null @@ -1,65 +0,0 @@ -toolConfig = { - "tools": [ - { - "toolSpec": { - "name": "doc_retrieval", - "description": "Retrieve entire document from docDB. To be used only when it's necessary to retrieve all information in a document", - "inputSchema": { - "json": { - "type": "object", - "properties": { - "filter": { - "type": "string", - "description": "A MongoDB query to pass to the function" - } - }, - "required": ["filter"] - } - } - } - }, - { - "toolSpec": { - "name": "projection_retrieval", - "description": "Retrieve multiple documents from docDB with only specific field information. Used when most of the document is not necessary to answer natural language query", - "inputSchema": { - "json": { - "type": "object", - "properties": { - "filter": { - "type": "string", - "description": "A MongoDB query to pass to the function" - }, - "fieldNameList": { - "type": "string", - "description": "A list of field names following JSON format to retrieve from the document. The string shouldn't contain the value, just the key you would need to access the value in a metadata schema document." - } - }, - "required": ["filter", "fieldNameList"] - } - } - } - }, - { - "toolSpec": { - "name": "aggregation_retrieval", - "description": "Retrieve relevant values from docDB through an aggregation pipeline (running a series of operations on a collection of items). To be used in cases where multiple steps would be needed to retrieve desired output. Add allowDiskUse: true to the pipeline for large retrievals", - "inputSchema": { - "json": { - "type": "object", - "properties": { - "pipeline": { - "type": "string", - "description": "A MongoDB aggregation pipeline to pass to the function" - } - }, - "required": ["pipeline"] - } - } - } - } - ], - "toolChoice": { - "auto":{}, - } -} diff --git a/src/metadata_chatbot/bedrock_model/ref/acquisition_schema.json b/src/metadata_chatbot/bedrock_model/ref/acquisition_schema.json deleted file mode 100644 index 375eeea..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/acquisition_schema.json +++ /dev/null @@ -1,215 +0,0 @@ -{ - "additionalProperties": false, - "description": "Description of an imaging acquisition session", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/acquisition.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/acquisition.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.6.20", - "default": "0.6.20", - "title": "Schema Version" - }, - "protocol_id": { - "default": [], - "description": "DOI for protocols.io", - "items": { - "type": "string" - }, - "title": "Protocol ID", - "type": "array" - }, - "experimenter_full_name": { - "description": "First and last name of the experimenter(s).", - "items": { - "type": "string" - }, - "title": "Experimenter(s) full name", - "type": "array" - }, - "specimen_id": { - "title": "Specimen ID", - "type": "string" - }, - "subject_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Subject ID" - }, - "instrument_id": { - "title": "Instrument ID", - "type": "string" - }, - "calibrations": { - "default": [], - "description": "List of calibration measurements taken prior to acquisition.", - "items": { - "$ref": "#/$defs/Calibration" - }, - "title": "Calibrations", - "type": "array" - }, - "maintenance": { - "default": [], - "description": "List of maintenance on rig prior to acquisition.", - "items": { - "$ref": "#/$defs/Maintenance" - }, - "title": "Maintenance", - "type": "array" - }, - "session_start_time": { - "format": "date-time", - "title": "Session start time", - "type": "string" - }, - "session_end_time": { - "format": "date-time", - "title": "Session end time", - "type": "string" - }, - "session_type": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Session type" - }, - "tiles": { - "items": { - "$ref": "#/$defs/AcquisitionTile" - }, - "title": "Acquisition tiles", - "type": "array" - }, - "axes": { - "items": { - "$ref": "#/$defs/ImageAxis" - }, - "title": "Acquisition axes", - "type": "array" - }, - "chamber_immersion": { - "allOf": [ - { - "$ref": "#/$defs/Immersion" - } - ], - "title": "Acquisition chamber immersion data" - }, - "sample_immersion": { - "anyOf": [ - { - "$ref": "#/$defs/Immersion" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Acquisition sample immersion data" - }, - "active_objectives": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "title": "List of objectives used in this acquisition." - }, - "local_storage_directory": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Local storage directory" - }, - "external_storage_directory": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "External storage directory" - }, - "processing_steps": { - "default": [], - "description": "List of downstream processing steps planned for each channel", - "items": { - "$ref": "#/$defs/ProcessingSteps" - }, - "title": "Processing steps", - "type": "array" - }, - "software": { - "anyOf": [ - { - "items": { - "$ref": "#/$defs/Software" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": [], - "title": "Acquisition software version data" - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "experimenter_full_name", - "specimen_id", - "instrument_id", - "session_start_time", - "session_end_time", - "tiles", - "axes", - "chamber_immersion" - ], - "title": "Acquisition", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/data_description_schema.json b/src/metadata_chatbot/bedrock_model/ref/data_description_schema.json deleted file mode 100644 index 9ddab93..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/data_description_schema.json +++ /dev/null @@ -1,250 +0,0 @@ -{ - "subject_id": { - "description": "Unique identifier for the subject of data acquisition", - "pattern": "^[^_]+$", - "title": "Subject ID", - "type": "string" - }, - "creation_time": { - "description": "Time that data files were created, used to uniquely identify the data", - "format": "date-time", - "title": "Creation Time", - "type": "string" - }, - "label": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "A short name for the data, used in file names and labels", - "title": "Label" - }, - "name": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Name of data, conventionally also the name of the directory containing all data and metadata", - "title": "Name" - }, - "institution": { - "description": "An established society, corporation, foundation or other organization that collected this data", - "discriminator": { - "mapping": { - "Allen Institute for Brain Science": "#/$defs/AllenInstituteForBrainScience", - "Allen Institute for Neural Dynamics": "#/$defs/AllenInstituteForNeuralDynamics", - "Columbia University": "#/$defs/ColumbiaUniversity", - "Huazhong University of Science and Technology": "#/$defs/HuazhongUniversityOfScienceAndTechnology", - "Janelia Research Campus": "#/$defs/JaneliaResearchCampus", - "New York University": "#/$defs/NewYorkUniversity", - "Other": "#/$defs/Other" - }, - "propertyName": "name" - }, - "oneOf": [ - { - "$ref": "#/$defs/AllenInstituteForBrainScience" - }, - { - "$ref": "#/$defs/AllenInstituteForNeuralDynamics" - }, - { - "$ref": "#/$defs/ColumbiaUniversity" - }, - { - "$ref": "#/$defs/HuazhongUniversityOfScienceAndTechnology" - }, - { - "$ref": "#/$defs/JaneliaResearchCampus" - }, - { - "$ref": "#/$defs/NewYorkUniversity" - }, - { - "$ref": "#/$defs/Other" - } - ], - "title": "Institution" - }, - "funding_source": { - "description": "Funding source. If internal funding, select 'Allen Institute'", - "items": { - "$ref": "#/$defs/Funding" - }, - "minItems": 1, - "title": "Funding source", - "type": "array" - }, - "data_level": { - "allOf": [ - { - "$ref": "#/$defs/DataLevel" - } - ], - "description": "level of processing that data has undergone", - "title": "Data Level" - }, - "group": { - "anyOf": [ - { - "$ref": "#/$defs/Group" - }, - { - "type": "null" - } - ], - "default": null, - "description": "A short name for the group of individuals that collected this data", - "title": "Group" - }, - "investigators": { - "description": "Full name(s) of key investigators (e.g. PI, lead scientist, contact person)", - "items": { - "$ref": "#/$defs/PIDName" - }, - "minItems": 1, - "title": "Investigators", - "type": "array" - }, - "project_name": { - "anyOf": [ - { - "pattern": "^[^<>:;\"/|?\\\\_]+$", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "A name for a set of coordinated activities intended to achieve one or more objectives.", - "title": "Project Name" - }, - "restrictions": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Detail any restrictions on publishing or sharing these data", - "title": "Restrictions" - }, - "modality": { - "description": "A short name for the specific manner, characteristic, pattern of application, or the employmentof any technology or formal procedure to generate data for a study", - "items": { - "discriminator": { - "mapping": { - "Behavior": "#/$defs/aind_data_schema_models__modalities__Behavior", - "Behavior videos": "#/$defs/BehaviorVideos", - "Confocal microscopy": "#/$defs/aind_data_schema_models__modalities__Confocal", - "Electromyography": "#/$defs/Electromyography", - "Extracellular electrophysiology": "#/$defs/aind_data_schema_models__modalities__Ecephys", - "Fiber photometry": "#/$defs/Fib", - "Fluorescence micro-optical sectioning tomography": "#/$defs/Fmost", - "Intracellular electrophysiology": "#/$defs/Icephys", - "Intrinsic signal imaging": "#/$defs/aind_data_schema_models__modalities__Isi", - "Magnetic resonance imaging": "#/$defs/aind_data_schema_models__modalities__Mri", - "Multiplexed error-robust fluorescence in situ hybridization": "#/$defs/aind_data_schema_models__modalities__Merfish", - "Planar optical physiology": "#/$defs/POphys", - "Scanned line projection imaging": "#/$defs/Slap", - "Selective plane illumination microscopy": "#/$defs/Spim" - }, - "propertyName": "name" - }, - "oneOf": [ - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Behavior" - }, - { - "$ref": "#/$defs/BehaviorVideos" - }, - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Confocal" - }, - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Ecephys" - }, - { - "$ref": "#/$defs/Electromyography" - }, - { - "$ref": "#/$defs/Fmost" - }, - { - "$ref": "#/$defs/Icephys" - }, - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Isi" - }, - { - "$ref": "#/$defs/Fib" - }, - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Merfish" - }, - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Mri" - }, - { - "$ref": "#/$defs/POphys" - }, - { - "$ref": "#/$defs/Slap" - }, - { - "$ref": "#/$defs/Spim" - } - ] - }, - "title": "Modality", - "type": "array" - }, - "related_data": { - "default": [], - "description": "Path and description of data assets associated with this asset (eg. reference images)", - "items": { - "$ref": "#/$defs/RelatedData" - }, - "title": "Related data", - "type": "array" - }, - "data_summary": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Semantic summary of experimental goal", - "title": "Data summary" - }, - "required": [ - "platform", - "subject_id", - "creation_time", - "institution", - "funding_source", - "data_level", - "investigators", - "modality" - ], - "title": "DataDescription", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/instrument_schema.json b/src/metadata_chatbot/bedrock_model/ref/instrument_schema.json deleted file mode 100644 index 1fb9e51..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/instrument_schema.json +++ /dev/null @@ -1,597 +0,0 @@ -{ - "additionalProperties": false, - "description": "Description of an instrument, which is a collection of devices", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/instrument.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/instrument.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.10.28", - "default": "0.10.28", - "title": "Schema Version" - }, - "instrument_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Unique instrument identifier, name convention: --", - "title": "Instrument ID" - }, - "modification_date": { - "format": "date", - "title": "Date of modification", - "type": "string" - }, - "instrument_type": { - "allOf": [ - { - "$ref": "#/$defs/ImagingInstrumentType" - } - ], - "title": "Instrument type" - }, - "manufacturer": { - "discriminator": { - "mapping": { - "AA Opto Electronic": "#/$defs/AAOptoElectronic", - "ASUS": "#/$defs/Asus", - "Abcam": "#/$defs/Abcam", - "Addgene": "#/$defs/Addgene", - "Ailipu Technology Co": "#/$defs/AilipuTechnologyCo", - "Allen Institute": "#/$defs/AllenInstitute", - "Allen Institute for Brain Science": "#/$defs/AllenInstituteForBrainScience", - "Allen Institute for Neural Dynamics": "#/$defs/AllenInstituteForNeuralDynamics", - "Allied": "#/$defs/Allied", - "Applied Scientific Instrumentation": "#/$defs/AppliedScientificInstrumentation", - "Arecont Vision Costar": "#/$defs/ArecontVisionCostar", - "Basler": "#/$defs/Basler", - "Cambridge Technology": "#/$defs/CambridgeTechnology", - "Carl Zeiss": "#/$defs/CarlZeiss", - "Champalimaud Foundation": "#/$defs/ChampalimaudFoundation", - "Chan Zuckerberg Initiative": "#/$defs/ChanZuckerbergInitiative", - "Chroma": "#/$defs/Chroma", - "Coherent Scientific": "#/$defs/CoherentScientific", - "Columbia University": "#/$defs/ColumbiaUniversity", - "Computar": "#/$defs/Computar", - "Conoptics": "#/$defs/Conoptics", - "Custom": "#/$defs/Custom", - "Dodotronic": "#/$defs/Dodotronic", - "Doric": "#/$defs/Doric", - "Ealing": "#/$defs/Ealing", - "Edmund Optics": "#/$defs/EdmundOptics", - "Emory University": "#/$defs/EmoryUniversity", - "Euresys": "#/$defs/Euresys", - "Fujinon": "#/$defs/Fujinon", - "Hamamatsu": "#/$defs/Hamamatsu", - "Hamilton": "#/$defs/Hamilton", - "Huazhong University of Science and Technology": "#/$defs/HuazhongUniversityOfScienceAndTechnology", - "IR Robot Co": "#/$defs/IRRobotCo", - "ISL Products International": "#/$defs/ISLProductsInternational", - "Infinity Photo-Optical": "#/$defs/InfinityPhotoOptical", - "Integrated DNA Technologies": "#/$defs/IntegratedDNATechnologies", - "Interuniversity Microelectronics Center": "#/$defs/InteruniversityMicroelectronicsCenter", - "Invitrogen": "#/$defs/Invitrogen", - "Jackson Laboratory": "#/$defs/JacksonLaboratory", - "Janelia Research Campus": "#/$defs/JaneliaResearchCampus", - "Julabo": "#/$defs/Julabo", - "LG": "#/$defs/Lg", - "Leica": "#/$defs/Leica", - "LifeCanvas": "#/$defs/LifeCanvas", - "Lumen Dynamics": "#/$defs/LumenDynamics", - "MBF Bioscience": "#/$defs/MBFBioscience", - "MKS Newport": "#/$defs/MKSNewport", - "MPI": "#/$defs/Mpi", - "Meadowlark Optics": "#/$defs/MeadowlarkOptics", - "Michael J. Fox Foundation for Parkinson's Research": "#/$defs/MichaelJFoxFoundationForParkinsonsResearch", - "Midwest Optical Systems, Inc.": "#/$defs/MidwestOpticalSystems", - "Mitutuyo": "#/$defs/Mitutuyo", - "NResearch Inc": "#/$defs/NResearch", - "National Center for Complementary and Integrative Health": "#/$defs/NationalCenterForComplementaryAndIntegrativeHealth", - "National Institute of Mental Health": "#/$defs/NationalInstituteOfMentalHealth", - "National Institute of Neurological Disorders and Stroke": "#/$defs/NationalInstituteOfNeurologicalDisordersAndStroke", - "National Instruments": "#/$defs/NationalInstruments", - "Navitar": "#/$defs/Navitar", - "Neurophotometrics": "#/$defs/Neurophotometrics", - "New Scale Technologies": "#/$defs/NewScaleTechnologies", - "New York University": "#/$defs/NewYorkUniversity", - "Nikon": "#/$defs/Nikon", - "Olympus": "#/$defs/Olympus", - "Open Ephys Production Site": "#/$defs/OpenEphysProductionSite", - "Optotune": "#/$defs/Optotune", - "Other": "#/$defs/Other", - "Oxxius": "#/$defs/Oxxius", - "Prizmatix": "#/$defs/Prizmatix", - "Quantifi": "#/$defs/Quantifi", - "Raspberry Pi": "#/$defs/RaspberryPi", - "SICGEN": "#/$defs/Sicgen", - "Schneider-Kreuznach": "#/$defs/SchneiderKreuznach", - "Second Order Effects": "#/$defs/SecondOrderEffects", - "Semrock": "#/$defs/Semrock", - "Sigma-Aldritch": "#/$defs/SigmaAldritch", - "Simons Foundation": "#/$defs/SimonsFoundation", - "Spinnaker": "#/$defs/Spinnaker", - "Tamron": "#/$defs/Tamron", - "Technical Manufacturing Corporation": "#/$defs/TMC", - "Teledyne FLIR": "#/$defs/TeledyneFLIR", - "Templeton World Charity Foundation": "#/$defs/TempletonWorldCharityFoundation", - "The Imaging Source": "#/$defs/TheImagingSource", - "The Lee Company": "#/$defs/TheLeeCompany", - "Thermo Fisher": "#/$defs/Thermofisher", - "Thorlabs": "#/$defs/Thorlabs", - "Tymphany": "#/$defs/Tymphany", - "Vieworks": "#/$defs/Vieworks", - "Vortran": "#/$defs/Vortran", - "ams OSRAM": "#/$defs/AmsOsram" - }, - "propertyName": "name" - }, - "oneOf": [ - { - "$ref": "#/$defs/AAOptoElectronic" - }, - { - "$ref": "#/$defs/Abcam" - }, - { - "$ref": "#/$defs/Addgene" - }, - { - "$ref": "#/$defs/AilipuTechnologyCo" - }, - { - "$ref": "#/$defs/AllenInstitute" - }, - { - "$ref": "#/$defs/AllenInstituteForBrainScience" - }, - { - "$ref": "#/$defs/AllenInstituteForNeuralDynamics" - }, - { - "$ref": "#/$defs/Allied" - }, - { - "$ref": "#/$defs/AmsOsram" - }, - { - "$ref": "#/$defs/AppliedScientificInstrumentation" - }, - { - "$ref": "#/$defs/Asus" - }, - { - "$ref": "#/$defs/ArecontVisionCostar" - }, - { - "$ref": "#/$defs/Basler" - }, - { - "$ref": "#/$defs/CambridgeTechnology" - }, - { - "$ref": "#/$defs/ChampalimaudFoundation" - }, - { - "$ref": "#/$defs/ChanZuckerbergInitiative" - }, - { - "$ref": "#/$defs/Chroma" - }, - { - "$ref": "#/$defs/CoherentScientific" - }, - { - "$ref": "#/$defs/ColumbiaUniversity" - }, - { - "$ref": "#/$defs/Computar" - }, - { - "$ref": "#/$defs/Conoptics" - }, - { - "$ref": "#/$defs/Custom" - }, - { - "$ref": "#/$defs/Dodotronic" - }, - { - "$ref": "#/$defs/Doric" - }, - { - "$ref": "#/$defs/Ealing" - }, - { - "$ref": "#/$defs/EdmundOptics" - }, - { - "$ref": "#/$defs/EmoryUniversity" - }, - { - "$ref": "#/$defs/Euresys" - }, - { - "$ref": "#/$defs/Fujinon" - }, - { - "$ref": "#/$defs/Hamamatsu" - }, - { - "$ref": "#/$defs/Hamilton" - }, - { - "$ref": "#/$defs/HuazhongUniversityOfScienceAndTechnology" - }, - { - "$ref": "#/$defs/TheImagingSource" - }, - { - "$ref": "#/$defs/IntegratedDNATechnologies" - }, - { - "$ref": "#/$defs/InteruniversityMicroelectronicsCenter" - }, - { - "$ref": "#/$defs/InfinityPhotoOptical" - }, - { - "$ref": "#/$defs/Invitrogen" - }, - { - "$ref": "#/$defs/ISLProductsInternational" - }, - { - "$ref": "#/$defs/JacksonLaboratory" - }, - { - "$ref": "#/$defs/JaneliaResearchCampus" - }, - { - "$ref": "#/$defs/Julabo" - }, - { - "$ref": "#/$defs/TheLeeCompany" - }, - { - "$ref": "#/$defs/Leica" - }, - { - "$ref": "#/$defs/Lg" - }, - { - "$ref": "#/$defs/LifeCanvas" - }, - { - "$ref": "#/$defs/MeadowlarkOptics" - }, - { - "$ref": "#/$defs/IRRobotCo" - }, - { - "$ref": "#/$defs/MBFBioscience" - }, - { - "$ref": "#/$defs/MichaelJFoxFoundationForParkinsonsResearch" - }, - { - "$ref": "#/$defs/MidwestOpticalSystems" - }, - { - "$ref": "#/$defs/Mitutuyo" - }, - { - "$ref": "#/$defs/MKSNewport" - }, - { - "$ref": "#/$defs/Mpi" - }, - { - "$ref": "#/$defs/NationalCenterForComplementaryAndIntegrativeHealth" - }, - { - "$ref": "#/$defs/NationalInstituteOfMentalHealth" - }, - { - "$ref": "#/$defs/NationalInstituteOfNeurologicalDisordersAndStroke" - }, - { - "$ref": "#/$defs/NationalInstruments" - }, - { - "$ref": "#/$defs/Navitar" - }, - { - "$ref": "#/$defs/Neurophotometrics" - }, - { - "$ref": "#/$defs/NewScaleTechnologies" - }, - { - "$ref": "#/$defs/NewYorkUniversity" - }, - { - "$ref": "#/$defs/Nikon" - }, - { - "$ref": "#/$defs/NResearch" - }, - { - "$ref": "#/$defs/OpenEphysProductionSite" - }, - { - "$ref": "#/$defs/Olympus" - }, - { - "$ref": "#/$defs/Optotune" - }, - { - "$ref": "#/$defs/Oxxius" - }, - { - "$ref": "#/$defs/Prizmatix" - }, - { - "$ref": "#/$defs/Quantifi" - }, - { - "$ref": "#/$defs/RaspberryPi" - }, - { - "$ref": "#/$defs/SecondOrderEffects" - }, - { - "$ref": "#/$defs/Semrock" - }, - { - "$ref": "#/$defs/SchneiderKreuznach" - }, - { - "$ref": "#/$defs/Sicgen" - }, - { - "$ref": "#/$defs/SigmaAldritch" - }, - { - "$ref": "#/$defs/SimonsFoundation" - }, - { - "$ref": "#/$defs/Spinnaker" - }, - { - "$ref": "#/$defs/Tamron" - }, - { - "$ref": "#/$defs/TempletonWorldCharityFoundation" - }, - { - "$ref": "#/$defs/TeledyneFLIR" - }, - { - "$ref": "#/$defs/Thermofisher" - }, - { - "$ref": "#/$defs/Thorlabs" - }, - { - "$ref": "#/$defs/TMC" - }, - { - "$ref": "#/$defs/Tymphany" - }, - { - "$ref": "#/$defs/Vieworks" - }, - { - "$ref": "#/$defs/Vortran" - }, - { - "$ref": "#/$defs/CarlZeiss" - }, - { - "$ref": "#/$defs/LumenDynamics" - }, - { - "$ref": "#/$defs/Other" - } - ], - "title": "Instrument manufacturer" - }, - "temperature_control": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Temperature control" - }, - "humidity_control": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Humidity control" - }, - "optical_tables": { - "default": [], - "items": { - "$ref": "#/$defs/OpticalTable" - }, - "title": "Optical table", - "type": "array" - }, - "enclosure": { - "anyOf": [ - { - "$ref": "#/$defs/Enclosure" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Enclosure" - }, - "objectives": { - "items": { - "$ref": "#/$defs/Objective" - }, - "title": "Objectives", - "type": "array" - }, - "detectors": { - "default": [], - "items": { - "$ref": "#/$defs/Detector" - }, - "title": "Detectors", - "type": "array" - }, - "light_sources": { - "default": [], - "items": { - "discriminator": { - "mapping": { - "Lamp": "#/$defs/Lamp", - "Laser": "#/$defs/Laser", - "Light emitting diode": "#/$defs/LightEmittingDiode" - }, - "propertyName": "device_type" - }, - "oneOf": [ - { - "$ref": "#/$defs/Laser" - }, - { - "$ref": "#/$defs/LightEmittingDiode" - }, - { - "$ref": "#/$defs/Lamp" - } - ] - }, - "title": "Light sources", - "type": "array" - }, - "lenses": { - "default": [], - "items": { - "$ref": "#/$defs/Lens" - }, - "title": "Lenses", - "type": "array" - }, - "fluorescence_filters": { - "default": [], - "items": { - "$ref": "#/$defs/Filter" - }, - "title": "Fluorescence filters", - "type": "array" - }, - "motorized_stages": { - "default": [], - "items": { - "$ref": "#/$defs/MotorizedStage" - }, - "title": "Motorized stages", - "type": "array" - }, - "scanning_stages": { - "default": [], - "items": { - "$ref": "#/$defs/ScanningStage" - }, - "title": "Scanning motorized stages", - "type": "array" - }, - "additional_devices": { - "default": [], - "items": { - "$ref": "#/$defs/AdditionalImagingDevice" - }, - "title": "Additional devices", - "type": "array" - }, - "calibration_date": { - "anyOf": [ - { - "format": "date", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Date of most recent calibration", - "title": "Calibration date" - }, - "calibration_data": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to calibration data from most recent calibration", - "title": "Calibration data" - }, - "com_ports": { - "default": [], - "items": { - "$ref": "#/$defs/Com" - }, - "title": "COM ports", - "type": "array" - }, - "daqs": { - "default": [], - "items": { - "$ref": "#/$defs/DAQDevice" - }, - "title": "DAQ", - "type": "array" - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "modification_date", - "instrument_type", - "manufacturer", - "objectives" - ], - "title": "Instrument", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/metadata.json b/src/metadata_chatbot/bedrock_model/ref/metadata.json deleted file mode 100644 index 396d5b4..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/metadata.json +++ /dev/null @@ -1,177 +0,0 @@ -{ - "additionalProperties": false, - "description": "The records in the Data Asset Collection needs to contain certain fields\nto easily query and index the data.", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/metadata.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/metadata.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.2.32", - "default": "0.2.32", - "title": "Schema Version" - }, - "_id": { - "description": "The unique id of the data asset.", - "format": "uuid", - "title": "Data Asset ID", - "type": "string" - }, - "name": { - "description": "Name of the data asset.", - "title": "Data Asset Name", - "type": "string" - }, - "created": { - "description": "The utc date and time the data asset created.", - "format": "date-time", - "title": "Created", - "type": "string" - }, - "last_modified": { - "description": "The utc date and time that the data asset was last modified.", - "format": "date-time", - "title": "Last Modified", - "type": "string" - }, - "location": { - "description": "Current location of the data asset.", - "title": "Location", - "type": "string" - }, - "metadata_status": { - "allOf": [ - { - "$ref": "#/$defs/MetadataStatus" - } - ], - "default": "Unknown", - "description": "The status of the metadata.", - "title": " Metadata Status" - }, - "external_links": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "default": [], - "description": "Links to the data asset on different platforms.", - "title": "External Links", - "type": "object" - }, - "subject": { - "anyOf": [ - { - "$ref": "#/$defs/Subject" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Subject of data collection.", - "title": "Subject" - }, - "data_description": { - "anyOf": [ - { - "$ref": "#/$defs/DataDescription" - }, - { - "type": "null" - } - ], - "default": null, - "description": "A logical collection of data files.", - "title": "Data Description" - }, - "procedures": { - "anyOf": [ - { - "$ref": "#/$defs/Procedures" - }, - { - "type": "null" - } - ], - "default": null, - "description": "All procedures performed on a subject.", - "title": "Procedures" - }, - "session": { - "anyOf": [ - { - "$ref": "#/$defs/Session" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Description of a session.", - "title": "Session" - }, - "rig": { - "anyOf": [ - { - "$ref": "#/$defs/Rig" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Rig.", - "title": "Rig" - }, - "processing": { - "anyOf": [ - { - "$ref": "#/$defs/Processing" - }, - { - "type": "null" - } - ], - "default": null, - "description": "All processes run on data.", - "title": "Processing" - }, - "acquisition": { - "anyOf": [ - { - "$ref": "#/$defs/Acquisition" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Imaging acquisition session", - "title": "Acquisition" - }, - "instrument": { - "anyOf": [ - { - "$ref": "#/$defs/Instrument" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Instrument, which is a collection of devices", - "title": "Instrument" - } - }, - "required": [ - "name", - "location" - ], - "title": "Metadata", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/procedures_schema.json b/src/metadata_chatbot/bedrock_model/ref/procedures_schema.json deleted file mode 100644 index 36d82d6..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/procedures_schema.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "additionalProperties": false, - "description": "Description of all procedures performed on a subject", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/procedures.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/procedures.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.13.14", - "default": "0.13.14", - "title": "Schema Version" - }, - "subject_id": { - "description": "Unique identifier for the subject. If this is not a Allen LAS ID, indicate this in the Notes.", - "title": "Subject ID", - "type": "string" - }, - "subject_procedures": { - "default": [], - "items": { - "discriminator": { - "mapping": { - "Other Subject Procedure": "#/$defs/OtherSubjectProcedure", - "Surgery": "#/$defs/Surgery", - "Training": "#/$defs/TrainingProtocol", - "Water restriction": "#/$defs/WaterRestriction" - }, - "propertyName": "procedure_type" - }, - "oneOf": [ - { - "$ref": "#/$defs/Surgery" - }, - { - "$ref": "#/$defs/TrainingProtocol" - }, - { - "$ref": "#/$defs/WaterRestriction" - }, - { - "$ref": "#/$defs/OtherSubjectProcedure" - } - ] - }, - "title": "Subject Procedures", - "type": "array" - }, - "specimen_procedures": { - "default": [], - "items": { - "$ref": "#/$defs/SpecimenProcedure" - }, - "title": "Specimen Procedures", - "type": "array" - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "subject_id" - ], - "title": "Procedures", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/processing_schema.json b/src/metadata_chatbot/bedrock_model/ref/processing_schema.json deleted file mode 100644 index 01336a8..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/processing_schema.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - - "additionalProperties": false, - "description": "Description of all processes run on data", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/processing.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/processing.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.4.8", - "default": "0.4.8", - "title": "Schema Version" - }, - "processing_pipeline": { - "allOf": [ - { - "$ref": "#/$defs/PipelineProcess" - } - ], - "description": "Pipeline used to process data", - "title": "Processing Pipeline" - }, - "analyses": { - "default": [], - "description": "Analysis steps taken after processing", - "items": { - "$ref": "#/$defs/AnalysisProcess" - }, - "title": "Analysis Steps", - "type": "array" - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "processing_pipeline" - ], - "title": "Processing", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/rig_schema.json b/src/metadata_chatbot/bedrock_model/ref/rig_schema.json deleted file mode 100644 index 592ebb4..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/rig_schema.json +++ /dev/null @@ -1,409 +0,0 @@ -{ - - "additionalProperties": false, - "description": "Description of a rig", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/rig.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/rig.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.5.4", - "default": "0.5.4", - "title": "Schema Version" - }, - "rig_id": { - "description": "Unique rig identifier, name convention: --", - "pattern": "^[a-zA-Z0-9]+_[a-zA-Z0-9-]+_\\d{8}$", - "title": "Rig ID", - "type": "string" - }, - "modification_date": { - "format": "date", - "title": "Date of modification", - "type": "string" - }, - "mouse_platform": { - "discriminator": { - "mapping": { - "Arena": "#/$defs/Arena", - "Disc": "#/$defs/Disc", - "Treadmill": "#/$defs/aind_data_schema__components__devices__Treadmill", - "Tube": "#/$defs/Tube", - "Wheel": "#/$defs/Wheel" - }, - "propertyName": "device_type" - }, - "oneOf": [ - { - "$ref": "#/$defs/Disc" - }, - { - "$ref": "#/$defs/Wheel" - }, - { - "$ref": "#/$defs/Tube" - }, - { - "$ref": "#/$defs/aind_data_schema__components__devices__Treadmill" - }, - { - "$ref": "#/$defs/Arena" - } - ], - "title": "Mouse Platform" - }, - "stimulus_devices": { - "default": [], - "items": { - "discriminator": { - "mapping": { - "Monitor": "#/$defs/Monitor", - "Olfactometer": "#/$defs/aind_data_schema__components__devices__Olfactometer", - "Reward delivery": "#/$defs/RewardDelivery", - "Speaker": "#/$defs/Speaker" - }, - "propertyName": "device_type" - }, - "oneOf": [ - { - "$ref": "#/$defs/Monitor" - }, - { - "$ref": "#/$defs/aind_data_schema__components__devices__Olfactometer" - }, - { - "$ref": "#/$defs/RewardDelivery" - }, - { - "$ref": "#/$defs/Speaker" - } - ] - }, - "title": "Stimulus devices", - "type": "array" - }, - "cameras": { - "default": [], - "items": { - "$ref": "#/$defs/CameraAssembly" - }, - "title": "Camera assemblies", - "type": "array" - }, - "enclosure": { - "anyOf": [ - { - "$ref": "#/$defs/Enclosure" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Enclosure" - }, - "ephys_assemblies": { - "default": [], - "items": { - "$ref": "#/$defs/EphysAssembly" - }, - "title": "Ephys probes", - "type": "array" - }, - "fiber_assemblies": { - "default": [], - "items": { - "$ref": "#/$defs/FiberAssembly" - }, - "title": "Inserted fiber optics", - "type": "array" - }, - "stick_microscopes": { - "default": [], - "items": { - "$ref": "#/$defs/CameraAssembly" - }, - "title": "Stick microscopes", - "type": "array" - }, - "laser_assemblies": { - "default": [], - "items": { - "$ref": "#/$defs/LaserAssembly" - }, - "title": "Laser modules", - "type": "array" - }, - "patch_cords": { - "default": [], - "items": { - "$ref": "#/$defs/Patch" - }, - "title": "Patch cords", - "type": "array" - }, - "light_sources": { - "default": [], - "items": { - "discriminator": { - "mapping": { - "Lamp": "#/$defs/Lamp", - "Laser": "#/$defs/Laser", - "Light emitting diode": "#/$defs/LightEmittingDiode" - }, - "propertyName": "device_type" - }, - "oneOf": [ - { - "$ref": "#/$defs/Laser" - }, - { - "$ref": "#/$defs/LightEmittingDiode" - }, - { - "$ref": "#/$defs/Lamp" - } - ] - }, - "title": "Light sources", - "type": "array" - }, - "detectors": { - "default": [], - "items": { - "$ref": "#/$defs/Detector" - }, - "title": "Detectors", - "type": "array" - }, - "objectives": { - "default": [], - "items": { - "$ref": "#/$defs/Objective" - }, - "title": "Objectives", - "type": "array" - }, - "filters": { - "default": [], - "items": { - "$ref": "#/$defs/Filter" - }, - "title": "Filters", - "type": "array" - }, - "lenses": { - "default": [], - "items": { - "$ref": "#/$defs/Lens" - }, - "title": "Lenses", - "type": "array" - }, - "digital_micromirror_devices": { - "default": [], - "items": { - "$ref": "#/$defs/DigitalMicromirrorDevice" - }, - "title": "DMDs", - "type": "array" - }, - "polygonal_scanners": { - "default": [], - "items": { - "$ref": "#/$defs/PolygonalScanner" - }, - "title": "Polygonal scanners", - "type": "array" - }, - "pockels_cells": { - "default": [], - "items": { - "$ref": "#/$defs/PockelsCell" - }, - "title": "Pockels cells", - "type": "array" - }, - "additional_devices": { - "default": [], - "items": { - "$ref": "#/$defs/Device" - }, - "title": "Additional devices", - "type": "array" - }, - "daqs": { - "default": [], - "items": { - "discriminator": { - "mapping": { - "DAQ Device": "#/$defs/DAQDevice", - "Harp device": "#/$defs/HarpDevice", - "Neuropixels basestation": "#/$defs/NeuropixelsBasestation", - "Open Ephys acquisition board": "#/$defs/OpenEphysAcquisitionBoard" - }, - "propertyName": "device_type" - }, - "oneOf": [ - { - "$ref": "#/$defs/HarpDevice" - }, - { - "$ref": "#/$defs/NeuropixelsBasestation" - }, - { - "$ref": "#/$defs/OpenEphysAcquisitionBoard" - }, - { - "$ref": "#/$defs/DAQDevice" - } - ] - }, - "title": "Data acquisition devices", - "type": "array" - }, - "calibrations": { - "items": { - "$ref": "#/$defs/Calibration" - }, - "title": "Full calibration of devices", - "type": "array" - }, - "ccf_coordinate_transform": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to file that details the CCF-to-lab coordinate transform", - "title": "CCF coordinate transform" - }, - "origin": { - "anyOf": [ - { - "$ref": "#/$defs/Origin" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Origin point for rig position transforms" - }, - "rig_axes": { - "anyOf": [ - { - "items": { - "$ref": "#/$defs/Axis" - }, - "maxItems": 3, - "minItems": 3, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Rig axes" - }, - "modalities": { - "items": { - "discriminator": { - "mapping": { - "Behavior": "#/$defs/aind_data_schema_models__modalities__Behavior", - "Behavior videos": "#/$defs/BehaviorVideos", - "Confocal microscopy": "#/$defs/Confocal", - "Electromyography": "#/$defs/Electromyography", - "Extracellular electrophysiology": "#/$defs/Ecephys", - "Fiber photometry": "#/$defs/Fib", - "Fluorescence micro-optical sectioning tomography": "#/$defs/Fmost", - "Intracellular electrophysiology": "#/$defs/Icephys", - "Intrinsic signal imaging": "#/$defs/Isi", - "Magnetic resonance imaging": "#/$defs/Mri", - "Multiplexed error-robust fluorescence in situ hybridization": "#/$defs/Merfish", - "Planar optical physiology": "#/$defs/POphys", - "Scanned line projection imaging": "#/$defs/Slap", - "Selective plane illumination microscopy": "#/$defs/Spim" - }, - "propertyName": "name" - }, - "oneOf": [ - { - "$ref": "#/$defs/aind_data_schema_models__modalities__Behavior" - }, - { - "$ref": "#/$defs/BehaviorVideos" - }, - { - "$ref": "#/$defs/Confocal" - }, - { - "$ref": "#/$defs/Ecephys" - }, - { - "$ref": "#/$defs/Electromyography" - }, - { - "$ref": "#/$defs/Fmost" - }, - { - "$ref": "#/$defs/Icephys" - }, - { - "$ref": "#/$defs/Isi" - }, - { - "$ref": "#/$defs/Fib" - }, - { - "$ref": "#/$defs/Merfish" - }, - { - "$ref": "#/$defs/Mri" - }, - { - "$ref": "#/$defs/POphys" - }, - { - "$ref": "#/$defs/Slap" - }, - { - "$ref": "#/$defs/Spim" - } - ] - }, - "title": "Modalities", - "type": "array", - "uniqueItems": true - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "rig_id", - "modification_date", - "mouse_platform", - "calibrations", - "modalities" - ], - "title": "Rig", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/session_schema.json b/src/metadata_chatbot/bedrock_model/ref/session_schema.json deleted file mode 100644 index 0a65d95..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/session_schema.json +++ /dev/null @@ -1,245 +0,0 @@ -{ - "additionalProperties": false, - "description": "Description of a physiology and/or behavior session", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/session.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/session.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.3.4", - "default": "0.3.4", - "title": "Schema Version" - }, - "protocol_id": { - "default": [], - "description": "DOI for protocols.io", - "items": { - "type": "string" - }, - "title": "Protocol ID", - "type": "array" - }, - "experimenter_full_name": { - "description": "First and last name of the experimenter(s).", - "items": { - "type": "string" - }, - "title": "Experimenter(s) full name", - "type": "array" - }, - "session_start_time": { - "format": "date-time", - "title": "Session start time", - "type": "string" - }, - "session_end_time": { - "anyOf": [ - { - "format": "date-time", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Session end time" - }, - "session_type": { - "title": "Session type", - "type": "string" - }, - "iacuc_protocol": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "IACUC protocol" - }, - "rig_id": { - "title": "Rig ID", - "type": "string" - }, - "calibrations": { - "default": [], - "description": "Calibrations of rig devices prior to session", - "items": { - "$ref": "#/$defs/Calibration" - }, - "title": "Calibrations", - "type": "array" - }, - "maintenance": { - "default": [], - "description": "Maintenance of rig devices prior to session", - "items": { - "$ref": "#/$defs/Maintenance" - }, - "title": "Maintenance", - "type": "array" - }, - "subject_id": { - "title": "Subject ID", - "type": "string" - }, - "animal_weight_prior": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Animal weight before procedure", - "title": "Animal weight (g)" - }, - "animal_weight_post": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Animal weight after procedure", - "title": "Animal weight (g)" - }, - "weight_unit": { - "allOf": [ - { - "$ref": "#/$defs/MassUnit" - } - ], - "default": "gram", - "title": "Weight unit" - }, - "anaesthesia": { - "anyOf": [ - { - "$ref": "#/$defs/Anaesthetic" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Anaesthesia" - }, - "data_streams": { - "description": "A data stream is a collection of devices that are recorded simultaneously. Each session can include multiple streams (e.g., if the manipulators are moved to a new location)", - "items": { - "$ref": "#/$defs/Stream" - }, - "title": "Data streams", - "type": "array" - }, - "stimulus_epochs": { - "default": [], - "items": { - "$ref": "#/$defs/StimulusEpoch" - }, - "title": "Stimulus", - "type": "array" - }, - "mouse_platform_name": { - "title": "Mouse platform", - "type": "string" - }, - "active_mouse_platform": { - "description": "Is the mouse platform being actively controlled", - "title": "Active mouse platform", - "type": "boolean" - }, - "headframe_registration": { - "anyOf": [ - { - "$ref": "#/$defs/Affine3dTransform" - }, - { - "type": "null" - } - ], - "default": null, - "description": "MRI transform matrix for headframe", - "title": "Headframe registration" - }, - "reward_delivery": { - "anyOf": [ - { - "$ref": "#/$defs/RewardDeliveryConfig" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Reward delivery" - }, - "reward_consumed_total": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Total reward consumed (mL)" - }, - "reward_consumed_unit": { - "allOf": [ - { - "$ref": "#/$defs/VolumeUnit" - } - ], - "default": "milliliter", - "title": "Reward consumed unit" - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "experimenter_full_name", - "session_start_time", - "session_type", - "rig_id", - "subject_id", - "data_streams", - "mouse_platform_name", - "active_mouse_platform" - ], - "title": "Session", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/subject_609281_metadata.json b/src/metadata_chatbot/bedrock_model/ref/subject_609281_metadata.json deleted file mode 100644 index 8c697f6..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/subject_609281_metadata.json +++ /dev/null @@ -1,59 +0,0 @@ -[{ - "_id": "b9fec2e2-9ba3-42ca-a6ec-cf2cbc9b56c6", - "acquisition": null, - "created": "2024-06-20T21:13:59.324230", - "data_description": { - "creation_date": "2022-11-03", - "creation_time": "13:49:18", - "data_level": "raw data", - "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/data_description.py", - "funding_source": [], - "group": null, - "institution": "AIND", - "license": "CC-BY-4.0", - "modality": "exaSPIM", - "name": "exaSPIM_609281_2022-11-03_13-49-18", - "project_id": null, - "project_name": null, - "restrictions": null, - "schema_version": "0.3.0", - "subject_id": "609281" - }, - "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/metadata.py", - "external_links": [], - "instrument": { - "cameras": [], - "lasers": [] - }, - "last_modified": "2024-06-20T21:13:59.324234", - "location": "s3://aind-open-data/exaSPIM_609281_2022-11-03_13-49-18", - "metadata_status": "Unknown", - "name": "exaSPIM_609281_2022-11-03_13-49-18", - "procedures": null, - "processing": null, - "rig": null, - "schema_version": "0.2.7", - "session": null, - "subject": { - "background_strain": null, - "breeding_group": "Slc17a6-IRES-Cre(ND)", - "date_of_birth": "2021-11-06", - "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/site-packages/aind_data_schema/subject.py", - "genotype": "Slc17a6-IRES-Cre/wt", - "home_cage_enrichment": null, - "light_cycle": null, - "maternal_genotype": "wt/wt", - "maternal_id": "595446", - "mgi_allele_ids": null, - "notes": null, - "paternal_genotype": "Slc17a6-IRES-Cre/wt", - "paternal_id": "593407", - "restrictions": null, - "schema_version": "0.2.1", - "sex": "Male", - "source": null, - "species": "Mus musculus", - "subject_id": "609281", - "wellness_reports": null - } -}] \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/ref/subject_schema.json b/src/metadata_chatbot/bedrock_model/ref/subject_schema.json deleted file mode 100644 index 4938bfb..0000000 --- a/src/metadata_chatbot/bedrock_model/ref/subject_schema.json +++ /dev/null @@ -1,212 +0,0 @@ -{ - "additionalProperties": false, - "description": "Description of a subject of data collection", - "properties": { - "describedBy": { - "const": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/subject.py", - "default": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/subject.py", - "title": "Describedby", - "type": "string" - }, - "schema_version": { - "const": "0.5.9", - "default": "0.5.9", - "title": "Schema Version" - }, - "subject_id": { - "description": "Unique identifier for the subject. If this is not a Allen LAS ID, indicate this in the Notes.", - "title": "Subject ID", - "type": "string" - }, - "sex": { - "$ref": "#/$defs/Sex" - }, - "date_of_birth": { - "format": "date", - "title": "Date of birth", - "type": "string" - }, - "genotype": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Genotype of the animal providing both alleles", - "title": "Genotype" - }, - "species": { - "discriminator": { - "mapping": { - "Callithrix jacchus": "#/$defs/CallithrixJacchus", - "Homo sapiens": "#/$defs/HomoSapiens", - "Macaca mulatta": "#/$defs/MacacaMulatta", - "Mus musculus": "#/$defs/MusMusculus", - "Rattus norvegicus": "#/$defs/RattusNorvegicus" - }, - "propertyName": "name" - }, - "oneOf": [ - { - "$ref": "#/$defs/CallithrixJacchus" - }, - { - "$ref": "#/$defs/HomoSapiens" - }, - { - "$ref": "#/$defs/MacacaMulatta" - }, - { - "$ref": "#/$defs/MusMusculus" - }, - { - "$ref": "#/$defs/RattusNorvegicus" - } - ], - "title": "Species" - }, - "alleles": { - "default": [], - "description": "Allele names and persistent IDs", - "items": { - "$ref": "#/$defs/PIDName" - }, - "title": "Alleles", - "type": "array" - }, - "background_strain": { - "anyOf": [ - { - "$ref": "#/$defs/BackgroundStrain" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Background strain" - }, - "breeding_info": { - "anyOf": [ - { - "$ref": "#/$defs/BreedingInfo" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Breeding Info" - }, - "source": { - "description": "Where the subject was acquired from. If bred in-house, use Allen Institute.", - "discriminator": { - "mapping": { - "Allen Institute": "#/$defs/AllenInstitute", - "Columbia University": "#/$defs/ColumbiaUniversity", - "Huazhong University of Science and Technology": "#/$defs/HuazhongUniversityOfScienceAndTechnology", - "Jackson Laboratory": "#/$defs/JacksonLaboratory", - "Janelia Research Campus": "#/$defs/JaneliaResearchCampus", - "New York University": "#/$defs/NewYorkUniversity", - "Other": "#/$defs/Other" - }, - "propertyName": "name" - }, - "oneOf": [ - { - "$ref": "#/$defs/AllenInstitute" - }, - { - "$ref": "#/$defs/ColumbiaUniversity" - }, - { - "$ref": "#/$defs/HuazhongUniversityOfScienceAndTechnology" - }, - { - "$ref": "#/$defs/JaneliaResearchCampus" - }, - { - "$ref": "#/$defs/JacksonLaboratory" - }, - { - "$ref": "#/$defs/NewYorkUniversity" - }, - { - "$ref": "#/$defs/Other" - } - ], - "title": "Source" - }, - "rrid": { - "anyOf": [ - { - "$ref": "#/$defs/PIDName" - }, - { - "type": "null" - } - ], - "default": null, - "description": "RRID of mouse if acquired from supplier", - "title": "RRID" - }, - "restrictions": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Any restrictions on use or publishing based on subject source", - "title": "Restrictions" - }, - "wellness_reports": { - "default": [], - "items": { - "$ref": "#/$defs/WellnessReport" - }, - "title": "Wellness Report", - "type": "array" - }, - "housing": { - "anyOf": [ - { - "$ref": "#/$defs/Housing" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Housing" - }, - "notes": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Notes" - } - }, - "required": [ - "subject_id", - "sex", - "date_of_birth", - "species", - "source" - ], - "title": "Subject", - "type": "object" -} \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/system_prompt.py b/src/metadata_chatbot/bedrock_model/system_prompt.py deleted file mode 100644 index 2c16cd4..0000000 --- a/src/metadata_chatbot/bedrock_model/system_prompt.py +++ /dev/null @@ -1,142 +0,0 @@ -import os, json, re, logging -from pathlib import Path - -cwd = os.path.dirname(os.path.realpath(__file__)) -folder = Path(f"{cwd}\\ref") - -schema_types = [] - - -for name in os.listdir(folder): - #loading in schema files - f = open(f'{folder}\\{name}') - file = json.load(f) - if (re.search(r'\d', name)): - sample_metadata = file - if "schema.json" in name: - schema_types.append(file) - if name == "metadata.json": - metadata_schema = file - -system_prompt = f""" -You are a neuroscientist with extensive knowledge about processes involving in neuroscience research. -You are also an expert in crafting queries and projections in MongoDB. - -Here is a list of schemas that contains information about the structure of a JSON file. -Each schema is provided in a specified format and each file corresponds to a different section of an experiment. -List of schemas: {schema_types} - -The Metadata schema shows how the different schema types are arranged, and how to appropriately access them. -For example, in order to access something within the procedures field, you will have to start the query with "procedures." -Metadata schema: {metadata_schema} - -I provide you with a sample, filled out metadata schema. It may contain missing information but serves as a reference to what a metadata file looks like. -You can use it as a guide to better structure your queries. -Sample metadata: {sample_metadata} - -Your task is to read the user's question, which will adhere to certain guidelines or formats and create a MongoDB query and projection, to - -Here are some examples: -Input: Give me the query to find subject's whose breeding group is Chat-IRES-Cre_Jax006410 -Output: "subject.breeding_info.breeding_group": "Chat-IRES-Cre_Jax006410" - -Input: I want to find the first 5 data asset ids of ecephys experimenets missing procedures. -Output: - "data_description.modality.name": "Extracellular electrophysiology", "procedures": "$exists": "false"' -List of field names to retrieve: ["_id", "name", "subject.subject_id"] -Answer: ['_id': 'de899de4-98e6-4b2a-8441-cfa72dcdd48f','name': 'ecephys_719093_2024-05-14_16-56-58','subject': 'subject_id': '719093'], -['_id': '82489f47-0217-4da2-90ce-0889e9c8a6d2','name': 'ecephys_719093_2024-05-15_15-01-10', 'subject': 'subject_id': '719093'], -['_id': 'f1780343-0f67-4d3d-9e6c-0a643adb1805','name': 'ecephys_719093_2024-05-16_15-13-26','subject': 'subject_id': '719093'], -['_id': 'eb7b3807-02be-4b30-946d-99da0071e587','name': 'ecephys_719093_2024-05-15_15-53-49','subject': 'subject_id': '719093'], -['_id': 'fdd9b3ca-8ac0-4b92-8bda-f392b5bb091c','name': 'ecephys_719093_2024-05-16_16-03-04','subject': 'subject_id': '719093'] - -Input: What are the unique modalities in the database? -Output: -"The unique modality types found in the database are: -['Optical physiology', 'Frame-projected independent-fiber photometry', 'Behavior videos', 'Hyperspectral fiber photometry', 'Extracellular electrophysiology', -'Electrophysiology', 'Multiplane optical physiology', 'Fiber photometry', 'Selective plane illumination microscopy', 'Planar optical physiology', None, -'Dual inverted selective plane illumination microscopy', 'Behavior', 'Trained behavior'] - -Note: Provide the query in curly brackets, appropirately place quotation marks. - -When retrieving experiment names, pull the information through the data description module. - -Even though the nature of mongodb queries is to provide false statements with the word false, in this case you will convert all words like false and null to strings -- "false" or "null". - -When asked to provide a query, use tools, execute the query in the database, and return the retrieved information. Provide the reasoning, query and your answer in tags. -For projection queries, when finding a field that's missing, try field_name : null instead of $exists: false, it's most likely that the field name exists but rather there's no information provided within the field. - -For aggregation queries, use a project stage first to minimize the size of the queries before proceeding with the remaining steps. - -Provide an analysis of the results of the query. -For example, do not end your answer with: -The query first projects to include only the `data_description.modality` field, then unwinds the modality array to get individual modality objects. It groups the documents by the modality name and counts them using the `$sum` accumulator. -Finally, it projects to include only the modality name and count fields. The results show the count of each modality present in the database. -I want to see the actual summary of results retrieved and be straightforward in your answer. Each sentence produced should directly answer the question asked. -When asked about each modality or each type of something, provide examples for ALL modalities, do NOT say "...and so on for the other modalities present in the database" or any version of this phrase. -Provide a summary of the retrieved input, including numerical values. -When asked a question like how many experiments of each modality are there, I want to see an answer like this. -For example: -Optical Physiology: 40, Frame-projected independent-fiber photometry: 383, Behavior videos: 4213, Hyperspectral fiber photometry: 105, Extracellular electrophysiology: 2618, Electrophysiology: 12, -Multiplane optical physiology: 13, Fiber photometry: 1761, Selective plane illumination microscopy: 3485, Planar optical physiology: 1330, Trained behavior: 32, None: 1481, Dual inverted selective plane illumination microscopy: 6, Behavior: 11016 - -If the retrieved information from the database is too big to fit the context window, state that you are unable to synthesize the retrieved information in the given context window. - -If you are unable to provide an answer, decline to answer. Do not provide an answer you are not confident of. - -Do not hallucinate. -""" -print(system_prompt) -summary_system_prompt = f""" -You are a neuroscientist with extensive knowledge about processes involves in neuroscience research. -You are also an expert in crafting queries for MongoDB. - -I will provide you with a list of schemas that contains information about the accepted inputs of variable names in a JSON file. -Each schema is provided in a specified format and each file corresponds to a different section of an experiment. -List of schemas: {schema_types} - -The Metadata schema shows how the different schema types are arranged, and how to appropriately access them. -For example, in order to access something within the procedures field, you will have to start the query with "procedures." -Metadata schema: {metadata_schema} - -I provide you with a sample, filled out metadata schema. It may contain missing information but serves as a reference to what a metadata file looks like. -You can use it as a guide to better structure your queries. -Sample metadata: {sample_metadata} - -Your task is to read the user's question, which will adhere to certain guidelines or formats. -You will only be prompted with a record ID number, and your only task is to retrieve the record and summarize information related to the modality used and subject information. -Include information about the modalities used and the subject genotype. - -Here are some examples: -Input: 719f0ac6-7d01-4586-beb9-21f52c422590 -Output: - -This record contains metadata about a behavioral experiment session with a mouse (subject ID 711039). The session involved a foraging task with auditory go cues and fiber photometry recordings. -The mouse had a Dbh-Cre genotype and was injected with a jGCaMP8m virus bilaterally in the locus coeruleus region. Optical fibers were implanted at those injection sites. -During the ~85 minute session, the mouse completed 564 trials and earned 0.558 mL of water reward through correct lick responses to auditory go cues (7.5 kHz tones at 71 dB). -Fiber photometry data was simultaneously recorded from the four implanted fibers, with 20 μW output power per fiber. Video data was recorded from two cameras monitoring the mouse's face/body. -The behavioral rig had an enclosure, reward delivery spouts, speakers, LED light sources, filters, lenses and CMOS cameras for photometry. - - -Input: 2dc06357-cc30-4fd5-9e8b-f7fae7e9ba5d -Output - -This record contains metadata for a behavior experiment conducted on subject 719360, a male C57BL6J mouse born on 2024-01-03. -The experiment was performed at the Allen Institute for Neural Dynamics on 2024-04-08 using a disc-shaped mouse platform and visual/auditory stimuli presented on a monitor and speaker. -The mouse underwent surgery for a craniotomy and headframe implantation prior to the experiment. -During the ~1 hour session, the mouse performed a dynamic routing task with visual grating and auditory noise stimuli, consuming 0.135 mL of water reward. -The data is stored at s3://aind-private-data-prod-o5171v/behavior_719360_2024-04-08_13-07-29 and the metadata status is listed as Invalid, though no specific issues are noted. - - -Note: Provide the query in curly brackets, appropirately place quotation marks. - -When retrieving experiment names, pull the information through the data description module. - -Even though the nature of mongodb queries is to provide false statements with the word false, in this case you will convert all words like false and null to strings -- "false" or "null". - -When asked to provide a query, use tools, execute the query in the database, and return the retrieved information. - -Summarize the retrieved asset in natural language. - -If you are unable to provide an answer, decline to answer. Do not hallucinate an answer. Decline to answer instead. -""" \ No newline at end of file diff --git a/src/metadata_chatbot/bedrock_model/tools.py b/src/metadata_chatbot/bedrock_model/tools.py deleted file mode 100644 index e562335..0000000 --- a/src/metadata_chatbot/bedrock_model/tools.py +++ /dev/null @@ -1,112 +0,0 @@ -from aind_data_access_api.document_db import MetadataDbClient -from aind_data_access_api.document_db_ssh import DocumentDbSSHClient, DocumentDbSSHCredentials -import json - -API_GATEWAY_HOST = "api.allenneuraldynamics.org" -DATABASE = "metadata_index" -COLLECTION = "data_assets" - -docdb_api_client = MetadataDbClient( - host=API_GATEWAY_HOST, - database=DATABASE, - collection=COLLECTION, -) - -#credentials = DocumentDbSSHCredentials() - -def doc_retrieval(filter_query: dict) -> list: - """Given a MongoDB query, this function retrieves and returns the appropriate documents. - - Parameters - ---------- - filter_query - MongoDB query - - Returns - ------- - list - List of retrieved documents - """ - limit = 1000 - paginate_batch_size = 1000 - response = docdb_api_client.retrieve_docdb_records( - filter_query=filter_query, - limit=limit, - paginate_batch_size=paginate_batch_size - ) - return(response) - -def projection_retrieval(filter_query: dict, field_name_list: list) -> list: - """Given a MongoDB query and list of projections, this function retrieves - and returns the appropriate projections in the documents. - - Parameters - ---------- - credentials - DocDB credentials, initialized through DocumentDbSSHCredentials - - filter_query - MongoDB query - - field_name_list - Field names to specifically retrieve from documents - - Returns - ------- - list - List of retrieved documents - """ - projection = {"name" : 1} - if field_name_list: - for field_name in field_name_list: - projection[field_name] = 1 - - response = docdb_api_client.retrieve_docdb_records( - filter_query=filter_query, - projection=projection - ) - return response - -def aggregation_retrieval(agg_pipeline: list) -> list: - """Given a MongoDB query and list of projections, this function retrieves and returns the - relevant information in the documents. - Use a project stage as the first stage to minimize the size of the queries before proceeding with the remaining steps. - The input to $map must be an array not a string, avoid using it in the $project stage. - - Parameters - ---------- - agg_pipeline - MongoDB aggregation pipeline - - Returns - ------- - list - List of retrieved documents - """ - result = docdb_api_client.aggregate_docdb_records( - pipeline=agg_pipeline - ) - return result - -def tool_call(tool_name:str, tool_inputs:dict) -> str: - - if tool_name == 'doc_retrieval': - filter_query = json.loads(tool_inputs['filter']) - retrieved_info_list = doc_retrieval(filter_query) #retrieved info type, dictionary - - elif tool_name == 'projection_retrieval': - filter_query = json.loads(tool_inputs['filter']) - field_name_list = json.loads(tool_inputs['fieldNameList']) - retrieved_info_list = projection_retrieval(filter_query, field_name_list) - #retrieved_info = json.dumps(retrieved_info_list)[:1000] - - elif tool_name == 'aggregation_retrieval': - #print("Loading agg pipeline...") - agg_pipeline = json.loads(tool_inputs['pipeline']) - #print(type(tool_inputs['pipeline'])) - retrieved_info_list = aggregation_retrieval(agg_pipeline) - #print("Retrieved info ready") - - retrieved_info = " ".join(map(str, retrieved_info_list)) - #print(retrieved_info) - return(retrieved_info)