diff --git a/Dockerfile b/Dockerfile index d0ba874..05d7a67 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,7 @@ RUN python -m pip install -e ."[ode,tests,dkg-client,sbml]" WORKDIR /jupyter # Kernel must be placed in a specific spot in the filesystem -COPY beaker /usr/local/share/jupyter/kernels/beaker +COPY beaker_kernel /usr/local/share/jupyter/kernels/beaker_kernel # Copy src code over RUN chown 1000:1000 /jupyter diff --git a/Makefile b/Makefile index 440e9c7..c7b37f8 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ build: .PHONY:dev dev:service/dev_ui/build/index.js if [[ "$$(docker compose ps | grep 'jupyter')" == "" ]]; then \ - docker compose pull && \ + docker compose pull; \ docker compose up -d --build && \ (sleep 1; python -m webbrowser "http://localhost:8888/dev_ui"); \ docker compose logs -f jupyter || true; \ @@ -43,7 +43,7 @@ dev-install:.env ENVDIR=$$(poetry -q run python -c 'import os; print(os.environ.get("VIRTUAL_ENV", ""))'); \ KERNEL_INSTALL_PATH=$${ENVDIR}/share/jupyter/kernels/beaker; \ if [[ ! -e "$${KERNEL_INSTALL_PATH}" && -n "$${ENVDIR}" ]]; then \ - ln -s "${BASEDIR}/beaker" "$${KERNEL_INSTALL_PATH}"; \ + ln -s "${BASEDIR}/beaker_kernel" "$${KERNEL_INSTALL_PATH}"; \ fi; \ if [[ ! 
-e "service/test.ipynb" ]]; then \ cp service/dev_ui/test.ipynb service/test.ipynb; \ diff --git a/beaker/kernel.json b/beaker/kernel.json deleted file mode 100644 index 55d5b0a..0000000 --- a/beaker/kernel.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "argv": [ - "python3", "-m", "beaker.kernel", - "start", "{connection_file}" - ], - "display_name": "Chatty LLM Agent", - "language": "chatty" -} diff --git a/beaker/__init__.py b/beaker_kernel/__init__.py similarity index 100% rename from beaker/__init__.py rename to beaker_kernel/__init__.py diff --git a/contexts/__init__.py b/beaker_kernel/contexts/__init__.py similarity index 100% rename from contexts/__init__.py rename to beaker_kernel/contexts/__init__.py diff --git a/contexts/subkernels/__init__.py b/beaker_kernel/contexts/dataset/__init__.py similarity index 100% rename from contexts/subkernels/__init__.py rename to beaker_kernel/contexts/dataset/__init__.py diff --git a/beaker_kernel/contexts/dataset/agent.py b/beaker_kernel/contexts/dataset/agent.py new file mode 100644 index 0000000..58dfed4 --- /dev/null +++ b/beaker_kernel/contexts/dataset/agent.py @@ -0,0 +1,75 @@ +import json +import logging +import re + +from archytas.react import Undefined +from archytas.tool_utils import AgentRef, LoopControllerRef, tool, toolset + +from beaker_kernel.lib.agent import BaseAgent +from beaker_kernel.lib.context import BaseContext +from beaker_kernel.lib.jupyter_kernel_proxy import JupyterMessage +from beaker_kernel.lib.toolset import BaseToolset + +logging.disable(logging.WARNING) # Disable warnings +logger = logging.Logger(__name__) + +@toolset() +class DatasetToolset: + + @tool() + async def generate_code( + self, query: str, agent: AgentRef, loop: LoopControllerRef + ) -> None: + """ + Generated code to be run in an interactive Jupyter notebook for the purpose of exploring, modifying and visualizing a Dataframe. 
+ + Input is a full grammatically correct question about or request for an action to be performed on the loaded dataframe. + + Args: + query (str): A fully grammatically correct question about the current dataset. + + """ + # set up the agent + # str: Valid and correct python code that fulfills the user's request. + var_sections = [] + for var_name, dataset_obj in agent.context.dataset_map.items(): + df_info = await agent.context.describe_dataset(var_name) + var_sections.append(f""" +You have access to a variable name `{var_name}` that is a {agent.context.metadata.get("df_lib_name", "Pandas")} Dataframe with the following structure: +{df_info} +--- End description of variable `{var_name}` +""") + prompt = f""" +You are a programmer writing code to help with scientific data analysis and manipulation in {agent.context.metadata.get("name", "a Jupyter notebook")}. + +Please write code that satisfies the user's request below. + +{"".join(var_sections)} + +If you are asked to modify or update the dataframe, modify the dataframe in place, keeping the updated variable the same unless specifically specified otherwise. + +You also have access to the libraries {agent.context.metadata.get("libraries", "that are common for these tasks")}. + +Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell. +You MUST wrap the code with a line containing three backticks (```) before and after the generated code. +No addtional text is needed in the response, just the code block. 
+""" + + llm_response = await agent.oneshot(prompt=prompt, query=query) + loop.set_state(loop.STOP_SUCCESS) + preamble, code, coda = re.split("```\w*", llm_response) + result = json.dumps( + { + "action": "code_cell", + "language": agent.context.lang, + "content": code.strip(), + } + ) + return result + + +class DatasetAgent(BaseAgent): + + def __init__(self, context: BaseContext = None, tools: list = None, **kwargs): + tools = [DatasetToolset] + super().__init__(context, tools, **kwargs) diff --git a/contexts/codesets/dataset/julia-1.9/df_download.jl b/beaker_kernel/contexts/dataset/code/julia/df_download.jl similarity index 100% rename from contexts/codesets/dataset/julia-1.9/df_download.jl rename to beaker_kernel/contexts/dataset/code/julia/df_download.jl diff --git a/contexts/codesets/dataset/julia-1.9/df_info.jl b/beaker_kernel/contexts/dataset/code/julia/df_info.jl similarity index 100% rename from contexts/codesets/dataset/julia-1.9/df_info.jl rename to beaker_kernel/contexts/dataset/code/julia/df_info.jl diff --git a/contexts/codesets/dataset/julia-1.9/df_save_as.jl b/beaker_kernel/contexts/dataset/code/julia/df_save_as.jl similarity index 100% rename from contexts/codesets/dataset/julia-1.9/df_save_as.jl rename to beaker_kernel/contexts/dataset/code/julia/df_save_as.jl diff --git a/contexts/codesets/dataset/julia-1.9/load_df.jl b/beaker_kernel/contexts/dataset/code/julia/load_df.jl similarity index 100% rename from contexts/codesets/dataset/julia-1.9/load_df.jl rename to beaker_kernel/contexts/dataset/code/julia/load_df.jl diff --git a/contexts/codesets/dataset/julia-1.9/metadata.json b/beaker_kernel/contexts/dataset/code/julia/metadata.json similarity index 100% rename from contexts/codesets/dataset/julia-1.9/metadata.json rename to beaker_kernel/contexts/dataset/code/julia/metadata.json diff --git a/contexts/codesets/dataset/julia-1.9/setup.jl b/beaker_kernel/contexts/dataset/code/julia/setup.jl similarity index 100% rename from 
contexts/codesets/dataset/julia-1.9/setup.jl rename to beaker_kernel/contexts/dataset/code/julia/setup.jl diff --git a/contexts/codesets/dataset/python3/df_download.py b/beaker_kernel/contexts/dataset/code/python3/df_download.py similarity index 95% rename from contexts/codesets/dataset/python3/df_download.py rename to beaker_kernel/contexts/dataset/code/python3/df_download.py index 2ce8a77..036005b 100644 --- a/contexts/codesets/dataset/python3/df_download.py +++ b/beaker_kernel/contexts/dataset/code/python3/df_download.py @@ -1,5 +1,4 @@ import pandas as pd; import io -import time output_buff = io.BytesIO() {{ var_name|default("df") }}.to_csv(output_buff, index=False, header=True) output_buff.seek(0) diff --git a/contexts/codesets/dataset/python3/df_info.py b/beaker_kernel/contexts/dataset/code/python3/df_info.py similarity index 100% rename from contexts/codesets/dataset/python3/df_info.py rename to beaker_kernel/contexts/dataset/code/python3/df_info.py diff --git a/contexts/codesets/dataset/python3/df_save_as.py b/beaker_kernel/contexts/dataset/code/python3/df_save_as.py similarity index 100% rename from contexts/codesets/dataset/python3/df_save_as.py rename to beaker_kernel/contexts/dataset/code/python3/df_save_as.py diff --git a/contexts/codesets/dataset/python3/load_df.py b/beaker_kernel/contexts/dataset/code/python3/load_df.py similarity index 100% rename from contexts/codesets/dataset/python3/load_df.py rename to beaker_kernel/contexts/dataset/code/python3/load_df.py diff --git a/contexts/codesets/dataset/python3/metadata.json b/beaker_kernel/contexts/dataset/code/python3/metadata.json similarity index 100% rename from contexts/codesets/dataset/python3/metadata.json rename to beaker_kernel/contexts/dataset/code/python3/metadata.json diff --git a/contexts/codesets/dataset/python3/setup.py b/beaker_kernel/contexts/dataset/code/python3/setup.py similarity index 100% rename from contexts/codesets/dataset/python3/setup.py rename to 
beaker_kernel/contexts/dataset/code/python3/setup.py diff --git a/contexts/codesets/dataset/ir/df_download.r b/beaker_kernel/contexts/dataset/code/rlang/df_download.r similarity index 100% rename from contexts/codesets/dataset/ir/df_download.r rename to beaker_kernel/contexts/dataset/code/rlang/df_download.r diff --git a/contexts/codesets/dataset/ir/df_info.r b/beaker_kernel/contexts/dataset/code/rlang/df_info.r similarity index 100% rename from contexts/codesets/dataset/ir/df_info.r rename to beaker_kernel/contexts/dataset/code/rlang/df_info.r diff --git a/contexts/codesets/dataset/ir/df_save_as.r b/beaker_kernel/contexts/dataset/code/rlang/df_save_as.r similarity index 100% rename from contexts/codesets/dataset/ir/df_save_as.r rename to beaker_kernel/contexts/dataset/code/rlang/df_save_as.r diff --git a/contexts/codesets/dataset/ir/load_df.r b/beaker_kernel/contexts/dataset/code/rlang/load_df.r similarity index 100% rename from contexts/codesets/dataset/ir/load_df.r rename to beaker_kernel/contexts/dataset/code/rlang/load_df.r diff --git a/contexts/codesets/dataset/ir/metadata.json b/beaker_kernel/contexts/dataset/code/rlang/metadata.json similarity index 100% rename from contexts/codesets/dataset/ir/metadata.json rename to beaker_kernel/contexts/dataset/code/rlang/metadata.json diff --git a/contexts/codesets/dataset/ir/setup.r b/beaker_kernel/contexts/dataset/code/rlang/setup.r similarity index 100% rename from contexts/codesets/dataset/ir/setup.r rename to beaker_kernel/contexts/dataset/code/rlang/setup.r diff --git a/contexts/toolsets/dataset_toolset.py b/beaker_kernel/contexts/dataset/context.py similarity index 66% rename from contexts/toolsets/dataset_toolset.py rename to beaker_kernel/contexts/dataset/context.py index 997d060..445498b 100644 --- a/contexts/toolsets/dataset_toolset.py +++ b/beaker_kernel/contexts/dataset/context.py @@ -1,55 +1,36 @@ -import codecs -import copy -import datetime -import json -import logging import os -import re import 
requests -import tempfile -from functools import partial -from typing import Optional, Callable, List, Tuple, Any +from typing import TYPE_CHECKING, Any, Dict -from archytas.tool_utils import tool, toolset, AgentRef, LoopControllerRef +from beaker_kernel.lib.context import BaseContext -from .base import BaseToolset -from lib.jupyter_kernel_proxy import JupyterMessage +from .agent import DatasetAgent +if TYPE_CHECKING: + from beaker_kernel.kernel import LLMKernel + from beaker_kernel.lib.agent import BaseAgent + from beaker_kernel.lib.subkernels.base import BaseSubkernel -logging.disable(logging.WARNING) # Disable warnings -logger = logging.Logger(__name__) +class DatasetContext(BaseContext): -@toolset() -class DatasetToolset(BaseToolset): - """ """ + slug: str = "dataset" + agent_cls: "BaseAgent" = DatasetAgent - dataset_map: Optional[dict[str, dict[str, Any]]] + def __init__(self, beaker_kernel: "LLMKernel", subkernel: "BaseSubkernel", config: Dict[str, Any]) -> None: - # { - # "df": {"id": 12345, "filename": "dataset.csv"}, - # "df2": {"id": 54321} - # "df_map": {"id": 12345, "filename": "mappings.csv"}, - # } - - def __init__(self, context, *args, **kwargs): - super().__init__(context=context, *args, **kwargs) self.dataset_map = {} self.intercepts = { "download_dataset_request": (self.download_dataset_request, "shell"), "save_dataset_request": (self.save_dataset_request, "shell"), } - self.reset() + + super().__init__(beaker_kernel, subkernel, self.agent_cls, config) async def setup(self, config, parent_header): - # DEPRECATED: Backwards compatible handling of "old style" single id contexts - if len(config) == 1 and "id" in config: - dataset_id = config["id"] - print(f"Processing dataset w/id {dataset_id}") - await self.set_dataset(dataset_id, parent_header=parent_header) - else: - print(f"Processing datasets w/ids {', '.join(config.values())}") - await self.set_datasets(config, parent_header=parent_header) + self.config = config + print(f"Processing datasets 
w/ids {', '.join(self.config.values())}") + await self.set_datasets(self.config, parent_header=parent_header) async def post_execute(self, message): await self.update_dataset_map(parent_header=message.parent_header) @@ -99,7 +80,7 @@ async def load_dataframes(self): self.get_code("load_df", {"var_map": var_map}), ] ) - await self.context.execute(command) + await self.execute(command) await self.update_dataset_map() def reset(self): @@ -116,14 +97,14 @@ async def send_df_preview_message( } for var_name, df in self.dataset_map.items() } - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "dataset", preview, parent_header=parent_header ) return data async def update_dataset_map(self, parent_header={}): code = self.get_code("df_info") - df_info_response = await self.context.kernel.evaluate( + df_info_response = await self.beaker_kernel.evaluate( code, parent_header=parent_header, ) @@ -200,57 +181,6 @@ async def describe_dataset(self, var_name) -> str: """ return output - @tool() - async def generate_code( - self, query: str, agent: AgentRef, loop: LoopControllerRef - ) -> None: - """ - Generated code to be run in an interactive Jupyter notebook for the purpose of exploring, modifying and visualizing a Dataframe. - - Input is a full grammatically correct question about or request for an action to be performed on the loaded dataframe. - - Args: - query (str): A fully grammatically correct question about the current dataset. - - """ - # set up the agent - # str: Valid and correct python code that fulfills the user's request. 
- var_sections = [] - for var_name, dataset_obj in self.dataset_map.items(): - df_info = await self.describe_dataset(var_name) - var_sections.append(f""" -You have access to a variable name `{var_name}` that is a {self.metadata.get("df_lib_name", "Pandas")} Dataframe with the following structure: -{df_info} ---- End description of variable `{var_name}` -""") - prompt = f""" -You are a programmer writing code to help with scientific data analysis and manipulation in {self.metadata.get("name", "a Jupyter notebook")}. - -Please write code that satisfies the user's request below. - -{"".join(var_sections)} - -If you are asked to modify or update the dataframe, modify the dataframe in place, keeping the updated variable the same unless specifically specified otherwise. - -You also have access to the libraries {self.metadata.get("libraries", "that are common for these tasks")}. - -Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell. -You MUST wrap the code with a line containing three backticks (```) before and after the generated code. -No addtional text is needed in the response, just the code block. -""" - - llm_response = await agent.oneshot(prompt=prompt, query=query) - loop.set_state(loop.STOP_SUCCESS) - preamble, code, coda = re.split("```\w*", llm_response) - result = json.dumps( - { - "action": "code_cell", - "language": self.context.lang, - "content": code.strip(), - } - ) - return result - async def download_dataset_request(self, queue, message_id, data): message = JupyterMessage.parse(data) content = message.content @@ -260,9 +190,9 @@ async def download_dataset_request(self, queue, message_id, data): # TODO: This doesn't work very well. Is very slow to encode, and transfer all of the required messages multiple times proxies through the proxy kernel. # We should find a better way to accomplish this if it's needed. 
code = self.get_code("df_download", {"var_name": var_name}) - df_response = await self.context.evaluate(code) + df_response = await self.evaluate(code) df_contents = df_response.get("stdout_list") - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "download_response", { @@ -297,12 +227,12 @@ async def save_dataset_request(self, queue, message_id, data): } ) - df_response = await self.context.evaluate(code) + df_response = await self.evaluate(code) if df_response: new_dataset_id = df_response.get("return", {}).get("dataset_id", None) if new_dataset_id: - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "save_dataset_response", { diff --git a/lib/__init__.py b/beaker_kernel/contexts/decapodes/__init__.py similarity index 100% rename from lib/__init__.py rename to beaker_kernel/contexts/decapodes/__init__.py diff --git a/beaker_kernel/contexts/decapodes/agent.py b/beaker_kernel/contexts/decapodes/agent.py new file mode 100644 index 0000000..179f9ad --- /dev/null +++ b/beaker_kernel/contexts/decapodes/agent.py @@ -0,0 +1,67 @@ +import json +import logging +import re + +from archytas.tool_utils import AgentRef, LoopControllerRef, tool, toolset + +from beaker_kernel.lib.agent import BaseAgent +from beaker_kernel.lib.context import BaseContext + +logging.disable(logging.WARNING) # Disable warnings +logger = logging.Logger(__name__) + + +@toolset() +class DecapodesToolset: + """ + Toolset used for working with the Julia package Decacpodes, a framework for doing descrete exterior calculus based modeling. + """ + + @tool() + async def generate_code( + self, query: str, agent: AgentRef, loop: LoopControllerRef + ) -> None: + """ + Generated Julia code to be run in an interactive Jupyter notebook for the purpose of exploring and modifying systems the DecaExpr. + + Input is a full grammatically correct question about or request for an action to be performed on the loaded model. 
+ + Assume that the expression is already loaded and has the variable named `_expr`. + Information about the dataframe can be loaded with the `model_structure` tool. + + Args: + query (str): A fully grammatically correct queistion about the current model. + """ + prompt = f""" +You are a programmer writing code to help with scientific data analysis and manipulation in Julia. + +Please write code that satisfies the user's request below. + +You have access to a variable name `_expr` that is a Decapodes SyntacticModel model with the following structure: +{await agent.context.model_structure()} + +Your generated will be in the form `_expr = parse_decapode(quote ...modified object.. end)` + +Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell. +You MUST wrap the code with a line containing three backticks (```) before and after the generated code. +No addtional text is needed in the response, just the code block. +""" + + llm_response = await agent.oneshot(prompt=prompt, query=query) + loop.set_state(loop.STOP_SUCCESS) + preamble, code, coda = re.split("```\w*", llm_response) + result = json.dumps( + { + "action": "code_cell", + "language": "julia-1.9", + "content": code.strip(), + } + ) + return result + + +class DecapodesAgent(BaseAgent): + + def __init__(self, context: BaseContext = None, tools: list = None, **kwargs): + tools = [DecapodesToolset] + super().__init__(context, tools, **kwargs) diff --git a/contexts/codesets/decapodes/julia-1.9/construct_expr.jl b/beaker_kernel/contexts/decapodes/code/julia/construct_expr.jl similarity index 100% rename from contexts/codesets/decapodes/julia-1.9/construct_expr.jl rename to beaker_kernel/contexts/decapodes/code/julia/construct_expr.jl diff --git a/contexts/codesets/decapodes/julia-1.9/expr_to_info.jl b/beaker_kernel/contexts/decapodes/code/julia/expr_to_info.jl similarity index 100% rename from contexts/codesets/decapodes/julia-1.9/expr_to_info.jl 
rename to beaker_kernel/contexts/decapodes/code/julia/expr_to_info.jl diff --git a/contexts/codesets/decapodes/julia-1.9/load_model.jl b/beaker_kernel/contexts/decapodes/code/julia/load_model.jl similarity index 100% rename from contexts/codesets/decapodes/julia-1.9/load_model.jl rename to beaker_kernel/contexts/decapodes/code/julia/load_model.jl diff --git a/contexts/codesets/decapodes/julia-1.9/metadata.json b/beaker_kernel/contexts/decapodes/code/julia/metadata.json similarity index 100% rename from contexts/codesets/decapodes/julia-1.9/metadata.json rename to beaker_kernel/contexts/decapodes/code/julia/metadata.json diff --git a/contexts/codesets/decapodes/julia-1.9/output_model.jl b/beaker_kernel/contexts/decapodes/code/julia/output_model.jl similarity index 100% rename from contexts/codesets/decapodes/julia-1.9/output_model.jl rename to beaker_kernel/contexts/decapodes/code/julia/output_model.jl diff --git a/contexts/codesets/decapodes/julia-1.9/setup.jl b/beaker_kernel/contexts/decapodes/code/julia/setup.jl similarity index 100% rename from contexts/codesets/decapodes/julia-1.9/setup.jl rename to beaker_kernel/contexts/decapodes/code/julia/setup.jl diff --git a/contexts/toolsets/decapode_creation_toolset.py b/beaker_kernel/contexts/decapodes/context.py similarity index 65% rename from contexts/toolsets/decapode_creation_toolset.py rename to beaker_kernel/contexts/decapodes/context.py index 3d735e2..1253553 100644 --- a/contexts/toolsets/decapode_creation_toolset.py +++ b/beaker_kernel/contexts/decapodes/context.py @@ -1,58 +1,56 @@ - -import copy -import datetime import json import logging import os -import re +from typing import TYPE_CHECKING, Any, Dict + import requests -import sys -from typing import Optional, Any -from archytas.tool_utils import tool, toolset, AgentRef, LoopControllerRef +from beaker_kernel.lib.context import BaseContext +from beaker_kernel.lib.jupyter_kernel_proxy import JupyterMessage -from .base import BaseToolset -from 
lib.jupyter_kernel_proxy import JupyterMessage +from .agent import DecapodesAgent -logging.disable(logging.WARNING) # Disable warnings -logger = logging.Logger(__name__) +if TYPE_CHECKING: + from beaker_kernel.kernel import LLMKernel + from beaker_kernel.lib.subkernels.base import BaseSubkernel -@toolset() -class DecapodesCreationToolset(BaseToolset): - """ """ +logger = logging.getLogger(__name__) - toolset_name = "decapodes_creation" - codeset_name = "decapodes" - decapodes_expression_dsl: Optional[str] = None - target: str = "decapode" +class DecapodesContext(BaseContext): - def __init__(self, context, *args, **kwargs): - super().__init__(context=context, *args, **kwargs) + slug = "decapodes" + agent_cls = DecapodesAgent + + def __init__(self, beaker_kernel: "LLMKernel", subkernel: "BaseSubkernel", config: Dict[str, Any]) -> None: + self.target = "decapode" self.intercepts = { "save_amr_request": (self.save_amr_request, "shell"), "construct_amr_request": (self.construct_amr, "shell"), "compile_expr_request": (self.compile_expr, "shell"), } self.reset() + super().__init__(beaker_kernel, subkernel, self.agent_cls, config) + async def setup(self, config, parent_header): + self.config = config var_names = list(config.keys()) def fetch_model(model_id): meta_url = f"{os.environ['DATA_SERVICE_URL']}/models/{model_id}" - response = requests.get(meta_url) + response = requests.get(meta_url) if response.status_code >= 300: raise Exception(f"Failed to retrieve model {model_id} from server returning {response.status_code}") model = json.dumps(response.json()["model"]) return model - + load_commands = [ '%s = parse_json_acset(SummationDecapode{Symbol, Symbol, Symbol},"""%s""")' % (var_name, fetch_model(decapode_id)) for var_name, decapode_id in config.items() ] - + command = "\n".join( [ self.get_code("setup"), @@ -61,7 +59,7 @@ def fetch_model(model_id): ] ) print(f"Running command:\n-------\n{command}\n---------") - await self.context.execute(command) + await 
self.execute(command) print("Decapodes creation environment set up") @@ -132,82 +130,23 @@ async def model_structure(self) -> str: # Update the local dataframe to match what's in the shell. # This will be factored out when we switch around to allow using multiple runtimes. amr = ( - await self.context.evaluate( + await self.evaluate( f"_expr |> string" ) )["return"] return json.dumps(amr, indent=2) - - @tool() - async def generate_response( - self, query: str, agent: AgentRef, loop: LoopControllerRef - ) -> None: - """ - DO NOT USE THIS TOOL. IT IS USELESS. - - - Args: - query (str): A fully grammatically correct queistion about the current model. - """ - return - - - @tool() - async def generate_code( - self, query: str, agent: AgentRef, loop: LoopControllerRef - ) -> None: - """ - Generated Julia code to be run in an interactive Jupyter notebook for the purpose of exploring and modifying systems the DecaExpr. - - Input is a full grammatically correct question about or request for an action to be performed on the loaded model. - - Assume that the expression is already loaded and has the variable named `_expr`. - Information about the dataframe can be loaded with the `model_structure` tool. - - Args: - query (str): A fully grammatically correct queistion about the current model. - """ - # set up the agent - # str: Valid and correct julia code that fulfills the user's request. - prompt = f""" -You are a programmer writing code to help with scientific data analysis and manipulation in Julia. - -Please write code that satisfies the user's request below. - -You have access to a variable name `_expr` that is a Decapodes SyntacticModel model with the following structure: -{await self.model_structure()} - -Your generated will be in the form `_expr = parse_decapode(quote ...modified object.. end)` - -Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell. 
-You MUST wrap the code with a line containing three backticks (```) before and after the generated code. -No addtional text is needed in the response, just the code block. -""" - - llm_response = await agent.oneshot(prompt=prompt, query=query) - loop.set_state(loop.STOP_SUCCESS) - preamble, code, coda = re.split("```\w*", llm_response) - result = json.dumps( - { - "action": "code_cell", - "language": "julia-1.9", - "content": code.strip(), - } - ) - return result - async def send_decapodes_preview_message( self, server=None, target_stream=None, data=None, parent_header=None ): if parent_header is None: parent_header = {} - preview = await self.context.evaluate(self.get_code("expr_to_info", {"target": self.target})) + preview = await self.evaluate(self.get_code("expr_to_info", {"target": self.target})) content = preview["return"] if content is None: raise RuntimeError("Info not returned for preview") - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "decapodes_preview", content, parent_header=parent_header ) @@ -224,9 +163,9 @@ async def compile_expr(self, server, target_stream, data): "nothing" ] ) - await self.context.execute(command) + await self.execute(command) - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "compile_expr_response", {"successs": True}, parent_header=message.header ) await self.send_decapodes_preview_message(parent_header=message.header) @@ -248,7 +187,7 @@ async def construct_amr(self, server, target_stream, data): if id_value: header['id'] = id_value - preview = await self.context.evaluate(self.get_code("expr_to_info", {"target": self.target})) + preview = await self.evaluate(self.get_code("expr_to_info", {"target": self.target})) model = preview["return"]["application/json"] amr = { @@ -258,7 +197,7 @@ async def construct_amr(self, server, target_stream, data): "annotations": [], } - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", 
"construct_amr_response", amr, parent_header=message.header ) @@ -269,7 +208,7 @@ async def save_amr_request(self, server, target_stream, data): header = content["header"] header["_type"] = "Header" - preview = await self.context.evaluate(self.get_code("expr_to_info", {"target": self.target})) + preview = await self.evaluate(self.get_code("expr_to_info", {"target": self.target})) model = preview["return"]["application/json"] amr = { @@ -284,6 +223,6 @@ async def save_amr_request(self, server, target_stream, data): ) new_model_id = create_req.json()["id"] - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "save_model_response", content, parent_header=message.header ) diff --git a/contexts/codesets/mira_model/python3/model_info.py b/beaker_kernel/contexts/mira_model/__init__.py similarity index 100% rename from contexts/codesets/mira_model/python3/model_info.py rename to beaker_kernel/contexts/mira_model/__init__.py diff --git a/beaker_kernel/contexts/mira_model/agent.py b/beaker_kernel/contexts/mira_model/agent.py new file mode 100644 index 0000000..5aabfad --- /dev/null +++ b/beaker_kernel/contexts/mira_model/agent.py @@ -0,0 +1,159 @@ +import json +import logging +import re + +import requests +from archytas.react import Undefined +from archytas.tool_utils import AgentRef, LoopControllerRef, tool, toolset + +from beaker_kernel.lib.agent import BaseAgent +from beaker_kernel.lib.context import BaseContext +from beaker_kernel.lib.jupyter_kernel_proxy import JupyterMessage +from beaker_kernel.lib.toolset import BaseToolset + +logging.disable(logging.WARNING) # Disable warnings +logger = logging.Logger(__name__) + +@toolset() +class MiraModelToolset: + @tool() + async def generate_code( + self, query: str, agent: AgentRef, loop: LoopControllerRef + ) -> None: + """ + Generated Python code to be run in an interactive Jupyter notebook for the purpose of exploring, modifying and visualizing a Pandas Dataframe. 
+ + Input is a full grammatically correct question about or request for an action to be performed on the loaded model. + + Assume that the model is already loaded and has the variable named `model`. + Information about the dataframe can be loaded with the `model_structure` tool. + + Args: + query (str): A fully grammatically correct queistion about the current model. + """ + + prompt = f""" +You are a programmer writing code to help with scientific data analysis and manipulation in Python. + +Please write code that satisfies the user's request below. + +You have access to a variable name `model` that is a Petrinet model with the following structure: +{await agent.context.model_structure()} + + +If you are asked to modify or update the model, modify the model in place, keeping the updated variable to still be named `model`. +You have access to the MIRA libraries. + +If you are asked to stratify the model, use the available function named `stratify` that is defined by the following python code: +```````````````````` +def stratify( + template_model: mira.metamodel.template_model.TemplateModel, + *, + key: str, + strata: Collection[str], + strata_curie_to_name: Optional[Mapping[str, str]] = None, + strata_name_lookup: bool = False, + structure: Optional[Iterable[Tuple[str, str]]] = None, + directed: bool = False, + conversion_cls: Type[mira.metamodel.templates.Template] = mira.metamodel.templates.NaturalConversion, + cartesian_control: bool = False, + modify_names: bool = True, + params_to_stratify: Optional[Collection[str]] = None, + params_to_preserve: Optional[Collection[str]] = None, + concepts_to_stratify: Optional[Collection[str]] = None, + concepts_to_preserve: Optional[Collection[str]] = None, +) -> mira.metamodel.template_model.TemplateModel + + Multiplies a model into several strata. 
+ + E.g., can turn the SIR model into a two-city SIR model by splitting each concept into + two derived concepts, each with the context for one of the two cities + + Parameters + ---------- + template_model : + A template model + key : + The (singular) name of the stratification, e.g., ``"city"`` + strata : + A list of the values for stratification, e.g., ``["boston", "nyc"]`` + or ``[geonames:4930956, geonames:5128581]``. + strata_curie_to_name : + If provided, should map from a key used in ``strata`` to a name. + For example, ``{{"geonames:4930956": "boston", + "geonames:5128581": "nyc"}}``. + strata_name_lookup : + If true, will try to look up the entity names of the strata values + under the assumption that they are curies. This flag has no impact + if ``strata_curie_to_name`` is given. + structure : + An iterable of pairs corresponding to a directed network structure + where each of the pairs has two strata. If none given, will assume a complete + network structure. If no structure is necessary, pass an empty list. + directed : + Should the reverse direction conversions be added based on the given structure? + conversion_cls : + The template class to be used for conversions between strata + defined by the network structure. Defaults to :class:`NaturalConversion` + cartesian_control : + If true, splits all control relationships based on the stratification. + + This should be true for an SIR epidemiology model, the susceptibility to + infected transition is controlled by infected. If the model is stratified by + vaccinated and unvaccinated, then the transition from vaccinated + susceptible population to vaccinated infected populations should be + controlled by both infected vaccinated and infected unvaccinated + populations. 
+
+        This should be false for stratification of an SIR epidemiology model based
+        on cities, since the infected population in one city won't (directly,
+        through the perspective of the model) affect the infection of susceptible
+        population in another city.
+    modify_names :
+        If true, will modify the names of the concepts to include the strata
+        (e.g., ``"S"`` becomes ``"S_boston"``). If false, will keep the original
+        names.
+    params_to_stratify :
+        A list of parameters to stratify. If none given, will stratify all
+        parameters.
+    params_to_preserve:
+        A list of parameters to preserve. If none given, will stratify all
+        parameters.
+    concepts_to_stratify :
+        A list of concepts to stratify. If none given, will stratify all
+        concepts.
+    concepts_to_preserve:
+        A list of concepts to preserve. If none given, will stratify all
+        concepts.
+
+    Returns
+    -------
+    :
+        A stratified template model
+````````````````````
+
+You also have access to the libraries pandas, numpy, scipy, matplotlib and the full mira python library.
+
+Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell.
+You MUST wrap the code with a line containing three backticks (```) before and after the generated code.
+No additional text is needed in the response, just the code block.
+""" + + llm_response = await agent.oneshot(prompt=prompt, query=query) + loop.set_state(loop.STOP_SUCCESS) + preamble, code, coda = re.split("```\w*", llm_response) + result = json.dumps( + { + "action": "code_cell", + "language": "python3", + "content": code.strip(), + } + ) + return result + + +class MiraModelAgent(BaseAgent): + + def __init__(self, context: BaseContext = None, tools: list = None, **kwargs): + tools = [MiraModelToolset] + super().__init__(context, tools, **kwargs) diff --git a/contexts/codesets/mira_model/python3/load_model.py b/beaker_kernel/contexts/mira_model/code/python3/load_model.py similarity index 100% rename from contexts/codesets/mira_model/python3/load_model.py rename to beaker_kernel/contexts/mira_model/code/python3/load_model.py diff --git a/contexts/codesets/mira_model/python3/metadata.json b/beaker_kernel/contexts/mira_model/code/python3/metadata.json similarity index 100% rename from contexts/codesets/mira_model/python3/metadata.json rename to beaker_kernel/contexts/mira_model/code/python3/metadata.json diff --git a/beaker_kernel/contexts/mira_model/code/python3/model_info.py b/beaker_kernel/contexts/mira_model/code/python3/model_info.py new file mode 100644 index 0000000..e69de29 diff --git a/contexts/codesets/mira_model/python3/model_preview.py b/beaker_kernel/contexts/mira_model/code/python3/model_preview.py similarity index 100% rename from contexts/codesets/mira_model/python3/model_preview.py rename to beaker_kernel/contexts/mira_model/code/python3/model_preview.py diff --git a/beaker_kernel/contexts/mira_model/code/python3/model_to_json.py b/beaker_kernel/contexts/mira_model/code/python3/model_to_json.py new file mode 100644 index 0000000..40214b0 --- /dev/null +++ b/beaker_kernel/contexts/mira_model/code/python3/model_to_json.py @@ -0,0 +1,2 @@ +from mira.modeling.amr.petrinet import template_model_to_petrinet_json +template_model_to_petrinet_json({{ var_name|default("model") }}) diff --git 
a/contexts/codesets/mira_model/python3/reset.py b/beaker_kernel/contexts/mira_model/code/python3/reset.py similarity index 100% rename from contexts/codesets/mira_model/python3/reset.py rename to beaker_kernel/contexts/mira_model/code/python3/reset.py diff --git a/contexts/codesets/mira_model/python3/setup.py b/beaker_kernel/contexts/mira_model/code/python3/setup.py similarity index 66% rename from contexts/codesets/mira_model/python3/setup.py rename to beaker_kernel/contexts/mira_model/code/python3/setup.py index 31f5f89..c05a435 100644 --- a/contexts/codesets/mira_model/python3/setup.py +++ b/beaker_kernel/contexts/mira_model/code/python3/setup.py @@ -1,5 +1,4 @@ import requests; import pandas as pd; import numpy as np; import scipy; import json; import mira; -#from mira.modeling.askenet.petrinet import AskeNetPetriNetModel; from mira.sources.askenet.petrinet import template_model_from_askenet_json; import sympy; import itertools; from mira.metamodel import *; from mira.modeling import Model; from mira.sources.amr import model_from_json; from mira.modeling.viz import GraphicalModel; diff --git a/contexts/codesets/mira_model/python3/stratify.py b/beaker_kernel/contexts/mira_model/code/python3/stratify.py similarity index 100% rename from contexts/codesets/mira_model/python3/stratify.py rename to beaker_kernel/contexts/mira_model/code/python3/stratify.py diff --git a/contexts/toolsets/mira_model_toolset.py b/beaker_kernel/contexts/mira_model/context.py similarity index 53% rename from contexts/toolsets/mira_model_toolset.py rename to beaker_kernel/contexts/mira_model/context.py index 5c774f9..85dcf4c 100644 --- a/contexts/toolsets/mira_model_toolset.py +++ b/beaker_kernel/contexts/mira_model/context.py @@ -3,41 +3,43 @@ import json import logging import os -import re +from typing import TYPE_CHECKING, Any, Dict, Optional + import requests -import sys -from typing import Optional, Any -from archytas.tool_utils import tool, toolset, AgentRef, LoopControllerRef +from 
beaker_kernel.lib.context import BaseContext +from beaker_kernel.lib.jupyter_kernel_proxy import JupyterMessage + +from .agent import MiraModelAgent -from .base import BaseToolset -from lib.jupyter_kernel_proxy import JupyterMessage +if TYPE_CHECKING: + from beaker_kernel.kernel import LLMKernel + from beaker_kernel.lib.subkernels.base import BaseSubkernel -logging.disable(logging.WARNING) # Disable warnings -logger = logging.Logger(__name__) +logger = logging.getLogger(__name__) -@toolset() -class MiraModelToolset(BaseToolset): - """ """ +class MiraModelContext(BaseContext): - toolset_name = "mira_model" + slug = "mira_model" + agent_cls = MiraModelAgent model_id: Optional[str] model_json: Optional[str] model_dict: Optional[dict[str, Any]] var_name: Optional[str] = "model" - def __init__(self, context, *args, **kwargs): - super().__init__(context=context, *args, **kwargs) + def __init__(self, beaker_kernel: "LLMKernel", subkernel: "BaseSubkernel", config: Dict[str, Any]) -> None: self.intercepts = { "save_amr_request": (self.save_amr_request, "shell"), "reset_request": (self.reset_request, "shell"), "stratify_request": (self.stratify_request, "shell"), } self.reset() + super().__init__(beaker_kernel, subkernel, self.agent_cls, config) async def setup(self, config, parent_header): + self.config = config item_id = config["id"] item_type = config.get("type", "model") print(f"Processing {item_type} AMR {item_id} as a MIRA model") @@ -45,7 +47,6 @@ async def setup(self, config, parent_header): item_id, item_type, parent_header=parent_header ) - async def post_execute(self, message): await self.send_mira_preview_message(parent_header=message.parent_header) @@ -80,7 +81,7 @@ async def load_mira(self): ] ) print(f"Running command:\n-------\n{command}\n---------") - await self.context.execute(command) + await self.execute(command) def reset(self): self.model_id = None @@ -115,145 +116,18 @@ async def model_structure(self) -> str: # Update the local dataframe to match 
what's in the shell. # This will be factored out when we switch around to allow using multiple runtimes. amr = ( - await self.context.evaluate( - f"AskeNetPetriNetModel(Model({self.var_name})).to_json()" - ) + await self.evaluate(self.get_code("model_to_json", {"var_name": self.var_name})) )["return"] return json.dumps(amr, indent=2) - @tool() - async def generate_code( - self, query: str, agent: AgentRef, loop: LoopControllerRef - ) -> None: - """ - Generated Python code to be run in an interactive Jupyter notebook for the purpose of exploring, modifying and visualizing a Pandas Dataframe. - - Input is a full grammatically correct question about or request for an action to be performed on the loaded model. - - Assume that the model is already loaded and has the variable named `model`. - Information about the dataframe can be loaded with the `model_structure` tool. - - Args: - query (str): A fully grammatically correct queistion about the current model. - """ - # set up the agent - # str: Valid and correct python code that fulfills the user's request. - prompt = f""" -You are a programmer writing code to help with scientific data analysis and manipulation in Python. - -Please write code that satisfies the user's request below. - -You have access to a variable name `model` that is a Petrinet model with the following structure: -{await self.model_structure()} - - -If you are asked to modify or update the model, modify the model in place, keeping the updated variable to still be named `model`. -You have access to the MIRA libraries. 
- -If you are asked to stratify the model, use the available function named `stratify` that is defined by the following python code: -```````````````````` -def stratify( - template_model: TemplateModel, - *, - key: str, - strata: Collection[str], - structure: Optional[Iterable[Tuple[str, str]]] = None, - directed: bool = False, - conversion_cls: Type[Template] = NaturalConversion, - cartesian_control: bool = False, - modify_names: bool = True, - params_to_stratify: Optional[Collection[str]] = None, - params_to_preserve: Optional[Collection[str]] = None, - concepts_to_stratify: Optional[Collection[str]] = None, - concepts_to_preserve: Optional[Collection[str]] = None, -) -> TemplateModel: - \"\"\"Multiplies a model into several strata. - - E.g., can turn the SIR model into a two-city SIR model by splitting each concept into - two derived concepts, each with the context for one of the two cities - - Parameters - ---------- - template_model : - A template model - key : - The (singular) name of the stratification, e.g., ``"city"`` - strata : - A list of the values for stratification, e.g., ``["boston", "nyc"]`` - structure : - An iterable of pairs corresponding to a directed network structure - where each of the pairs has two strata. If none given, will assume a complete - network structure. If no structure is necessary, pass an empty list. - directed : - Should the reverse direction conversions be added based on the given structure? - conversion_cls : - The template class to be used for conversions between strata - defined by the network structure. Defaults to :class:`NaturalConversion` - cartesian_control : - If true, splits all control relationships based on the stratification. - - This should be true for an SIR epidemiology model, the susceptibility to - infected transition is controlled by infected. 
If the model is stratified by - vaccinated and unvaccinated, then the transition from vaccinated - susceptible population to vaccinated infected populations should be - controlled by both infected vaccinated and infected unvaccinated - populations. - - This should be false for stratification of an SIR epidemiology model based - on cities, since the infected population in one city won't (directly, - through the perspective of the model) affect the infection of susceptible - population in another city. - modify_names : - If true, will modify the names of the concepts to include the strata - (e.g., ``"S"`` becomes ``"S_boston"``). If false, will keep the original - names. - params_to_stratify : - A list of parameters to stratify. If none given, will stratify all - parameters. - params_to_preserve: - A list of parameters to preserve. If none given, will stratify all - parameters. - concepts_to_stratify : - A list of concepts to stratify. If none given, will stratify all - concepts. - concepts_to_preserve: - A list of concepts to preserve. If none given, will stratify all - concepts. - - Returns - ------- - : - A stratified template model - \"\"\" -```````````````````` - -You also have access to the libraries pandas, numpy, scipy, matplotlib and the full mira python library. - -Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell. -You MUST wrap the code with a line containing three backticks (```) before and after the generated code. -No addtional text is needed in the response, just the code block. 
-""" - - llm_response = await agent.oneshot(prompt=prompt, query=query) - loop.set_state(loop.STOP_SUCCESS) - preamble, code, coda = re.split("```\w*", llm_response) - result = json.dumps( - { - "action": "code_cell", - "language": "python3", - "content": code.strip(), - } - ) - return result - async def send_mira_preview_message( self, server=None, target_stream=None, data=None, parent_header={} ): try: - preview = await self.context.evaluate(self.get_code("model_preview")) + preview = await self.evaluate(self.get_code("model_preview"), {"var_name": self.var_name}) content = preview["return"] - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "model_preview", content, parent_header=parent_header ) except Exception as e: @@ -266,7 +140,7 @@ async def save_amr_request(self, server, target_stream, data): new_name = content.get("name") new_model: dict = ( - await self.context.evaluate( + await self.evaluate( f"template_model_to_petrinet_json({self.var_name})" ) )["return"] @@ -300,7 +174,7 @@ async def save_amr_request(self, server, target_stream, data): new_model_id = create_req.json()["id"] content = {"model_id": new_model_id} - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "save_model_response", content, parent_header=message.header ) @@ -314,7 +188,7 @@ async def stratify_request(self, server, target_stream, data): if stratify_args is None: # Error logger.error("stratify_args must be set on stratify requests.") - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "error", { "ename": "ValueError", "evalue": "stratify_args must be set on stratify requests", @@ -326,14 +200,14 @@ async def stratify_request(self, server, target_stream, data): "var_name": model_name, "stratify_kwargs": repr(stratify_args), }) - stratify_result = await self.context.execute(stratify_code) + stratify_result = await self.execute(stratify_code) content = { "success": True, "executed_code": 
stratify_result["parent"].content["code"], } - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "stratify_response", content, parent_header=message.header ) await self.send_mira_preview_message(parent_header=message.header) @@ -347,14 +221,14 @@ async def reset_request(self, server, target_stream, data): reset_code = self.get_code("reset", { "var_name": model_name, }) - reset_result = await self.context.execute(reset_code) + reset_result = await self.execute(reset_code) content = { "success": True, "executed_code": reset_result["parent"].content["code"], } - self.context.kernel.send_response( + self.beaker_kernel.send_response( "iopub", "reset_response", content, parent_header=message.header ) await self.send_mira_preview_message(parent_header=message.header) diff --git a/beaker_kernel/contexts/pypackage/__init__.py b/beaker_kernel/contexts/pypackage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beaker_kernel/contexts/pypackage/agent.py b/beaker_kernel/contexts/pypackage/agent.py new file mode 100644 index 0000000..df1b71f --- /dev/null +++ b/beaker_kernel/contexts/pypackage/agent.py @@ -0,0 +1,50 @@ +import logging + +from archytas.react import Undefined +from archytas.tool_utils import (AgentRef, LoopControllerRef, is_tool, tool, + toolset) + +from beaker_kernel.lib.agent import BaseAgent +from beaker_kernel.lib.context import BaseContext + + +logger = logging.getLogger(__name__) + +@toolset() +class Toolset: + """My toolset""" + + + @tool() + async def retrieve_documentation( + self, target: str, agent: AgentRef, loop: LoopControllerRef + ) -> None: + """ + This function retrieves documentation about a Python module. + + You should use this to discover what is available within a package and determine the proper syntax and functionality on how to use the code. 
+ Querying against the module or package should list all avialable submodules and functions that exist, so you can use this to discover available + functions and the query the function to get usage information. + + Args: + target (str): Python package, module or function for which documentation is requested + """ + code = f''' +try: + import {target} +except ImportError: + pass +help({target}) +''' + r = await agent.context.evaluate(code) + return str(r) + + retrieve_documentation.__doc__ + +class PyPackageAgent(BaseAgent): + def __init__(self, context: BaseContext = None, tools: list = None, **kwargs): + tools = [Toolset] + libraries = { + + } + super().__init__(context, tools, **kwargs) diff --git a/beaker_kernel/contexts/pypackage/context.py b/beaker_kernel/contexts/pypackage/context.py new file mode 100644 index 0000000..39fa68f --- /dev/null +++ b/beaker_kernel/contexts/pypackage/context.py @@ -0,0 +1,21 @@ +from typing import TYPE_CHECKING, Any, Dict + +from beaker_kernel.lib.context import BaseContext +from beaker_kernel.lib.subkernels.python import PythonSubkernel + +from .agent import PyPackageAgent + +if TYPE_CHECKING: + from beaker_kernel.kernel import LLMKernel + from beaker_kernel.lib.agent import BaseAgent + from beaker_kernel.lib.subkernels.base import BaseSubkernel + +class PyPackageContext(BaseContext): + + slug = "pypackage" + agent_cls: "BaseAgent" = PyPackageAgent + + def __init__(self, beaker_kernel: "LLMKernel", subkernel: "BaseSubkernel", config: Dict[str, Any]) -> None: + if not isinstance(subkernel, PythonSubkernel): + raise ValueError("This context is only valid for Python.") + super().__init__(beaker_kernel, subkernel, self.agent_cls, config) diff --git a/beaker_kernel/kernel.json b/beaker_kernel/kernel.json new file mode 100644 index 0000000..373f907 --- /dev/null +++ b/beaker_kernel/kernel.json @@ -0,0 +1,8 @@ +{ + "argv": [ + "python3", "-m", "beaker_kernel.kernel", + "start", "{connection_file}" + ], + "display_name": "Beaker Kernel", 
+ "language": "beaker" +} diff --git a/beaker/kernel.py b/beaker_kernel/kernel.py similarity index 89% rename from beaker/kernel.py rename to beaker_kernel/kernel.py index 59e430a..23467ad 100644 --- a/beaker/kernel.py +++ b/beaker_kernel/kernel.py @@ -4,28 +4,30 @@ import json import logging import os -import requests import sys import traceback -from typing import Optional +from typing import TYPE_CHECKING, Optional +import requests from tornado import ioloop -from lib.jupyter_kernel_proxy import ( - KernelProxyManager, - JupyterMessage, - InterceptionFilter, - KERNEL_SOCKETS, - KERNEL_SOCKETS_NAMES, -) -from contexts.contexts import Context -from contexts.subkernels.base import BaseSubkernel -from contexts.subkernels.python import PythonSubkernel -from contexts.subkernels.julia import JuliaSubkernel -from contexts.subkernels.rlang import RSubkernel -from contexts.toolsets import DatasetToolset, MiraModelToolset -from contexts.toolsets.decapode_creation_toolset import DecapodesCreationToolset - +# TODO: Move context import to autodiscovery +from .contexts.dataset.context import DatasetContext +from .contexts.decapodes.context import DecapodesContext +from .contexts.mira_model.context import MiraModelContext +from .contexts.pypackage.context import PyPackageContext +from .lib.context import BaseContext, collect_contexts +from .lib.jupyter_kernel_proxy import (KERNEL_SOCKETS, KERNEL_SOCKETS_NAMES, + InterceptionFilter, JupyterMessage, + KernelProxyManager) +# TODO: Move subkernel import to autodiscovery +from .lib.subkernels.julia import JuliaSubkernel +from .lib.subkernels.python import PythonSubkernel +from .lib.subkernels.rlang import RSubkernel + +if TYPE_CHECKING: + from .lib.agent import BaseAgent + from .lib.subkernels.base import BaseSubkernel logger = logging.getLogger(__name__) @@ -42,10 +44,11 @@ } -AVAILABLE_TOOLSETS = { - "dataset": DatasetToolset, - "mira_model": MiraModelToolset, - "decapodes_creation": DecapodesCreationToolset, +AVAILABLE_CONTEXTS = 
{ + DatasetContext.slug: DatasetContext, + DecapodesContext.slug: DecapodesContext, + MiraModelContext.slug: MiraModelContext, + PyPackageContext.slug: PyPackageContext, } @@ -55,9 +58,9 @@ def get_socket(stream_name: str): class LLMKernel(KernelProxyManager): - implementation = "askem-chatty-py" + implementation = "askem-beaker" implementation_version = "0.1" - banner = "Chatty ASKEM" + banner = "Beaker Kernel" language_info = { "mimetype": "text/plain", @@ -65,19 +68,20 @@ class LLMKernel(KernelProxyManager): "file_extension": ".txt", } - context: Optional[Context] + context: Optional[BaseContext] internal_executions: set[str] - subkernel: BaseSubkernel + subkernel: "BaseSubkernel" subkernel_execution_tracking: dict[str, str] def __init__(self, server): self.internal_executions = set() self.subkernel_execution_tracking = {} self.subkernel_id = None - self.context = None + # self.context = None super().__init__(server) # We need to have a kernel when we start up, even though we can/will change the kernel/language when we set context self.new_kernel(language="python3") + self.context = PyPackageContext(beaker_kernel=self, subkernel=self.subkernel, config={}) self.add_intercepts() def add_intercepts(self): @@ -308,14 +312,15 @@ async def set_context(self, context_name, context_info, language="python3", pare logger.info("Subkernel changed: %s != %s", getattr(self.subkernel, "KERNEL_NAME", "unknown"), language) self.new_kernel(language=language) - toolset_class = AVAILABLE_TOOLSETS.get(context_name, None) - if not toolset_class: + context_cls = AVAILABLE_CONTEXTS.get(context_name, None) + if not context_cls: # TODO: Should we return an error if the requested toolset isn't available? 
return False # Create and setup context - self.context = Context(kernel=self, subkernel=self.subkernel, toolset_cls=toolset_class, config=context_info) - await self.context.setup(parent_header=parent_header) + # self.context = BaseContext(beaker_kernel=self, subkernel=self.subkernel, agent_cls=toolset_class, config=context_info) + self.context = context_cls(beaker_kernel=self, subkernel=self.subkernel, config=context_info) + await self.context.setup(config=context_info, parent_header=parent_header) async def post_execute(self, queue, message_id, data): message = JupyterMessage.parse(data) @@ -327,7 +332,7 @@ async def post_execute(self, queue, message_id, data): # Fetch event loop and ensure it's valid loop = asyncio.get_event_loop() - callback = getattr(self.context.toolset, "post_execute", None) + callback = getattr(self.context, "post_execute", None) if loop and callback and (callable(callback) or inspect.iscoroutinefunction(callback)): # If we have a callback function, then add it as a task to the execution loop so it runs loop.create_task(callback(message)) diff --git a/beaker_kernel/lib/__init__.py b/beaker_kernel/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beaker_kernel/lib/agent.py b/beaker_kernel/lib/agent.py new file mode 100644 index 0000000..6cde795 --- /dev/null +++ b/beaker_kernel/lib/agent.py @@ -0,0 +1,79 @@ +import json +import logging +import re +import typing + +from archytas.react import ReActAgent, Undefined +from archytas.tool_utils import AgentRef, LoopControllerRef, tool + +if typing.TYPE_CHECKING: + from .context import BaseContext + + +logger = logging.getLogger(__name__) + +@tool() +async def generate_code( + query: str, agent: AgentRef, loop: LoopControllerRef +) -> None: + """ + Generated Python code to be run in an interactive Jupyter notebook. + + Args: + query (str): A fully grammatically correct question or request. 
+ """ + + + prompt = f""" +You are a programmer writing code to help with working in a Python notebook. + +Please write code that satisfies the user's request below. + +Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell. +You MUST wrap the code with a line containing three backticks (```) before and after the generated code. +No addtional text is needed in the response, just the code block. +""" + + llm_response = await agent.oneshot(prompt=prompt, query=query) + loop.set_state(loop.STOP_SUCCESS) + preamble, code, coda = re.split("```\w*", llm_response) + result = json.dumps( + { + "action": "code_cell", + "language": "python3", + "content": code.strip(), + } + ) + return result + +class BaseAgent(ReActAgent): + + context: "BaseContext" + + def __init__( + self, + context: "BaseContext" = None, + tools: list = None, + **kwargs, + ): + self.context = context + + # TODO: In the future, have a single generate_code function with the details + # of how the code is generated defined on the superclass agent if changes needed + # if not isinstance(tools, list): + # tools = [generate_code] + # else: + # if generate_code not in tools: + # tools.append(generate_code) + + super().__init__( + model="gpt-4", + # api_key=api_key, # TODO: get this from configuration + tools=tools, + verbose=True, + spinner=None, + rich_print=False, + allow_ask_user=False, + thought_handler=context.beaker_kernel.handle_thoughts, + **kwargs + ) diff --git a/contexts/codesets.py b/beaker_kernel/lib/codeset.py similarity index 91% rename from contexts/codesets.py rename to beaker_kernel/lib/codeset.py index 0369bdd..c1ac08d 100644 --- a/contexts/codesets.py +++ b/beaker_kernel/lib/codeset.py @@ -1,7 +1,8 @@ import json import os.path -from jinja2 import Environment, select_autoescape, FileSystemLoader, Template -from typing import Dict, Any +from typing import Any, Dict + +from jinja2 import Environment, FileSystemLoader, 
Template, select_autoescape # Persistent registry of templates, split into a tree structure by toolset, language, then name. templates: Dict[str, Dict[str, Dict[str, Template]]] = {} diff --git a/beaker_kernel/lib/context.py b/beaker_kernel/lib/context.py new file mode 100644 index 0000000..c219072 --- /dev/null +++ b/beaker_kernel/lib/context.py @@ -0,0 +1,96 @@ +import inspect +import json +import logging +import os.path +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple + +from jinja2 import Environment, FileSystemLoader, Template, select_autoescape + +from .codeset import get_metadata, get_template + +if TYPE_CHECKING: + from archytas.react import ReActAgent + + from beaker_kernel.kernel import LLMKernel + + from .agent import BaseAgent + from .subkernels.base import BaseSubkernel + from .toolset import BaseToolset + +logger = logging.getLogger(__name__) + +class BaseContext: + slug: str + beaker_kernel: "LLMKernel" + subkernel: "BaseSubkernel" + toolset: "BaseToolset" + config: Dict[str, Any] + agent: "ReActAgent" + + intercepts: Dict[str, Tuple[Callable, str]] = {} + jinja_env: Optional[Environment] = None + templates: Dict[str, Template] = {} + + + def __init__(self, beaker_kernel: "LLMKernel", subkernel: "BaseSubkernel", agent_cls: "BaseAgent", config: Dict[str, Any]) -> None: # toolset_cls: Type["BaseToolset"], + self.beaker_kernel = beaker_kernel + self.subkernel = subkernel + # self.toolset = toolset_cls(context=self) + self.agent = agent_cls( + context=self, + tools=[], + ) + self.config = config + + # Add intercepts + for message, (handler, stream) in self.intercepts.items(): + self.beaker_kernel.add_intercept(message, handler, stream=stream) + + # Set auto-context from toolset + if getattr(self, "auto_context", None) is not None: + self.agent.set_auto_context("Default context", self.auto_context) + + class_dir = inspect.getfile(self.__class__) + code_dir = os.path.join(os.path.dirname(class_dir), "code", self.subkernel.SLUG) + if 
os.path.exists(code_dir): + self.jinja_env = Environment( + loader=FileSystemLoader(code_dir), + autoescape=select_autoescape() + ) + + for template_file in self.jinja_env.list_templates(): + template_name, _ = os.path.splitext(template_file) + template = self.jinja_env.get_template(template_file) + self.templates[template_name] = template + + async def setup(self, parent_header=None): + await self.toolset.setup(self.config, parent_header=parent_header) + + @property + def lang(self): + return self.subkernel.KERNEL_NAME + + @property + def metadata(self): + return json.loads(self.get_code('metadata')) + + def get_code(self, name, render_dict: Dict[str, Any]=None) -> str: + if render_dict is None: + render_dict = {} + template = self.templates.get(name, None) + if template is None: + raise ValueError( + f"'{name}' is not a defined codeset for context '{self.__class__.__name__}' and " + f"subkernel '{self.subkernel.DISPLAY_NAME} ({self.subkernel.KERNEL_NAME})'" + ) + return template.render(**render_dict) + + async def execute(self, command, response_handler=None, parent_header={}): + return await self.beaker_kernel.execute(command, response_handler, parent_header) + + async def evaluate(self, expression, parent_header={}): + return await self.beaker_kernel.evaluate(expression, parent_header) + + +def collect_contexts(path): + return [] diff --git a/lib/jupyter_kernel_proxy.py b/beaker_kernel/lib/jupyter_kernel_proxy.py similarity index 99% rename from lib/jupyter_kernel_proxy.py rename to beaker_kernel/lib/jupyter_kernel_proxy.py index d45ac41..b0babc0 100644 --- a/lib/jupyter_kernel_proxy.py +++ b/beaker_kernel/lib/jupyter_kernel_proxy.py @@ -5,25 +5,23 @@ # Author: Marc-Etienne M.Léveillé # See LICENSE file for redistribution. 
-import json -import hmac -import uuid -import hashlib import datetime import glob +import hashlib +import hmac +import json +import logging import os -import six -from collections import namedtuple, OrderedDict +import uuid +from collections import OrderedDict, namedtuple from operator import attrgetter -from jupyter_core.paths import jupyter_runtime_dir - +import six import zmq +from jupyter_core.paths import jupyter_runtime_dir from tornado import ioloop from zmq.eventloop import zmqstream -import logging - logger = logging.getLogger(__name__) diff --git a/beaker_kernel/lib/subkernels/__init__.py b/beaker_kernel/lib/subkernels/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/contexts/subkernels/base.py b/beaker_kernel/lib/subkernels/base.py similarity index 80% rename from contexts/subkernels/base.py rename to beaker_kernel/lib/subkernels/base.py index 93f77eb..d586f68 100644 --- a/contexts/subkernels/base.py +++ b/beaker_kernel/lib/subkernels/base.py @@ -1,13 +1,13 @@ import abc -from typing import Dict, Any -from ..codesets import get_template +from typing import Any, Dict + +from beaker_kernel.lib.codeset import get_template class BaseSubkernel(abc.ABC): DISPLAY_NAME: str + SLUG: str KERNEL_NAME: str - DATAFRAME_TYPE_NAME: str - def get_code(self, codeset_name: str, name: str, render_dict: Dict[str, Any]={}) -> str: return get_template(codeset_name, self.KERNEL_NAME, name, render_dict) diff --git a/contexts/subkernels/julia.py b/beaker_kernel/lib/subkernels/julia.py similarity index 93% rename from contexts/subkernels/julia.py rename to beaker_kernel/lib/subkernels/julia.py index 84941ea..854a648 100644 --- a/contexts/subkernels/julia.py +++ b/beaker_kernel/lib/subkernels/julia.py @@ -10,8 +10,8 @@ class JuliaSubkernel(BaseSubkernel): DISPLAY_NAME = "Julia" + SLUG = "julia" KERNEL_NAME = "julia-1.9" - DATAFRAME_TYPE_NAME = "DataFrames" @classmethod def parse_subkernel_return(cls, execution_result) -> Any: diff --git 
a/contexts/subkernels/python.py b/beaker_kernel/lib/subkernels/python.py similarity index 89% rename from contexts/subkernels/python.py rename to beaker_kernel/lib/subkernels/python.py index 1d6a3ad..03e03b7 100644 --- a/contexts/subkernels/python.py +++ b/beaker_kernel/lib/subkernels/python.py @@ -6,9 +6,8 @@ class PythonSubkernel(BaseSubkernel): DISPLAY_NAME = "Python 3" + SLUG = "python3" KERNEL_NAME = "python3" - DATAFRAME_TYPE_NAME = "pandas" - @classmethod def parse_subkernel_return(cls, execution_result) -> Any: @@ -16,4 +15,3 @@ def parse_subkernel_return(cls, execution_result) -> Any: if return_str: python_obj = ast.literal_eval(return_str) return python_obj - diff --git a/contexts/subkernels/rlang.py b/beaker_kernel/lib/subkernels/rlang.py similarity index 95% rename from contexts/subkernels/rlang.py rename to beaker_kernel/lib/subkernels/rlang.py index d894aeb..0b9bdd5 100644 --- a/contexts/subkernels/rlang.py +++ b/beaker_kernel/lib/subkernels/rlang.py @@ -10,10 +10,11 @@ class RSubkernel(BaseSubkernel): - DISPLAY_NAME = "r" + DISPLAY_NAME = "R" + SLUG = "rlang" KERNEL_NAME = "ir" DATAFRAME_TYPE_NAME = "data.frame" - + @classmethod def parse_subkernel_return(cls, execution_result) -> Any: # irkernel annoyingly does not return the last item in the code execution as the "return" item, so we print the response as part of the output diff --git a/contexts/toolsets/base.py b/beaker_kernel/lib/toolset.py similarity index 85% rename from contexts/toolsets/base.py rename to beaker_kernel/lib/toolset.py index 71452da..cf0cf25 100644 --- a/contexts/toolsets/base.py +++ b/beaker_kernel/lib/toolset.py @@ -1,22 +1,17 @@ from __future__ import annotations -import codecs -import copy -import datetime -import json + import logging -import os -import re +from typing import TYPE_CHECKING, Any, Callable, Dict + import requests -import tempfile -from typing import Optional, Callable, List, Tuple, Dict, Any, TYPE_CHECKING +from archytas.tool_utils import AgentRef, 
LoopControllerRef, tool, toolset -from ..codesets import get_metadata, get_template -from lib.jupyter_kernel_proxy import JupyterMessage -from archytas.tool_utils import tool, toolset, AgentRef, LoopControllerRef +from .codeset import get_metadata, get_template +from .jupyter_kernel_proxy import JupyterMessage if TYPE_CHECKING: - from beaker.kernel import LLMKernel - from contexts.contexts import Context + from beaker_kernel.kernel import LLMKernel + from beaker_kernel.lib.context import BaseContext logging.disable(logging.WARNING) # Disable warnings logger = logging.Logger(__name__) @@ -29,9 +24,9 @@ class BaseToolset: toolset_name: str codeset_name: str intercepts: dict[str, tuple[Callable, str]] - context: Context + context: BaseContext - def __init__(self, context: Context, *args, **kwargs): + def __init__(self, context: BaseContext, *args, **kwargs): super().__init__(*args, **kwargs) self.context = context if not getattr(self, 'toolset_name', None): diff --git a/contexts/contexts.py b/contexts/contexts.py deleted file mode 100644 index 23f6d02..0000000 --- a/contexts/contexts.py +++ /dev/null @@ -1,63 +0,0 @@ -import asyncio -import functools -import logging -from typing import Dict, Any, Type, TYPE_CHECKING - -from archytas.react import ReActAgent - -from .subkernels.base import BaseSubkernel -from .toolsets.base import BaseToolset -from .codesets import get_metadata, get_template - -if TYPE_CHECKING: - from beaker.kernel import LLMKernel - -logger = logging.getLogger(__name__) - -class Context: - subkernel: BaseSubkernel - toolset: BaseToolset - config: Dict[str, Any] - agent: ReActAgent - kernel: "LLMKernel" - - def __init__(self, kernel: "LLMKernel", subkernel: BaseSubkernel, toolset_cls: Type[BaseToolset], config: Dict[str, Any]) -> None: - self.kernel = kernel - self.subkernel = subkernel - self.toolset = toolset_cls(context=self) - self.agent = ReActAgent( - tools=[self.toolset], - allow_ask_user=False, - verbose=True, - spinner=None, - 
rich_print=False, - thought_handler=self.kernel.handle_thoughts, - ) - self.config = config - - # Add intercepts - for message, (handler, stream) in self.toolset.intercepts.items(): - self.kernel.add_intercept(message, handler, stream=stream) - - # Set auto-context from toolset - if getattr(self.toolset, "auto_context", None) is not None: - self.agent.set_auto_context("Default context", self.toolset.auto_context) - - async def setup(self, parent_header=None): - await self.toolset.setup(self.config, parent_header=parent_header) - - @property - def lang(self): - return self.subkernel.KERNEL_NAME - - def metadata(self): - return get_metadata(self.toolset.codeset_name, self.lang) - - def get_code(self, name, render_dict: Dict[str, Any]={}) -> str: - return get_template(self.toolset.codeset_name, self.lang, name, render_dict) - - async def execute(self, command, response_handler=None, parent_header={}): - return await self.kernel.execute(command, response_handler, parent_header) - - async def evaluate(self, expression, parent_header={}): - return await self.kernel.evaluate(expression, parent_header) diff --git a/contexts/toolsets/__init__.py b/contexts/toolsets/__init__.py deleted file mode 100644 index 466b3e0..0000000 --- a/contexts/toolsets/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .dataset_toolset import DatasetToolset -from .mira_model_toolset import MiraModelToolset diff --git a/docker-compose.yaml b/docker-compose.yaml index a581184..2a270fa 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -25,7 +25,7 @@ services: env_file: - .env volumes: - - ./beaker:/usr/local/share/jupyter/kernels/beaker + - ./beaker_kernel:/usr/local/share/jupyter/kernels/beaker_kernel - .:/jupyter depends_on: - julia-base-image diff --git a/service/dev_ui/src/index.ts b/service/dev_ui/src/index.ts index c102c97..a87a58a 100755 --- a/service/dev_ui/src/index.ts +++ b/service/dev_ui/src/index.ts @@ -288,7 +288,20 @@ async function createApp(manager: 
ServiceManager.IManager): void { llmButton.textContent = "Submit"; llmWidget.node.appendChild(llmContainer); - const languageSet = new Set(); + const languageMap = {}; + const setContext = () => { + languageSelect.innerHTML = ''; + const contextInfo = contexts[contextSelect.value]; + console.log(contextInfo); + // const languageSet = con[contextSelect.value]; + contextInfo.languages.forEach((lang) => { + const option = document.createElement('option'); + option.setAttribute("label", lang[0]); + option.setAttribute("value", lang[1]); + languageSelect.appendChild(option); + }); + contextPayloadInput.value = contextInfo.defaultPayload; + }; const contextWidget = new Widget(); const contextNode = document.createElement('div'); @@ -305,17 +318,12 @@ async function createApp(manager: ServiceManager.IManager): void { option.setAttribute("value", context); option.setAttribute("label", context); contextSelect.appendChild(option); - languages.forEach((lang) => {languageSet.add(lang)}); + languageMap[context] = languages; } - // for (const lang of ["python3", "julia"]) { - languageSet.forEach((lang) => { - const option = document.createElement('option'); - option.setAttribute("value", lang); - option.setAttribute("label", lang); - languageSelect.appendChild(option); - }); + contextSelect.onchange = setContext; + contextPayloadInput.className = 'json-input'; - contextPayloadInput.value = '{\n "df_hosp": "truth-incident-hospitalization",\n "df_cases": "truth-incident-case"\n}'; + contextPayloadInput.value = ''; contextButton.textContent = 'Submit'; contextButton.addEventListener("click", (e) => { setKernelContext({ @@ -348,7 +356,6 @@ async function createApp(manager: ServiceManager.IManager): void { messageButton.textContent = "Submit"; messageButton.onclick = (evt) => { - console.log(evt); let channel = messageChannelSelect.value; let msgType = messageTypeInput.value; let contentString = messagePayloadInput.value; @@ -402,6 +409,9 @@ async function createApp(manager: 
ServiceManager.IManager): void { mainPanel.addWidget(leftPanel); mainPanel.addWidget(nbWidget); + // Set up the context after the UI has been defined. + setContext(); + // Attach the panel to the DOM. Widget.attach(mainPanel, document.body); Widget.attach(completer, document.body); diff --git a/service/dev_ui/test.ipynb b/service/dev_ui/test.ipynb index 4df55d5..6d8f5b7 100644 --- a/service/dev_ui/test.ipynb +++ b/service/dev_ui/test.ipynb @@ -11,8 +11,8 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Chatty LLM Agent", - "language": "chatty", + "display_name": "Beaker Kernel", + "language": "beaker", "name": "beaker" }, "language_info": { diff --git a/service/main.py b/service/main.py index 7bd698c..e7032e1 100644 --- a/service/main.py +++ b/service/main.py @@ -32,20 +32,43 @@ def get(self): # TODO: Generate this from a single source of truth at startup time context_data = { "dataset": { - "languages": ["python3", "julia-1.9", "ir"], + "languages": [ + ["python3", "python3"], + ["julia", "julia-1.9"], + ["rlang", "ir"] + ], + "defaultPayload": ''' +{ + "df_hosp": "truth-incident-hospitalization", + "df_cases": "truth-incident-case" +} +'''.strip(), }, "mira_model": { - "languages": ["python3"], + "languages": [ + ["python3", "python3"] + ], + "defaultPayload": ''' +{ + "id": "sir-model-id" +} +'''.strip(), }, - "decapodes_creation": { - "languages": ["julia-1.9"], + "decapodes": { + "languages": [ + ["julia", "julia-1.9"] + ], + "defaultPayload": ''' +{ + "halfar": "ice_dynamics-id" +} +'''.strip(), }, } return self.write(context_data) - class AskemJupyterApp(LabServerApp): name = __name__ load_other_extensions = False