Skip to content

Commit

Permalink
Major refactor for readability and maintainability (#36)
Browse files Browse the repository at this point in the history
* Major refactor for readability and maintainability

Moved from confusing toolset/codeset/subkernels structure to more
encapsulated form.
Better separation of LLM actions from the rest of the context.
Much improved inheritance structure with clear defaults and boundaries.
Seeds for auto-discovery and plugin architecture added.
beaker_kernel is now a full python library.
Improvements and fixes for existing code flows.
Improved dev_ui experience when changing context.

* Import flake and sort

* Cleanup debug logging

* Make git pull success optional

* Cleanup subkernel files
  • Loading branch information
mattprintz authored Dec 4, 2023
1 parent 6944f94 commit d8a4e06
Show file tree
Hide file tree
Showing 68 changed files with 759 additions and 503 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ RUN python -m pip install -e ."[ode,tests,dkg-client,sbml]"
WORKDIR /jupyter

# Kernel must be placed in a specific spot in the filesystem
COPY beaker /usr/local/share/jupyter/kernels/beaker
COPY beaker_kernel /usr/local/share/jupyter/kernels/beaker_kernel

# Copy src code over
RUN chown 1000:1000 /jupyter
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build:
.PHONY:dev
dev:service/dev_ui/build/index.js
if [[ "$$(docker compose ps | grep 'jupyter')" == "" ]]; then \
docker compose pull && \
docker compose pull; \
docker compose up -d --build && \
(sleep 1; python -m webbrowser "http://localhost:8888/dev_ui"); \
docker compose logs -f jupyter || true; \
Expand Down Expand Up @@ -43,7 +43,7 @@ dev-install:.env
ENVDIR=$$(poetry -q run python -c 'import os; print(os.environ.get("VIRTUAL_ENV", ""))'); \
KERNEL_INSTALL_PATH=$${ENVDIR}/share/jupyter/kernels/beaker; \
if [[ ! -e "$${KERNEL_INSTALL_PATH}" && -n "$${ENVDIR}" ]]; then \
ln -s "${BASEDIR}/beaker" "$${KERNEL_INSTALL_PATH}"; \
ln -s "${BASEDIR}/beaker_kernel" "$${KERNEL_INSTALL_PATH}"; \
fi; \
if [[ ! -e "service/test.ipynb" ]]; then \
cp service/dev_ui/test.ipynb service/test.ipynb; \
Expand Down
8 changes: 0 additions & 8 deletions beaker/kernel.json

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
75 changes: 75 additions & 0 deletions beaker_kernel/contexts/dataset/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import json
import logging
import re

from archytas.react import Undefined
from archytas.tool_utils import AgentRef, LoopControllerRef, tool, toolset

from beaker_kernel.lib.agent import BaseAgent
from beaker_kernel.lib.context import BaseContext
from beaker_kernel.lib.jupyter_kernel_proxy import JupyterMessage
from beaker_kernel.lib.toolset import BaseToolset

# Process-wide switch: suppress every log record at WARNING severity and below.
logging.disable(logging.WARNING)  # Disable warnings
# Obtain the logger through the logging registry instead of instantiating
# `logging.Logger` directly: a directly constructed Logger is not registered
# with the manager and has no parent, so handlers/levels configured on the
# root logger would never apply to it.
logger = logging.getLogger(__name__)

# Toolset registered with archytas that gives the agent a single tool,
# `generate_code`, for producing notebook code about the loaded dataframes.
# NOTE(review): the tool docstring below is kept verbatim because it is
# presumably surfaced to the LLM as the tool description -- confirm against
# archytas before rewording it.
@toolset()
class DatasetToolset:

    @tool()
    async def generate_code(
        self, query: str, agent: AgentRef, loop: LoopControllerRef
    ) -> None:
        """
        Generated code to be run in an interactive Jupyter notebook for the purpose of exploring, modifying and visualizing a Dataframe.
        Input is a full grammatically correct question about or request for an action to be performed on the loaded dataframe.
        Args:
            query (str): A fully grammatically correct question about the current dataset.
        """
        # Despite the `-> None` annotation, this returns a JSON string of the
        # form {"action": "code_cell", "language": ..., "content": ...}.
        # str: Valid and correct python code that fulfills the user's request.

        # Build one prompt section per loaded dataframe. The keys are
        # snapshotted with list() because `describe_dataset` is awaited inside
        # the loop; iterating the live dict would raise RuntimeError if the map
        # were resized while this coroutine is suspended.
        # The prompt literals are written flush-left so that no source
        # indentation ends up inside the text sent to the model.
        var_sections = []
        for var_name in list(agent.context.dataset_map):
            df_info = await agent.context.describe_dataset(var_name)
            var_sections.append(f"""
You have access to a variable name `{var_name}` that is a {agent.context.metadata.get("df_lib_name", "Pandas")} Dataframe with the following structure:
{df_info}
--- End description of variable `{var_name}`
""")
        prompt = f"""
You are a programmer writing code to help with scientific data analysis and manipulation in {agent.context.metadata.get("name", "a Jupyter notebook")}.
Please write code that satisfies the user's request below.
{"".join(var_sections)}
If you are asked to modify or update the dataframe, modify the dataframe in place, keeping the updated variable the same unless specifically specified otherwise.
You also have access to the libraries {agent.context.metadata.get("libraries", "that are common for these tasks")}.
Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell.
You MUST wrap the code with a line containing three backticks (```) before and after the generated code.
No addtional text is needed in the response, just the code block.
"""

        llm_response = await agent.oneshot(prompt=prompt, query=query)
        # The generated code is the final answer, so end the agent loop here.
        loop.set_state(loop.STOP_SUCCESS)

        # Split on code fences (with an optional language tag). A raw string
        # is used so `\w` is a regex class rather than an invalid str escape.
        # The model is asked for exactly one fenced block, but replies vary:
        #   - well-formed reply    -> [preamble, code, coda]  => take index 1
        #   - several blocks       -> take the first block
        #   - unterminated fence   -> [preamble, code]        => take index 1
        #   - no fence at all      -> [whole reply]           => take index 0
        # The previous strict 3-way unpacking raised ValueError for every case
        # except the first.
        segments = re.split(r"```\w*", llm_response)
        code = segments[1] if len(segments) > 1 else segments[0]

        result = json.dumps(
            {
                "action": "code_cell",
                "language": agent.context.lang,
                "content": code.strip(),
            }
        )
        return result


class DatasetAgent(BaseAgent):
    # Agent for the dataset context; it is always wired to DatasetToolset.

    def __init__(self, context: BaseContext = None, tools: list = None, **kwargs):
        """
        Initialise the base agent with `DatasetToolset` as its only tool.

        Args:
            context: Context object handed through to `BaseAgent`.
            tools: Accepted for signature compatibility; the value is not
                forwarded (NOTE(review): confirm that discarding caller-supplied
                tools is intended).
            **kwargs: Passed unchanged to `BaseAgent.__init__`.
        """
        super().__init__(context, [DatasetToolset], **kwargs)
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pandas as pd; import io
import time
output_buff = io.BytesIO()
{{ var_name|default("df") }}.to_csv(output_buff, index=False, header=True)
output_buff.seek(0)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,55 +1,36 @@
import codecs
import copy
import datetime
import json
import logging
import os
import re
import requests
import tempfile
from functools import partial
from typing import Optional, Callable, List, Tuple, Any
from typing import TYPE_CHECKING, Any, Dict

from archytas.tool_utils import tool, toolset, AgentRef, LoopControllerRef
from beaker_kernel.lib.context import BaseContext

from .base import BaseToolset
from lib.jupyter_kernel_proxy import JupyterMessage
from .agent import DatasetAgent

if TYPE_CHECKING:
from beaker_kernel.kernel import LLMKernel
from beaker_kernel.lib.agent import BaseAgent
from beaker_kernel.lib.subkernels.base import BaseSubkernel

# Process-wide switch: suppress every log record at WARNING severity and below.
logging.disable(logging.WARNING)  # Disable warnings
# Obtain the logger through the logging registry instead of instantiating
# `logging.Logger` directly: a directly constructed Logger is not registered
# with the manager and has no parent, so handlers/levels configured on the
# root logger would never apply to it.
logger = logging.getLogger(__name__)

class DatasetContext(BaseContext):

@toolset()
class DatasetToolset(BaseToolset):
""" """
slug: str = "dataset"
agent_cls: "BaseAgent" = DatasetAgent

dataset_map: Optional[dict[str, dict[str, Any]]]
def __init__(self, beaker_kernel: "LLMKernel", subkernel: "BaseSubkernel", config: Dict[str, Any]) -> None:

# {
# "df": {"id": 12345, "filename": "dataset.csv"},
# "df2": {"id": 54321}
# "df_map": {"id": 12345, "filename": "mappings.csv"},
# }

def __init__(self, context, *args, **kwargs):
super().__init__(context=context, *args, **kwargs)
self.dataset_map = {}
self.intercepts = {
"download_dataset_request": (self.download_dataset_request, "shell"),
"save_dataset_request": (self.save_dataset_request, "shell"),
}
self.reset()

super().__init__(beaker_kernel, subkernel, self.agent_cls, config)

async def setup(self, config, parent_header):
# DEPRECATED: Backwards compatible handling of "old style" single id contexts
if len(config) == 1 and "id" in config:
dataset_id = config["id"]
print(f"Processing dataset w/id {dataset_id}")
await self.set_dataset(dataset_id, parent_header=parent_header)
else:
print(f"Processing datasets w/ids {', '.join(config.values())}")
await self.set_datasets(config, parent_header=parent_header)
self.config = config
print(f"Processing datasets w/ids {', '.join(self.config.values())}")
await self.set_datasets(self.config, parent_header=parent_header)

async def post_execute(self, message):
await self.update_dataset_map(parent_header=message.parent_header)
Expand Down Expand Up @@ -99,7 +80,7 @@ async def load_dataframes(self):
self.get_code("load_df", {"var_map": var_map}),
]
)
await self.context.execute(command)
await self.execute(command)
await self.update_dataset_map()

def reset(self):
Expand All @@ -116,14 +97,14 @@ async def send_df_preview_message(
}
for var_name, df in self.dataset_map.items()
}
self.context.kernel.send_response(
self.beaker_kernel.send_response(
"iopub", "dataset", preview, parent_header=parent_header
)
return data

async def update_dataset_map(self, parent_header={}):
code = self.get_code("df_info")
df_info_response = await self.context.kernel.evaluate(
df_info_response = await self.beaker_kernel.evaluate(
code,
parent_header=parent_header,
)
Expand Down Expand Up @@ -200,57 +181,6 @@ async def describe_dataset(self, var_name) -> str:
"""
return output

@tool()
async def generate_code(
self, query: str, agent: AgentRef, loop: LoopControllerRef
) -> None:
"""
Generated code to be run in an interactive Jupyter notebook for the purpose of exploring, modifying and visualizing a Dataframe.
Input is a full grammatically correct question about or request for an action to be performed on the loaded dataframe.
Args:
query (str): A fully grammatically correct question about the current dataset.
"""
# set up the agent
# str: Valid and correct python code that fulfills the user's request.
var_sections = []
for var_name, dataset_obj in self.dataset_map.items():
df_info = await self.describe_dataset(var_name)
var_sections.append(f"""
You have access to a variable name `{var_name}` that is a {self.metadata.get("df_lib_name", "Pandas")} Dataframe with the following structure:
{df_info}
--- End description of variable `{var_name}`
""")
prompt = f"""
You are a programmer writing code to help with scientific data analysis and manipulation in {self.metadata.get("name", "a Jupyter notebook")}.
Please write code that satisfies the user's request below.
{"".join(var_sections)}
If you are asked to modify or update the dataframe, modify the dataframe in place, keeping the updated variable the same unless specifically specified otherwise.
You also have access to the libraries {self.metadata.get("libraries", "that are common for these tasks")}.
Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell.
You MUST wrap the code with a line containing three backticks (```) before and after the generated code.
No addtional text is needed in the response, just the code block.
"""

llm_response = await agent.oneshot(prompt=prompt, query=query)
loop.set_state(loop.STOP_SUCCESS)
preamble, code, coda = re.split("```\w*", llm_response)
result = json.dumps(
{
"action": "code_cell",
"language": self.context.lang,
"content": code.strip(),
}
)
return result

async def download_dataset_request(self, queue, message_id, data):
message = JupyterMessage.parse(data)
content = message.content
Expand All @@ -260,9 +190,9 @@ async def download_dataset_request(self, queue, message_id, data):
# TODO: This doesn't work very well. Is very slow to encode, and transfer all of the required messages multiple times proxies through the proxy kernel.
# We should find a better way to accomplish this if it's needed.
code = self.get_code("df_download", {"var_name": var_name})
df_response = await self.context.evaluate(code)
df_response = await self.evaluate(code)
df_contents = df_response.get("stdout_list")
self.context.kernel.send_response(
self.beaker_kernel.send_response(
"iopub",
"download_response",
{
Expand Down Expand Up @@ -297,12 +227,12 @@ async def save_dataset_request(self, queue, message_id, data):
}
)

df_response = await self.context.evaluate(code)
df_response = await self.evaluate(code)

if df_response:
new_dataset_id = df_response.get("return", {}).get("dataset_id", None)
if new_dataset_id:
self.context.kernel.send_response(
self.beaker_kernel.send_response(
"iopub",
"save_dataset_response",
{
Expand Down
File renamed without changes.
67 changes: 67 additions & 0 deletions beaker_kernel/contexts/decapodes/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import json
import logging
import re

from archytas.tool_utils import AgentRef, LoopControllerRef, tool, toolset

from beaker_kernel.lib.agent import BaseAgent
from beaker_kernel.lib.context import BaseContext

# Process-wide switch: suppress every log record at WARNING severity and below.
logging.disable(logging.WARNING)  # Disable warnings
# Obtain the logger through the logging registry instead of instantiating
# `logging.Logger` directly: a directly constructed Logger is not registered
# with the manager and has no parent, so handlers/levels configured on the
# root logger would never apply to it.
logger = logging.getLogger(__name__)


# NOTE(review): the class and tool docstrings below are kept verbatim because
# they are presumably surfaced to the LLM as toolset/tool descriptions --
# confirm against archytas before rewording them.
@toolset()
class DecapodesToolset:
    """
    Toolset used for working with the Julia package Decacpodes, a framework for doing descrete exterior calculus based modeling.
    """

    @tool()
    async def generate_code(
        self, query: str, agent: AgentRef, loop: LoopControllerRef
    ) -> None:
        """
        Generated Julia code to be run in an interactive Jupyter notebook for the purpose of exploring and modifying systems the DecaExpr.
        Input is a full grammatically correct question about or request for an action to be performed on the loaded model.
        Assume that the expression is already loaded and has the variable named `_expr`.
        Information about the dataframe can be loaded with the `model_structure` tool.
        Args:
            query (str): A fully grammatically correct queistion about the current model.
        """
        # Despite the `-> None` annotation, this returns a JSON string of the
        # form {"action": "code_cell", "language": "julia-1.9", "content": ...}.

        # The prompt literal is written flush-left so that no source
        # indentation ends up inside the text sent to the model. The current
        # model structure is fetched from the context and embedded inline.
        prompt = f"""
You are a programmer writing code to help with scientific data analysis and manipulation in Julia.
Please write code that satisfies the user's request below.
You have access to a variable name `_expr` that is a Decapodes SyntacticModel model with the following structure:
{await agent.context.model_structure()}
Your generated will be in the form `_expr = parse_decapode(quote ...modified object.. end)`
Please generate the code as if you were programming inside a Jupyter Notebook and the code is to be executed inside a cell.
You MUST wrap the code with a line containing three backticks (```) before and after the generated code.
No addtional text is needed in the response, just the code block.
"""

        llm_response = await agent.oneshot(prompt=prompt, query=query)
        # The generated code is the final answer, so end the agent loop here.
        loop.set_state(loop.STOP_SUCCESS)

        # Split on code fences (with an optional language tag). A raw string
        # is used so `\w` is a regex class rather than an invalid str escape.
        # The model is asked for exactly one fenced block, but replies vary:
        #   - well-formed reply    -> [preamble, code, coda]  => take index 1
        #   - several blocks       -> take the first block
        #   - unterminated fence   -> [preamble, code]        => take index 1
        #   - no fence at all      -> [whole reply]           => take index 0
        # The previous strict 3-way unpacking raised ValueError for every case
        # except the first.
        segments = re.split(r"```\w*", llm_response)
        code = segments[1] if len(segments) > 1 else segments[0]

        result = json.dumps(
            {
                "action": "code_cell",
                "language": "julia-1.9",
                "content": code.strip(),
            }
        )
        return result


class DecapodesAgent(BaseAgent):
    # Agent for the decapodes context; it is always wired to DecapodesToolset.

    def __init__(self, context: BaseContext = None, tools: list = None, **kwargs):
        """
        Initialise the base agent with `DecapodesToolset` as its only tool.

        Args:
            context: Context object handed through to `BaseAgent`.
            tools: Accepted for signature compatibility; the value is not
                forwarded (NOTE(review): confirm that discarding caller-supplied
                tools is intended).
            **kwargs: Passed unchanged to `BaseAgent.__init__`.
        """
        super().__init__(context, [DecapodesToolset], **kwargs)
Loading

0 comments on commit d8a4e06

Please sign in to comment.