From fdb254ab77c58ff55803e72137311307e4be0a86 Mon Sep 17 00:00:00 2001 From: liqun Date: Thu, 10 Oct 2024 11:27:34 +0800 Subject: [PATCH 1/4] support custom image --- taskweaver/ces/__init__.py | 4 ++- taskweaver/ces/environment.py | 35 +++++++++++++++++--------- taskweaver/ces/manager/sub_proc.py | 2 ++ taskweaver/module/execution_service.py | 7 ++++++ 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/taskweaver/ces/__init__.py b/taskweaver/ces/__init__.py index 6ecc8ee8..7d8a45eb 100644 --- a/taskweaver/ces/__init__.py +++ b/taskweaver/ces/__init__.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Literal, Optional from taskweaver.ces.common import Manager from taskweaver.ces.manager.defer import DeferredManager @@ -8,11 +8,13 @@ def code_execution_service_factory( env_dir: str, kernel_mode: Literal["local", "container"] = "local", + custom_image: Optional[str] = None, ) -> Manager: def sub_proc_manager_factory() -> SubProcessManager: return SubProcessManager( env_dir=env_dir, kernel_mode=kernel_mode, + custom_image=custom_image, ) return DeferredManager( diff --git a/taskweaver/ces/environment.py b/taskweaver/ces/environment.py index ae7c21d2..84000204 100644 --- a/taskweaver/ces/environment.py +++ b/taskweaver/ces/environment.py @@ -111,12 +111,15 @@ class EnvMode(enum.Enum): class Environment: + DEFAULT_IMAGE = "taskweavercontainers/taskweaver-executor:latest" + def __init__( self, env_id: Optional[str] = None, env_dir: Optional[str] = None, env_mode: Optional[EnvMode] = EnvMode.Local, port_start_inside_container: Optional[int] = 12345, + custom_image: Optional[str] = None, ) -> None: self.session_dict: Dict[str, EnvSession] = {} self.id = get_id(prefix="env") if env_id is None else env_id @@ -145,19 +148,27 @@ def __init__( except docker.errors.DockerException as e: raise docker.errors.DockerException(f"Failed to connect to Docker daemon: {e}. ") - self.image_name = "taskweavercontainers/taskweaver-executor:latest" - try: - local_image = self.docker_client.images.get(self.image_name) - registry_image = self.docker_client.images.get_registry_data(self.image_name) - if local_image.id != registry_image.id: - logger.info(f"Local image {local_image.id} does not match registry image {registry_image.id}.") - raise docker.errors.ImageNotFound("Local image is outdated.") - except docker.errors.ImageNotFound: - logger.info("Pulling image from docker.io.") + if custom_image: + logger.info(f"Using custom image {custom_image}.") + self.image_name = custom_image + try: + self.docker_client.images.get(self.image_name) + except docker.errors.ImageNotFound: + raise docker.errors.ImageNotFound(f"Custom image {self.image_name} not found.") + else: + self.image_name = self.DEFAULT_IMAGE try: - self.docker_client.images.pull(self.image_name) - except docker.errors.DockerException as e: - raise docker.errors.DockerException(f"Failed to pull image: {e}. ") + local_image = self.docker_client.images.get(self.image_name) + registry_image = self.docker_client.images.get_registry_data(self.image_name) + if local_image.id != registry_image.id: + logger.info(f"Local image {local_image.id} does not match registry image {registry_image.id}.") + raise docker.errors.ImageNotFound("Local image is outdated.") + except docker.errors.ImageNotFound: + logger.info("Pulling image from docker.io.") + try: + self.docker_client.images.pull(self.image_name) + except docker.errors.DockerException as e: + raise docker.errors.DockerException(f"Failed to pull image: {e}. ") self.session_container_dict: Dict[str, str] = {} self.port_start_inside_container = port_start_inside_container diff --git a/taskweaver/ces/manager/sub_proc.py b/taskweaver/ces/manager/sub_proc.py index aee81c28..e7fa450d 100644 --- a/taskweaver/ces/manager/sub_proc.py +++ b/taskweaver/ces/manager/sub_proc.py @@ -57,6 +57,7 @@ def __init__( env_id: Optional[str] = None, env_dir: Optional[str] = None, kernel_mode: KernelModeType = "local", + custom_image: Optional[str] = None, ) -> None: from taskweaver.ces.environment import Environment, EnvMode @@ -76,6 +77,7 @@ def __init__( env_id, env_dir, env_mode=env_mode, + custom_image=custom_image, ) def initialize(self) -> None: diff --git a/taskweaver/module/execution_service.py b/taskweaver/module/execution_service.py index 0640ae39..ac46ac67 100644 --- a/taskweaver/module/execution_service.py +++ b/taskweaver/module/execution_service.py @@ -19,6 +19,7 @@ def _configure(self) -> None: "kernel_mode", "container", ) + assert self.kernel_mode in ["local", "container"], f"Invalid kernel mode: {self.kernel_mode}" if self.kernel_mode == "local": print( "TaskWeaver is running in the `local` mode. This implies that " @@ -27,6 +28,11 @@ def _configure(self) -> None: "More information can be found in the documentation " "(https://microsoft.github.io/TaskWeaver/docs/code_execution/).", ) + self.custom_image = self._get_str( + "custom_image", + default=None, + required=False, + ) class ExecutionServiceModule(Module): @@ -39,5 +45,6 @@ def provide_executor_manager(self, config: ExecutionServiceConfig) -> Manager: self.manager = code_execution_service_factory( env_dir=config.env_dir, kernel_mode=config.kernel_mode, + custom_image=config.custom_image, ) return self.manager From 6d3b6586ffbf038c85137106cc389a2e20b98fa8 Mon Sep 17 00:00:00 2001 From: liqun Date: Thu, 10 Oct 2024 15:03:31 +0800 Subject: [PATCH 2/4] fix container import issue --- .gitattributes | 1 + docker/ces_container/Dockerfile | 2 +- scripts/build_executor.ps1 | 2 +- taskweaver/ces/kernel/launcher.py | 8 +++++++- 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..dfdb8b77 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.sh text eol=lf diff --git a/docker/ces_container/Dockerfile b/docker/ces_container/Dockerfile index 768f3686..1d17c2fe 100644 --- a/docker/ces_container/Dockerfile +++ b/docker/ces_container/Dockerfile @@ -17,7 +17,7 @@ COPY taskweaver/__init__.py /app/taskweaver/__init__.py COPY docker/ces_container/entrypoint.sh /app/entrypoint.sh RUN chmod +x /app/entrypoint.sh -ENV PYTHONPATH "${PYTHONPATH}:/app" +ENV PYTHONPATH="/app" ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/scripts/build_executor.ps1 b/scripts/build_executor.ps1 index 68631fa5..5ef6096c 100644 --- a/scripts/build_executor.ps1 +++ b/scripts/build_executor.ps1 @@ -1,7 +1,7 @@ $scriptDirectory = $PSScriptRoot Write-Host "The script directory is: $scriptDirectory" -$version = "0.2" +$version = "0.3" $imageName = "taskweavercontainers/taskweaver-executor" $imageFullName = "${imageName}:${version}" diff --git a/taskweaver/ces/kernel/launcher.py b/taskweaver/ces/kernel/launcher.py index 6d516a8d..759129a9 100644 --- a/taskweaver/ces/kernel/launcher.py +++ b/taskweaver/ces/kernel/launcher.py @@ -1,7 +1,6 @@ import os import sys -from taskweaver.ces.kernel.ext import TaskWeaverZMQShellDisplayHook from taskweaver.ces.kernel.kernel_logging import logger kernel_mode = os.getenv("TASKWEAVER_KERNEL_MODE", "local") @@ -56,6 +55,8 @@ def start_app(): from ipykernel.kernelapp import IPKernelApp from ipykernel.zmqshell import ZMQInteractiveShell + from taskweaver.ces.kernel.ext import TaskWeaverZMQShellDisplayHook + # override displayhook_class for skipping output suppress token issue ZMQInteractiveShell.displayhook_class = TaskWeaverZMQShellDisplayHook @@ -82,6 +83,11 @@ def start_app(): if __name__ == "__main__": if sys.path[0] == "": del sys.path[0] + import site + + user_site_packages = site.getusersitepackages() + if user_site_packages not in sys.path: + sys.path.append(site.getusersitepackages()) logger.info("Starting process...") logger.info("sys.path: %s", sys.path) logger.info("os.getcwd(): %s", os.getcwd()) From d01a0f5f5f2d17150d1a730b169067cb5bb27921 Mon Sep 17 00:00:00 2001 From: liqun Date: Thu, 10 Oct 2024 19:02:22 +0800 Subject: [PATCH 3/4] fix package issues --- docker/ces_container/entrypoint.sh | 3 ++- taskweaver/ces/kernel/launcher.py | 4 ---- tests/unit_tests/test_environment.py | 14 ++++++++------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docker/ces_container/entrypoint.sh b/docker/ces_container/entrypoint.sh index 3b13a3be..8ece511d 100644 --- a/docker/ces_container/entrypoint.sh +++ b/docker/ces_container/entrypoint.sh @@ -9,5 +9,6 @@ groupmod -g $GROUP_ID taskweaver chown -R taskweaver:taskweaver /app -su taskweaver -c "python -m taskweaver.ces.kernel.launcher" +su taskweaver -c "python -m venv --system-site-packages venv" +su taskweaver -c "bash -c 'source venv/bin/activate; python -m taskweaver.ces.kernel.launcher'" diff --git a/taskweaver/ces/kernel/launcher.py b/taskweaver/ces/kernel/launcher.py index 759129a9..d5638e07 100644 --- a/taskweaver/ces/kernel/launcher.py +++ b/taskweaver/ces/kernel/launcher.py @@ -83,11 +83,7 @@ def start_app(): if __name__ == "__main__": if sys.path[0] == "": del sys.path[0] - import site - user_site_packages = site.getusersitepackages() - if user_site_packages not in sys.path: - sys.path.append(site.getusersitepackages()) logger.info("Starting process...") logger.info("sys.path: %s", sys.path) logger.info("os.getcwd(): %s", os.getcwd()) diff --git a/tests/unit_tests/test_environment.py b/tests/unit_tests/test_environment.py index 6d5a36fa..e737af7b 100644 --- a/tests/unit_tests/test_environment.py +++ b/tests/unit_tests/test_environment.py @@ -161,7 +161,11 @@ def test_environment_start_outside_container(): cwd = os.path.dirname(os.path.abspath(__file__)) sessions = os.path.join(cwd, "sessions") try: - env = Environment("local", env_mode=EnvMode.Container) + env = Environment( + "local", + env_mode=EnvMode.Container, + custom_image="taskweavercontainers/taskweaver-executor:0.3", + ) env.start_session( session_id="session_id", session_dir=os.path.join(sessions, "session_id"), @@ -177,13 +181,11 @@ def test_environment_start_outside_container(): connection_file = glob.glob(conn_file_glob)[0] ports_file = os.path.join(ces_dir, "ports.json") assert os.path.isfile(ports_file) - - connect_and_execute_code(connection_file, ports_file) - - saved_file = os.path.join(session_dir, "cwd", "filename.txt") - assert os.path.isfile(saved_file) + code = "!pip install yfinance\nimport sys\nprint(sys.path)\nimport yfinance as yf\nprint(yf.__version__)" + connect_and_execute_code(connection_file, ports_file, code=code) env.stop_session("session_id") + finally: # delete sessions shutil.rmtree(sessions) From d7df0cdf171a5e8a9f3b5bb305ea85c85b6b58c5 Mon Sep 17 00:00:00 2001 From: liqun Date: Mon, 21 Oct 2024 15:24:23 +0800 Subject: [PATCH 4/4] support openai json schema --- requirements.txt | 3 ++- scripts/build_executor.sh | 2 +- .../code_interpreter/code_generator_prompt.yaml | 8 ++++---- taskweaver/llm/base.py | 2 +- taskweaver/llm/openai.py | 8 ++++++++ taskweaver/planner/planner_prompt.yaml | 9 ++++++--- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index 367c0def..f8b73c3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,8 @@ pandas>=2.0.0 matplotlib>=3.4 seaborn>=0.11 python-dotenv>=1.0.0 -openai>=1.2.4 +openai>=1.42.0 +pydantic>=2.8.2 pyyaml>=6.0 scikit-learn>=1.2.2 click>=8.0.1 diff --git a/scripts/build_executor.sh b/scripts/build_executor.sh index e25642e7..2affdafc 100644 --- a/scripts/build_executor.sh +++ b/scripts/build_executor.sh @@ -4,7 +4,7 @@ scriptDirectory="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" echo "The script directory is: $scriptDirectory" -version="0.2" +version="0.3" imageName="taskweavercontainers/taskweaver-executor" imageFullName="$imageName:$version" diff --git a/taskweaver/code_interpreter/code_interpreter/code_generator_prompt.yaml b/taskweaver/code_interpreter/code_interpreter/code_generator_prompt.yaml index 673d7f49..c5f44160 100644 --- a/taskweaver/code_interpreter/code_interpreter/code_generator_prompt.yaml +++ b/taskweaver/code_interpreter/code_interpreter/code_generator_prompt.yaml @@ -36,7 +36,6 @@ response_json_schema: |- "properties": { "thought": { "type": "string", - "maxLength": 1000, "description": "The thoughts before generating the code." }, "reply_type": { @@ -49,7 +48,6 @@ response_json_schema: |- }, "reply_content": { "type": "string", - "minLength": 10, "description": "The actual content of the response. If the reply_type is 'python', the content should be a valid python code snippet. Make sure escaping the special characters (e.g., '\\', '/', and '\"') in the strings for JSON format." } }, @@ -57,12 +55,14 @@ response_json_schema: |- "thought", "reply_type", "reply_content" - ] + ], + "additionalProperties": false } }, "required": [ "response" - ] + ], + "additionalProperties": false } diff --git a/taskweaver/llm/base.py b/taskweaver/llm/base.py index 60976056..f29d943c 100644 --- a/taskweaver/llm/base.py +++ b/taskweaver/llm/base.py @@ -55,7 +55,7 @@ def _configure(self) -> None: self.response_format: Optional[str] = self._get_enum( "response_format", - options=["json_object", "text"], + options=["json_object", "text", "json_schema"], default="json_object", ) diff --git a/taskweaver/llm/openai.py b/taskweaver/llm/openai.py index 0f386937..d2d2fe6e 100644 --- a/taskweaver/llm/openai.py +++ b/taskweaver/llm/openai.py @@ -180,6 +180,14 @@ def chat_completion( response_format = kwargs["response_format"] elif self.config.response_format == "json_object": response_format = {"type": "json_object"} + elif self.config.response_format == "json_schema": + response_format = {"type": "json_schema"} + assert "json_schema" in kwargs, "JSON schema is required for JSON schema response format" + response_format["json_schema"] = { + "name": "response", + "strict": True, + "schema": kwargs["json_schema"], + } else: response_format = None diff --git a/taskweaver/planner/planner_prompt.yaml b/taskweaver/planner/planner_prompt.yaml index 851daf70..d6426f6d 100644 --- a/taskweaver/planner/planner_prompt.yaml +++ b/taskweaver/planner/planner_prompt.yaml @@ -150,11 +150,14 @@ response_json_schema: |- "plan", "current_plan_step", "send_to", - "message" - ] + "message", + "review" + ], + "additionalProperties": false } }, "required": [ "response" - ] + ], + "additionalProperties": false }