diff --git a/docs/modules/usage/how-to/evaluation-harness.md b/docs/modules/usage/how-to/evaluation-harness.md
index 32717675e3d0..daf144d11e88 100644
--- a/docs/modules/usage/how-to/evaluation-harness.md
+++ b/docs/modules/usage/how-to/evaluation-harness.md
@@ -134,9 +134,11 @@ To create an evaluation workflow for your benchmark, follow these steps:
 
 4. Create a function to process each instance:
    ```python
+   from openhands.utils.async_utils import call_async_from_sync
    def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput:
        config = get_config(instance, metadata)
        runtime = create_runtime(config)
+       call_async_from_sync(runtime.connect)
        initialize_runtime(runtime, instance)
 
        instruction = get_instruction(instance, metadata)
diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py
index 81c7455e0041..2c896939a751 100644
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -23,6 +23,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync
 
 game = None
 
@@ -119,6 +120,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py
index f0ea180f4ba0..d6fcc62e0798 100644
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -33,6 +33,7 @@
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def get_config(
@@ -210,6 +211,7 @@ def process_instance(
     # =============================================
 
     runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     initialize_runtime(runtime, instance=instance)
 
diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py
index c59de4f441f8..fa1bb9534a83 100644
--- a/evaluation/aider_bench/run_infer.py
+++ b/evaluation/aider_bench/run_infer.py
@@ -33,6 +33,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 # Configure visibility of unit tests to the Agent.
 USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@@ -207,6 +208,7 @@ def process_instance(
     # =============================================
 
     runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     initialize_runtime(runtime, instance=instance)
 
diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py
index 9dcff6d6ef42..4535ccba4e4e 100644
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -30,6 +30,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': functools.partial(
@@ -275,7 +276,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py
index 0fba8c5c6456..adb498cd2eb1 100644
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -33,6 +33,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def codeact_user_response(state: State) -> str:
@@ -403,6 +404,7 @@ def execute_sql(db_path, sql):
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py
index f5794d3ad3cb..c02cd0aee737 100644
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -29,6 +29,7 @@
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
@@ -142,6 +143,7 @@ def process_instance(
     logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py
index ac49a8078d5b..873cb7f89694 100644
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -25,6 +25,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -81,6 +82,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     state: State | None = asyncio.run(
         run_controller(
             config=config,
diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py
index fe7ff4bf1c18..8fd4034c9d5e 100644
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -48,6 +48,7 @@
     MessageAction,
 )
 from openhands.events.observation import Observation
+from openhands.utils.async_utils import call_async_from_sync
 
 ACTION_FORMAT = """
 <<FINAL_ANSWER||
@@ -215,7 +216,7 @@ def process_instance(
 """
 
     runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
     state: State | None = asyncio.run(
         run_controller(
             config=config,
diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py
index 9b0abbc298a8..25fee65561fc 100644
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -38,6 +38,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 IMPORT_HELPER = {
     'python': [
@@ -233,6 +234,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/integration_tests/run_infer.py b/evaluation/integration_tests/run_infer.py
index ab631ab397ad..a530041f92f7 100644
--- a/evaluation/integration_tests/run_infer.py
+++ b/evaluation/integration_tests/run_infer.py
@@ -25,6 +25,7 @@
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 FAKE_RESPONSES = {
     'CodeActAgent': codeact_user_response,
@@ -33,6 +34,7 @@
 
 def get_config(
     metadata: EvalMetadata,
+    instance_id: str,
 ) -> AppConfig:
     config = AppConfig(
         default_agent=metadata.agent_class,
@@ -49,6 +51,14 @@ def get_config(
         workspace_base=None,
         workspace_mount_path=None,
     )
+    if metadata.llm_config.log_completions:
+        metadata.llm_config.log_completions_folder = os.path.join(
+            metadata.eval_output_dir, 'llm_completions', instance_id
+        )
+        logger.info(
+            f'Logging LLM completions for instance {instance_id} to '
+            f'{metadata.llm_config.log_completions_folder}'
+        )
     config.set_llm_config(metadata.llm_config)
     return config
 
@@ -58,7 +68,7 @@ def process_instance(
     metadata: EvalMetadata,
     reset_logger: bool = True,
 ) -> EvalOutput:
-    config = get_config(metadata)
+    config = get_config(metadata, instance.instance_id)
 
     # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
     if reset_logger:
@@ -92,6 +102,7 @@ def process_instance(
     # =============================================
 
     runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     test_class.initialize_runtime(runtime)
 
diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py
index b07af7b08cdf..5b7d35f21130 100644
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -30,6 +30,7 @@
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -202,6 +203,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py
index ccd36f2d389b..9c2aaf1e0963 100644
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -35,6 +35,7 @@
     BROWSER_EVAL_GET_GOAL_ACTION,
     BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync
 
 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
 
@@ -127,6 +128,7 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {env_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     task_str = initialize_runtime(runtime)
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py
index 80127420a431..8017b194d8d8 100644
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -33,6 +33,7 @@
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@@ -176,6 +177,7 @@ def process_instance(
     )
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime)
 
     state: State | None = asyncio.run(
diff --git a/evaluation/mint/tasks/reasoning.py b/evaluation/mint/tasks/reasoning.py
index 6640c6e1ee6a..08cf320c359f 100644
--- a/evaluation/mint/tasks/reasoning.py
+++ b/evaluation/mint/tasks/reasoning.py
@@ -131,11 +131,9 @@ def extract_options(self, prompt: str) -> dict:
 
 
 def compare_two_numbers(p, gt):
-    if isinstance(p, int) or isinstance(p, float):
+    if isinstance(p, (int, float)):
         pass
-    elif isinstance(p, list) or isinstance(p, bool) or isinstance(p, str):
-        return False
-    elif isinstance(p, tuple) or isinstance(p, complex) or isinstance(p, dict):
+    elif isinstance(p, (bool, complex, dict, list, str, tuple)):
         return False
     else:
         raise ValueError(p)
@@ -227,8 +225,8 @@ def extract_answer(self, solution: str) -> Any:
             prediction = prediction.replace('°', '')
 
         # Detect the boolean keyword in the generation
-        if prediction in ['true', 'yes', 'false', 'no']:
-            if prediction == 'true' or prediction == 'yes':
+        if prediction in ('true', 'yes', 'false', 'no'):
+            if prediction in ('true', 'yes'):
                 prediction = 'True'
             else:
                 prediction = 'False'
@@ -342,7 +340,7 @@ def success(self, solution: str) -> bool:
         answer_type = self._answer_type
         gt = self.extract_answer(self.reference)
 
-        if isinstance(prediction, (str, int, float)) or isinstance(prediction, list):
+        if isinstance(prediction, (str, int, float, list)):
             # Comparing prediction against the reference
             if answer_type in ['bool', 'option', 'Option']:
                 cur_correct = int(prediction == f'({gt})') or int(prediction == gt)
diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py
index 0f1ba0ab957a..deec068f3392 100644
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@@ -42,6 +42,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 config = load_app_config()
 
@@ -233,6 +234,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Run the agent
diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py
index 14429fc9859d..cf6d71d3b3ee 100644
--- a/evaluation/swe_bench/eval_infer.py
+++ b/evaluation/swe_bench/eval_infer.py
@@ -28,6 +28,7 @@
 from openhands.core.main import create_runtime
 from openhands.events.action import CmdRunAction
 from openhands.events.observation import CmdOutputObservation
+from openhands.utils.async_utils import call_async_from_sync
 
 # TODO: migrate all swe-bench docker to ghcr.io/openhands
 DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
@@ -128,7 +129,7 @@ def process_instance(
         )
 
     runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
     # Get patch and save it to /tmp/patch.diff
     with tempfile.TemporaryDirectory() as temp_dir:
         # Patch file
diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
index 18b7daac0108..9ac1e0cf6639 100644
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -35,6 +35,7 @@
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
 from openhands.runtime.utils.shutdown_listener import sleep_if_should_continue
+from openhands.utils.async_utils import call_async_from_sync
 
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
 USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false').lower() == 'true'
@@ -143,6 +144,14 @@ def get_config(
         workspace_base=None,
         workspace_mount_path=None,
     )
+    if metadata.llm_config.log_completions:
+        metadata.llm_config.log_completions_folder = os.path.join(
+            metadata.eval_output_dir, 'llm_completions', instance['instance_id']
+        )
+        logger.info(
+            f'Logging LLM completions for instance {instance["instance_id"]} to '
+            f'{metadata.llm_config.log_completions_folder}'
+        )
     config.set_llm_config(metadata.llm_config)
     return config
 
@@ -372,6 +381,7 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {instance.instance_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     try:
         initialize_runtime(runtime, instance)
@@ -432,7 +442,6 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        llm_completions=state.extra_data.get('llm_completions', []),
         error=state.last_error if state and state.last_error else None,
     )
     return output
diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py
index 85f565120ac9..5c2c53422785 100644
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -26,6 +26,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -103,6 +104,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
index d184b5b98037..d33658f339d2 100644
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -61,7 +61,6 @@ class EvalOutput(BaseModel):
     history: (
         list[dict[str, Any]] | list[tuple[dict[str, Any], dict[str, Any]]] | None
     ) = None
-    llm_completions: list[dict[str, Any]] | None = None
     metrics: dict[str, Any] | None = None
     error: str | None = None
 
diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py
index 2291aa08c8d4..cfc2bdae493a 100644
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -35,6 +35,7 @@
     BROWSER_EVAL_GET_GOAL_ACTION,
     BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync
 
 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
 
@@ -143,6 +144,7 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {env_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     task_str = initialize_runtime(runtime)
 
     state: State | None = asyncio.run(
diff --git a/frontend/__tests__/components/chat/ChatInput.test.tsx b/frontend/__tests__/components/chat/ChatInput.test.tsx
deleted file mode 100644
index 75b565f848ff..000000000000
--- a/frontend/__tests__/components/chat/ChatInput.test.tsx
+++ /dev/null
@@ -1,119 +0,0 @@
-import userEvent from "@testing-library/user-event";
-import { render, screen } from "@testing-library/react";
-import { describe, afterEach, vi, it, expect } from "vitest";
-import ChatInput from "#/components/chat/ChatInput";
-
-describe.skip("ChatInput", () => {
-  afterEach(() => {
-    vi.clearAllMocks();
-  });
-
-  const onSendMessage = vi.fn();
-
-  it("should render a textarea", () => {
-    render(<ChatInput onSendMessage={onSendMessage} />);
-    expect(screen.getByRole("textbox")).toBeInTheDocument();
-  });
-
-  it("should be able to be set as disabled", async () => {
-    const user = userEvent.setup();
-    render(<ChatInput disabled onSendMessage={onSendMessage} />);
-
-    const textarea = screen.getByRole("textbox");
-    const button = screen.getByRole("button");
-
-    expect(textarea).not.toBeDisabled(); // user can still type
-    expect(button).toBeDisabled(); // user cannot submit
-
-    await user.type(textarea, "Hello, world!");
-    await user.keyboard("{Enter}");
-
-    expect(onSendMessage).not.toHaveBeenCalled();
-  });
-
-  it("should render with a placeholder", () => {
-    render(<ChatInput onSendMessage={onSendMessage} />);
-
-    const textarea = screen.getByPlaceholderText(
-      /CHAT_INTERFACE\$INPUT_PLACEHOLDER/i,
-    );
-    expect(textarea).toBeInTheDocument();
-  });
-
-  it("should render a send button", () => {
-    render(<ChatInput onSendMessage={onSendMessage} />);
-    expect(screen.getByRole("button")).toBeInTheDocument();
-  });
-
-  it("should call sendChatMessage with the input when the send button is clicked", async () => {
-    const user = userEvent.setup();
-    render(<ChatInput onSendMessage={onSendMessage} />);
-
-    const textarea = screen.getByRole("textbox");
-    const button = screen.getByRole("button");
-
-    await user.type(textarea, "Hello, world!");
-    await user.click(button);
-
-    expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []);
-    // Additionally, check if it was called exactly once
-    expect(onSendMessage).toHaveBeenCalledTimes(1);
-  });
-
-  it("should be able to send a message when the enter key is pressed", async () => {
-    const user = userEvent.setup();
-    render(<ChatInput onSendMessage={onSendMessage} />);
-    const textarea = screen.getByRole("textbox");
-
-    await user.type(textarea, "Hello, world!");
-    await user.keyboard("{Enter}");
-
-    expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []);
-  });
-
-  it("should NOT send a message when shift + enter is pressed", async () => {
-    const user = userEvent.setup();
-    render(<ChatInput onSendMessage={onSendMessage} />);
-    const textarea = screen.getByRole("textbox");
-
-    await user.type(textarea, "Hello, world!");
-    await user.keyboard("{Shift>} {Enter}"); // Shift + Enter
-
-    expect(onSendMessage).not.toHaveBeenCalled();
-  });
-
-  it("should NOT send an empty message", async () => {
-    const user = userEvent.setup();
-    render(<ChatInput onSendMessage={onSendMessage} />);
-    const textarea = screen.getByRole("textbox");
-    const button = screen.getByRole("button");
-
-    await user.type(textarea, " ");
-
-    // with enter key
-    await user.keyboard("{Enter}");
-    expect(onSendMessage).not.toHaveBeenCalled();
-
-    // with button click
-    await user.click(button);
-    expect(onSendMessage).not.toHaveBeenCalled();
-  });
-
-  it("should clear the input message after sending a message", async () => {
-    const user = userEvent.setup();
-    render(<ChatInput onSendMessage={onSendMessage} />);
-    const textarea = screen.getByRole("textbox");
-    const button = screen.getByRole("button");
-
-    await user.type(textarea, "Hello, world!");
-    expect(textarea).toHaveValue("Hello, world!");
-
-    await user.click(button);
-    expect(textarea).toHaveValue("");
-  });
-
-  // this is already implemented but need to figure out how to test it
-  it.todo(
-    "should NOT send a message when the enter key is pressed while composing",
-  );
-});
diff --git a/frontend/__tests__/components/chat/chat-input.test.tsx b/frontend/__tests__/components/chat/chat-input.test.tsx
new file mode 100644
index 000000000000..e10e3d26f3ee
--- /dev/null
+++ b/frontend/__tests__/components/chat/chat-input.test.tsx
@@ -0,0 +1,168 @@
+import userEvent from "@testing-library/user-event";
+import { render, screen } from "@testing-library/react";
+import { describe, afterEach, vi, it, expect } from "vitest";
+import { ChatInput } from "#/components/chat-input";
+
+// Exercises ChatInput through its rendered textarea and button.
+describe("ChatInput", () => {
+  const onSubmitMock = vi.fn();
+
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("should render a textarea", () => {
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    expect(screen.getByTestId("chat-input")).toBeInTheDocument();
+    expect(screen.getByRole("textbox")).toBeInTheDocument();
+  });
+
+  it("should call onSubmit when the user types and presses enter", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+
+    await user.type(textarea, "Hello, world!");
+    await user.keyboard("{Enter}");
+
+    expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!");
+  });
+
+  it("should call onSubmit when pressing the submit button", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+    const button = screen.getByRole("button");
+
+    await user.type(textarea, "Hello, world!");
+    await user.click(button);
+
+    expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!");
+  });
+
+  it("should not call onSubmit when the message is empty", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+    const button = screen.getByRole("button");
+
+    await user.click(button);
+    expect(onSubmitMock).not.toHaveBeenCalled();
+
+    // Focus the textarea first so Enter is dispatched to the input itself
+    // rather than to whichever element the previous click left focused.
+    await user.click(textarea);
+    await user.keyboard("{Enter}");
+    expect(onSubmitMock).not.toHaveBeenCalled();
+  });
+
+  it("should disable submit", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput disabled onSubmit={onSubmitMock} />);
+
+    const button = screen.getByRole("button");
+    const textarea = screen.getByRole("textbox");
+
+    await user.type(textarea, "Hello, world!");
+
+    // Press Enter while the textarea still has focus from typing.
+    await user.keyboard("{Enter}");
+    expect(onSubmitMock).not.toHaveBeenCalled();
+
+    expect(button).toBeDisabled();
+    await user.click(button);
+    expect(onSubmitMock).not.toHaveBeenCalled();
+  });
+
+  it("should render a placeholder", () => {
+    render(
+      <ChatInput placeholder="Enter your message" onSubmit={onSubmitMock} />,
+    );
+
+    const textarea = screen.getByPlaceholderText("Enter your message");
+    expect(textarea).toBeInTheDocument();
+  });
+
+  it("should create a newline instead of submitting when shift + enter is pressed", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+
+    await user.type(textarea, "Hello, world!");
+    // Hold Shift, press Enter, release Shift; no stray space is typed.
+    await user.keyboard("{Shift>}{Enter}{/Shift}"); // Shift + Enter
+
+    expect(onSubmitMock).not.toHaveBeenCalled();
+    // expect(textarea).toHaveValue("Hello, world!\n");
+  });
+
+  it("should clear the input message after sending a message", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+    const button = screen.getByRole("button");
+
+    await user.type(textarea, "Hello, world!");
+    await user.keyboard("{Enter}");
+    expect(textarea).toHaveValue("");
+
+    await user.type(textarea, "Hello, world!");
+    await user.click(button);
+    expect(textarea).toHaveValue("");
+  });
+
+  it("should hide the submit button", () => {
+    render(<ChatInput onSubmit={onSubmitMock} showButton={false} />);
+    expect(screen.queryByRole("button")).not.toBeInTheDocument();
+  });
+
+  it("should call onChange when the user types", async () => {
+    const user = userEvent.setup();
+    const onChangeMock = vi.fn();
+    render(<ChatInput onSubmit={onSubmitMock} onChange={onChangeMock} />);
+    const textarea = screen.getByRole("textbox");
+
+    await user.type(textarea, "Hello, world!");
+
+    expect(onChangeMock).toHaveBeenCalledTimes("Hello, world!".length);
+  });
+
+  it("should have set the passed value", () => {
+    render(<ChatInput value="Hello, world!" onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+
+    expect(textarea).toHaveValue("Hello, world!");
+  });
+
+  it("should display the stop button and trigger the callback", async () => {
+    const user = userEvent.setup();
+    const onStopMock = vi.fn();
+    render(
+      <ChatInput onSubmit={onSubmitMock} button="stop" onStop={onStopMock} />,
+    );
+    const stopButton = screen.getByTestId("stop-button");
+
+    await user.click(stopButton);
+    expect(onStopMock).toHaveBeenCalledOnce();
+  });
+
+  it("should call onFocus and onBlur when the textarea is focused and blurred", async () => {
+    const user = userEvent.setup();
+    const onFocusMock = vi.fn();
+    const onBlurMock = vi.fn();
+    render(
+      <ChatInput
+        onSubmit={onSubmitMock}
+        onFocus={onFocusMock}
+        onBlur={onBlurMock}
+      />,
+    );
+    const textarea = screen.getByRole("textbox");
+
+    await user.click(textarea);
+    expect(onFocusMock).toHaveBeenCalledOnce();
+
+    await user.tab();
+    expect(onBlurMock).toHaveBeenCalledOnce();
+  });
+});
diff --git a/frontend/__tests__/components/image-preview.test.tsx b/frontend/__tests__/components/image-preview.test.tsx
new file mode 100644
index 000000000000..ef39fb0f4777
--- /dev/null
+++ b/frontend/__tests__/components/image-preview.test.tsx
@@ -0,0 +1,33 @@
+import { render, screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import { describe, expect, it, vi } from "vitest";
+import { ImagePreview } from "#/components/image-preview";
+
+// Checks that ImagePreview renders its image and reports close-button clicks.
+describe("ImagePreview", () => {
+  it("should render an image", () => {
+    render(
+      <ImagePreview src="https://example.com/image.jpg" onRemove={vi.fn()} />,
+    );
+    const img = screen.getByRole("img");
+
+    expect(screen.getByTestId("image-preview")).toBeInTheDocument();
+    expect(img).toHaveAttribute("src", "https://example.com/image.jpg");
+  });
+
+  it("should call onRemove when the close button is clicked", async () => {
+    const user = userEvent.setup();
+    const onRemoveMock = vi.fn();
+    render(
+      <ImagePreview
+        src="https://example.com/image.jpg"
+        onRemove={onRemoveMock}
+      />,
+    );
+
+    const closeButton = screen.getByRole("button");
+    await user.click(closeButton);
+
+    expect(onRemoveMock).toHaveBeenCalledOnce();
+  });
+});
diff --git a/frontend/__tests__/components/interactive-chat-box.test.tsx b/frontend/__tests__/components/interactive-chat-box.test.tsx
new file mode 100644
index 000000000000..5d775682cb7d
--- /dev/null
+++ b/frontend/__tests__/components/interactive-chat-box.test.tsx
@@ -0,0 +1,135 @@
+import { render, screen, within } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import {
+  afterAll,
+  afterEach,
+  beforeAll,
+  describe,
+  expect,
+  it,
+  vi,
+} from "vitest";
+import { InteractiveChatBox } from "#/components/interactive-chat-box";
+
+describe("InteractiveChatBox", () => {
+  const onSubmitMock = vi.fn();
+  const onStopMock = vi.fn();
+  // Remember whatever URL.createObjectURL was before this suite stubs it, so
+  // the stub can be removed again once the suite is done.
+  const originalCreateObjectURL = global.URL.createObjectURL;
+
+  beforeAll(() => {
+    global.URL.createObjectURL = vi
+      .fn()
+      .mockReturnValue("blob:http://example.com");
+  });
+
+  afterAll(() => {
+    // Put the original back so the stub does not outlive this suite.
+    global.URL.createObjectURL = originalCreateObjectURL;
+  });
+
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("should render", () => {
+    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
+
+    const chatBox = screen.getByTestId("interactive-chat-box");
+    within(chatBox).getByTestId("chat-input");
+    within(chatBox).getByTestId("upload-image-input");
+  });
+
+  it("should display the image previews when images are uploaded", async () => {
+    const user = userEvent.setup();
+    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
+
+    const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
+    const input = screen.getByTestId("upload-image-input");
+
+    expect(screen.queryAllByTestId("image-preview")).toHaveLength(0);
+
+    await user.upload(input, file);
+    expect(screen.queryAllByTestId("image-preview")).toHaveLength(1);
+
+    const files = [
+      new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }),
+      new File(["(⌐□_□)"], "chucknorris3.png", { type: "image/png" }),
+    ];
+
+    await user.upload(input, files);
+    expect(screen.queryAllByTestId("image-preview")).toHaveLength(3);
+  });
+
+  it("should remove the image preview when the close button is clicked", async () => {
+    const user = userEvent.setup();
+    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
+
+    const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
+    const input = screen.getByTestId("upload-image-input");
+
+    await user.upload(input, file);
+    expect(screen.queryAllByTestId("image-preview")).toHaveLength(1);
+
+    const imagePreview = screen.getByTestId("image-preview");
+    const closeButton = within(imagePreview).getByRole("button");
+    await user.click(closeButton);
+
+    expect(screen.queryAllByTestId("image-preview")).toHaveLength(0);
+  });
+
+  it("should call onSubmit with the message and images", async () => {
+    const user = userEvent.setup();
+    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
+
+    const textarea = within(screen.getByTestId("chat-input")).getByRole(
+      "textbox",
+    );
+    const input = screen.getByTestId("upload-image-input");
+    const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
+
+    await user.upload(input, file);
+    await user.type(textarea, "Hello, world!");
+    await user.keyboard("{Enter}");
+
+    expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!", [file]);
+
+    // clear images after submission
+    expect(screen.queryAllByTestId("image-preview")).toHaveLength(0);
+  });
+
+  it("should disable the submit button", async () => {
+    const user = userEvent.setup();
+    render(
+      <InteractiveChatBox
+        isDisabled
+        onSubmit={onSubmitMock}
+        onStop={onStopMock}
+      />,
+    );
+
+    const button = screen.getByRole("button");
+    expect(button).toBeDisabled();
+
+    await user.click(button);
+    expect(onSubmitMock).not.toHaveBeenCalled();
+  });
+
+  it("should display the stop button if set and call onStop when clicked", async () => {
+    const user = userEvent.setup();
+    render(
+      <InteractiveChatBox
+        mode="stop"
+        onSubmit={onSubmitMock}
+        onStop={onStopMock}
+      />,
+    );
+
+    const stopButton = screen.getByTestId("stop-button");
+    expect(stopButton).toBeInTheDocument();
+
+    await user.click(stopButton);
+    expect(onStopMock).toHaveBeenCalledOnce();
+  });
+});
diff --git a/frontend/__tests__/components/upload-image-input.test.tsx b/frontend/__tests__/components/upload-image-input.test.tsx
new file mode 100644
index 000000000000..77f89ee8851a
--- /dev/null
+++ b/frontend/__tests__/components/upload-image-input.test.tsx
@@ -0,0 +1,75 @@
+import { render, screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { UploadImageInput } from "#/components/upload-image-input";
+
+describe("UploadImageInput", () => {
+  const onUploadMock = vi.fn();
+
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("should render an input", () => {
+    render(<UploadImageInput onUpload={onUploadMock} />);
+    expect(screen.getByTestId("upload-image-input")).toBeInTheDocument();
+  });
+
+  it("should call onUpload when a file is selected", async () => {
+    // A fresh user-event session per test keeps input state from leaking
+    // between tests.
+    const user = userEvent.setup();
+    render(<UploadImageInput onUpload={onUploadMock} />);
+
+    const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
+    const input = screen.getByTestId("upload-image-input");
+
+    await user.upload(input, file);
+
+    expect(onUploadMock).toHaveBeenNthCalledWith(1, [file]);
+  });
+
+  it("should call onUpload when multiple files are selected", async () => {
+    const user = userEvent.setup();
+    render(<UploadImageInput onUpload={onUploadMock} />);
+
+    const files = [
+      new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }),
+      new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }),
+    ];
+    const input = screen.getByTestId("upload-image-input");
+
+    await user.upload(input, files);
+
+    expect(onUploadMock).toHaveBeenNthCalledWith(1, files);
+  });
+
+  it("should not upload any file that is not an image", async () => {
+    const user = userEvent.setup();
+    render(<UploadImageInput onUpload={onUploadMock} />);
+
+    const file = new File(["(⌐□_□)"], "chucknorris.txt", {
+      type: "text/plain",
+    });
+    const input = screen.getByTestId("upload-image-input");
+
+    await user.upload(input, file);
+
+    expect(onUploadMock).not.toHaveBeenCalled();
+  });
+
+  it("should render custom labels", () => {
+    const { rerender } = render(<UploadImageInput onUpload={onUploadMock} />);
+    expect(screen.getByTestId("default-label")).toBeInTheDocument();
+
+    function CustomLabel() {
+      return <span>Custom label</span>;
+    }
+    rerender(
+      <UploadImageInput onUpload={onUploadMock} label={<CustomLabel />} />,
+    );
+
+    expect(screen.getByText("Custom label")).toBeInTheDocument();
+    expect(screen.queryByTestId("default-label")).not.toBeInTheDocument();
+  });
+});
diff --git a/frontend/src/api/open-hands.ts b/frontend/src/api/open-hands.ts
index 38bda53e96d9..0ef84c0278c2 100644
--- a/frontend/src/api/open-hands.ts
+++ b/frontend/src/api/open-hands.ts
@@ -201,7 +201,7 @@ class OpenHands {
   static async getGitHubAccessToken(
     code: string,
   ): Promise<GitHubAccessTokenResponse> {
-    const response = await fetch(`${OpenHands.BASE_URL}/github/callback`, {
+    const response = await fetch(`${OpenHands.BASE_URL}/api/github/callback`, {
       method: "POST",
       body: JSON.stringify({ code }),
       headers: {
@@ -218,7 +218,7 @@ class OpenHands {
    * @returns Whether the user is authenticated
    */
   static async isAuthenticated(login: string): Promise<boolean> {
-    const response = await fetch(`${OpenHands.BASE_URL}/authenticate`, {
+    const response = await fetch(`${OpenHands.BASE_URL}/api/authenticate`, {
       method: "POST",
       body: JSON.stringify({ login }),
       headers: {
diff --git a/frontend/src/assets/chevron-left.tsx b/frontend/src/assets/chevron-left.tsx
new file mode 100644
index 000000000000..b2ae9abdde09
--- /dev/null
+++ b/frontend/src/assets/chevron-left.tsx
@@ -0,0 +1,37 @@
+interface ChevronLeftProps {
+  /** Rendered width of the icon; defaults to 20. */
+  width?: number;
+  /** Rendered height of the icon; defaults to 20. */
+  height?: number;
+  /** When true the chevron uses the light fill, otherwise the dimmed one. */
+  active?: boolean;
+}
+
+/**
+ * Left-pointing chevron icon.
+ *
+ * The path coordinates are written for a 20x20 canvas, so the viewBox is
+ * pinned to that size and `width`/`height` only scale the rendered icon.
+ */
+export function ChevronLeft({
+  width = 20,
+  height = 20,
+  active,
+}: ChevronLeftProps) {
+  return (
+    <svg
+      width={width}
+      height={height}
+      viewBox="0 0 20 20"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+    >
+      <path
+        fillRule="evenodd"
+        clipRule="evenodd"
+        d="M11.204 15.0037L6.65511 9.99993L11.204 4.99617L12.1289 5.83701L8.34444 9.99993L12.1289 14.1628L11.204 15.0037Z"
+        fill={active ? "#D4D4D4" : "#525252"}
+      />
+    </svg>
+  );
+}
diff --git a/frontend/src/assets/chevron-right.tsx b/frontend/src/assets/chevron-right.tsx
new file mode 100644
index 000000000000..4d7119d4a99a
--- /dev/null
+++ b/frontend/src/assets/chevron-right.tsx
@@ -0,0 +1,37 @@
+interface ChevronRightProps {
+  /** Rendered width of the icon; defaults to 20. */
+  width?: number;
+  /** Rendered height of the icon; defaults to 20. */
+  height?: number;
+  /** When true the chevron uses the light fill, otherwise the dimmed one. */
+  active?: boolean;
+}
+
+/**
+ * Right-pointing chevron icon.
+ *
+ * The path coordinates are written for a 20x20 canvas, so the viewBox is
+ * pinned to that size and `width`/`height` only scale the rendered icon.
+ */
+export function ChevronRight({
+  width = 20,
+  height = 20,
+  active,
+}: ChevronRightProps) {
+  return (
+    <svg
+      width={width}
+      height={height}
+      viewBox="0 0 20 20"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+    >
+      <path
+        fillRule="evenodd"
+        clipRule="evenodd"
+        d="M8.79602 4.99634L13.3449 10.0001L8.79602 15.0038L7.87109 14.163L11.6556 10.0001L7.87109 5.83718L8.79602 4.99634Z"
+        fill={active ? "#D4D4D4" : "#525252"}
+      />
+    </svg>
+  );
+}
diff --git a/frontend/src/assets/close.svg b/frontend/src/assets/close.svg
new file mode 100644
index 000000000000..d43761a6f10a
--- /dev/null
+++ b/frontend/src/assets/close.svg
@@ -0,0 +1,5 @@
+<svg width="11" height="11" viewBox="0 0 11 11" fill="none" xmlns="http://www.w3.org/2000/svg">
+  <path fill-rule="evenodd" clip-rule="evenodd"
+    d="M5.69949 5.72974L7.91965 7.9505L8.35077 7.51999L6.13001 5.29922L8.35077 3.07907L7.92026 2.64795L5.69949 4.86871L3.47934 2.64795L3.04883 3.07907L5.26898 5.29922L3.04883 7.51938L3.47934 7.9505L5.69949 5.72974Z"
+    fill="black" />
+</svg>
diff --git a/frontend/src/components/attach-image-label.tsx b/frontend/src/components/attach-image-label.tsx
new file mode 100644
index 000000000000..f3b9c7ebc13a
--- /dev/null
+++ b/frontend/src/components/attach-image-label.tsx
@@ -0,0 +1,14 @@
+import Clip from "#/assets/clip.svg?react";
+
+/**
+ * Static label made of a 16x16 clip icon followed by the text
+ * "Attach images".
+ */
+export function AttachImageLabel() {
+  return (
+    <div className="flex self-start items-center text-[#A3A3A3] text-xs leading-[18px] -tracking-[0.08px] cursor-pointer">
+      <Clip width={16} height={16} />
+      Attach images
+    </div>
+  );
+}
diff --git a/frontend/src/components/chat-input.tsx b/frontend/src/components/chat-input.tsx
new file mode 100644
index 000000000000..0aa4348e1539
--- /dev/null
+++ b/frontend/src/components/chat-input.tsx
@@ -0,0 +1,124 @@
+import React from "react";
+import TextareaAutosize from "react-textarea-autosize";
+import ArrowSendIcon from "#/assets/arrow-send.svg?react";
+import { cn } from "#/utils/utils";
+
+interface ChatInputProps {
+  name?: string;
+  /** Which action button is shown next to the textarea. */
+  button?: "submit" | "stop";
+  /** Disables the action button and sending with the Enter key. */
+  disabled?: boolean;
+  placeholder?: string;
+  showButton?: boolean;
+  value?: string;
+  maxRows?: number;
+  /** Called with the textarea content when a non-blank message is sent. */
+  onSubmit: (message: string) => void;
+  onStop?: () => void;
+  onChange?: (message: string) => void;
+  onFocus?: () => void;
+  onBlur?: () => void;
+  /** Extra classes merged into the textarea's class list. */
+  className?: React.HTMLAttributes<HTMLDivElement>["className"];
+}
+
+/**
+ * Chat textarea (between 1 and `maxRows` rows) with an optional send/stop
+ * button.
+ * Plain Enter sends the message; Shift+Enter is left to the textarea. Blank
+ * messages are never sent, and Enter is ignored during IME composition.
+ */
+export function ChatInput({
+  name,
+  button = "submit",
+  disabled,
+  placeholder,
+  showButton = true,
+  value,
+  maxRows = 4,
+  onSubmit,
+  onStop,
+  onChange,
+  onFocus,
+  onBlur,
+  className,
+}: ChatInputProps) {
+  const textareaRef = React.useRef<HTMLTextAreaElement>(null);
+
+  const handleSubmitMessage = () => {
+    const textarea = textareaRef.current;
+    // Nothing to send for an empty or whitespace-only draft.
+    if (!textarea || !textarea.value.trim()) return;
+
+    onSubmit(textarea.value);
+    textarea.value = "";
+  };
+
+  const handleKeyPress = (event: React.KeyboardEvent<HTMLTextAreaElement>) => {
+    // During IME composition (e.g. Chinese, Japanese) Enter confirms the
+    // candidate text and must not send the message.
+    if (event.nativeEvent.isComposing) return;
+
+    if (event.key === "Enter" && !event.shiftKey) {
+      event.preventDefault(); // plain Enter never inserts a newline
+      if (!disabled) handleSubmitMessage();
+    }
+  };
+
+  const handleChange = (event: React.ChangeEvent<HTMLTextAreaElement>) => {
+    onChange?.(event.target.value);
+  };
+
+  return (
+    <div
+      data-testid="chat-input"
+      className="flex items-end justify-end grow gap-1 min-h-6"
+    >
+      <TextareaAutosize
+        ref={textareaRef}
+        name={name}
+        placeholder={placeholder}
+        onKeyDown={handleKeyPress}
+        onChange={handleChange}
+        onFocus={onFocus}
+        onBlur={onBlur}
+        value={value}
+        minRows={1}
+        maxRows={maxRows}
+        className={cn(
+          "grow text-sm self-center placeholder:text-neutral-400 text-white resize-none bg-transparent outline-none ring-0",
+          "transition-[height] duration-200 ease-in-out",
+          className,
+        )}
+      />
+      {showButton && (
+        <>
+          {button === "submit" && (
+            <button
+              aria-label="Send"
+              disabled={disabled}
+              onClick={handleSubmitMessage}
+              type="submit"
+              className="border border-white rounded-lg w-6 h-6 hover:bg-neutral-500 focus:bg-neutral-500 flex items-center justify-center"
+            >
+              <ArrowSendIcon />
+            </button>
+          )}
+          {button === "stop" && (
+            <button
+              data-testid="stop-button"
+              aria-label="Stop"
+              disabled={disabled}
+              onClick={onStop}
+              type="button"
+              className="border border-white rounded-lg w-6 h-6 hover:bg-neutral-500 focus:bg-neutral-500 flex items-center justify-center"
+            >
+              <div className="w-[10px] h-[10px] bg-white" />
+            </button>
+          )}
+        </>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/chat/ChatInput.tsx b/frontend/src/components/chat/ChatInput.tsx
deleted file mode 100644
index a228f7fff703..000000000000
--- a/frontend/src/components/chat/ChatInput.tsx
+++ /dev/null
@@ -1,162 +0,0 @@
-import { Textarea } from "@nextui-org/react";
-import React from "react";
-import { useTranslation } from "react-i18next";
-import { useSelector } from "react-redux";
-import { I18nKey } from "#/i18n/declaration";
-import Clip from "#/assets/clip.svg?react";
-import { RootState } from "#/store";
-import AgentState from "#/types/AgentState";
-import { useSocket } from "#/context/socket";
-import { generateAgentStateChangeEvent } from "#/services/agentStateService";
-import { cn } from "#/utils/utils";
-import ArrowSendIcon from "#/assets/arrow-send.svg?react";
-import { convertImageToBase64 } from "#/utils/convert-image-to-base-64";
-
-interface ChatInputProps {
-  disabled?: boolean;
-  onSendMessage: (message: string, image_urls: string[]) => void;
-}
-
-function ChatInput({ disabled = false, onSendMessage }: ChatInputProps) {
-  const { send } = useSocket();
-  const { t } = useTranslation();
-  const { curAgentState } = useSelector((state: RootState) => state.agent);
-
-  const [message, setMessage] = React.useState("");
-  const [files, setFiles] = React.useState<File[]>([]);
-  // This is true when the user is typing in an IME (e.g., Chinese, Japanese)
-  const [isComposing, setIsComposing] = React.useState(false);
-
-  const handleSendChatMessage = async () => {
-    if (curAgentState === AgentState.RUNNING) {
-      send(generateAgentStateChangeEvent(AgentState.STOPPED));
-      return;
-    }
-
-    if (message.trim()) {
-      let base64images: string[] = [];
-      if (files.length > 0) {
-        base64images = await Promise.all(
-          files.map((file) => convertImageToBase64(file)),
-        );
-      }
-      onSendMessage(message, base64images);
-      setMessage("");
-      setFiles([]);
-    }
-  };
-
-  const onKeyPress = (event: React.KeyboardEvent<HTMLInputElement>) => {
-    if (event.key === "Enter" && !event.shiftKey && !isComposing) {
-      event.preventDefault(); // prevent a new line
-      if (!disabled) {
-        handleSendChatMessage();
-      }
-    }
-  };
-
-  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
-    if (event.target.files) {
-      setFiles((prev) => [...prev, ...Array.from(event.target.files!)]);
-    }
-  };
-
-  const removeFile = (index: number) => {
-    setFiles((prevFiles) => prevFiles.filter((_, i) => i !== index));
-  };
-
-  const handlePaste = (event: React.ClipboardEvent<HTMLInputElement>) => {
-    const clipboardItems = Array.from(event.clipboardData.items);
-    const pastedFiles: File[] = [];
-    clipboardItems.forEach((item) => {
-      if (item.type.startsWith("image/")) {
-        const file = item.getAsFile();
-        if (file) {
-          pastedFiles.push(file);
-        }
-      }
-    });
-    if (pastedFiles.length > 0) {
-      setFiles((prevFiles) => [...prevFiles, ...pastedFiles]);
-      event.preventDefault();
-    }
-  };
-
-  return (
-    <div className="w-full relative text-base flex">
-      <Textarea
-        value={message}
-        startContent={
-          <label
-            htmlFor="file-input"
-            className="cursor-pointer"
-            aria-label={t(I18nKey.CHAT_INTERFACE$TOOLTIP_UPLOAD_IMAGE)}
-          >
-            <Clip width={24} height={24} />
-            <input
-              type="file"
-              accept="image/*"
-              onChange={handleFileChange}
-              className="hidden"
-              id="file-input"
-              multiple
-            />
-          </label>
-        }
-        onChange={(e) => setMessage(e.target.value)}
-        onKeyDown={onKeyPress}
-        onCompositionStart={() => setIsComposing(true)}
-        onCompositionEnd={() => setIsComposing(false)}
-        placeholder={t(I18nKey.CHAT_INTERFACE$INPUT_PLACEHOLDER)}
-        onPaste={handlePaste}
-        className="pb-3 px-3"
-        classNames={{
-          inputWrapper: "bg-neutral-700 border border-neutral-600 rounded-lg",
-          input: "pr-16 text-neutral-400",
-        }}
-        maxRows={10}
-        minRows={1}
-        variant="bordered"
-      />
-      <button
-        type="button"
-        onClick={handleSendChatMessage}
-        disabled={disabled}
-        className={cn(
-          "bg-transparent border rounded-lg p-[7px] border-white hover:opacity-80 cursor-pointer select-none absolute right-5 bottom-[19px] transition active:bg-white active:text-black",
-          "w-6 h-6 flex items-center justify-center",
-          "disabled:cursor-not-allowed disabled:border-neutral-400 disabled:text-neutral-400",
-          "hover:bg-neutral-500",
-        )}
-        aria-label={t(I18nKey.CHAT_INTERFACE$TOOLTIP_SEND_MESSAGE)}
-      >
-        {curAgentState !== AgentState.RUNNING && <ArrowSendIcon />}
-        {curAgentState === AgentState.RUNNING && (
-          <div className="w-[10px] h-[10px] bg-white" />
-        )}
-      </button>
-      {files.length > 0 && (
-        <div className="absolute bottom-16 right-5 flex space-x-2 p-4 border-1 border-neutral-500 bg-neutral-800 rounded-lg">
-          {files.map((file, index) => (
-            <div key={index} className="relative">
-              <img
-                src={URL.createObjectURL(file)}
-                alt="upload preview"
-                className="w-24 h-24 object-contain rounded bg-white"
-              />
-              <button
-                type="button"
-                onClick={() => removeFile(index)}
-                className="absolute top-0 right-0 bg-black border border-grey-200 text-white rounded-full w-5 h-5 flex pb-1 items-center justify-center"
-              >
-                &times;
-              </button>
-            </div>
-          ))}
-        </div>
-      )}
-    </div>
-  );
-}
-
-export default ChatInput;
diff --git a/frontend/src/components/chat/ChatInterface.tsx b/frontend/src/components/chat/ChatInterface.tsx
index 1b84645cf2be..6542b468ebbc 100644
--- a/frontend/src/components/chat/ChatInterface.tsx
+++ b/frontend/src/components/chat/ChatInterface.tsx
@@ -4,7 +4,6 @@ import { RiArrowRightDoubleLine } from "react-icons/ri";
 import { useTranslation } from "react-i18next";
 import { VscArrowDown } from "react-icons/vsc";
 import { useDisclosure } from "@nextui-org/react";
-import ChatInput from "./ChatInput";
 import Chat from "./Chat";
 import TypingIndicator from "./TypingIndicator";
 import { RootState } from "#/store";
@@ -18,6 +17,9 @@ import { useSocket } from "#/context/socket";
 import ThumbsUpIcon from "#/assets/thumbs-up.svg?react";
 import ThumbsDownIcon from "#/assets/thumbs-down.svg?react";
 import { cn } from "#/utils/utils";
+import { InteractiveChatBox } from "../interactive-chat-box";
+import { convertImageToBase64 } from "#/utils/convert-image-to-base-64";
+import { generateAgentStateChangeEvent } from "#/services/agentStateService";
 
 interface ScrollButtonProps {
   onClick: () => void;
@@ -63,12 +65,19 @@ function ChatInterface() {
     onOpenChange: onFeedbackModalOpenChange,
   } = useDisclosure();
 
-  const handleSendMessage = (content: string, imageUrls: string[]) => {
+  const handleSendMessage = async (content: string, files: File[]) => {
+    const promises = files.map((file) => convertImageToBase64(file));
+    const imageUrls = await Promise.all(promises);
+
     const timestamp = new Date().toISOString();
     dispatch(addUserMessage({ content, imageUrls, timestamp }));
     send(createChatMessage(content, imageUrls, timestamp));
   };
 
+  const handleStop = () => {
+    send(generateAgentStateChangeEvent(AgentState.STOPPED));
+  };
+
   const shareFeedback = async (polarity: "positive" | "negative") => {
     onFeedbackModalOpen();
     setFeedbackPolarity(polarity);
@@ -100,7 +109,7 @@ function ChatInterface() {
         <Chat messages={messages} curAgentState={curAgentState} />
       </div>
 
-      <div>
+      <div className="px-4 pb-4">
         <div className="relative">
           {feedbackShared !== messages.length && messages.length > 3 && (
             <div
@@ -156,12 +165,14 @@ function ChatInterface() {
           </div>
         </div>
 
-        <ChatInput
-          disabled={
+        <InteractiveChatBox
+          isDisabled={
             curAgentState === AgentState.LOADING ||
             curAgentState === AgentState.AWAITING_USER_CONFIRMATION
           }
-          onSendMessage={handleSendMessage}
+          mode={curAgentState === AgentState.RUNNING ? "stop" : "submit"}
+          onSubmit={handleSendMessage}
+          onStop={handleStop}
         />
       </div>
       <FeedbackModal
diff --git a/frontend/src/components/image-carousel.tsx b/frontend/src/components/image-carousel.tsx
new file mode 100644
index 000000000000..ee42594900e9
--- /dev/null
+++ b/frontend/src/components/image-carousel.tsx
@@ -0,0 +1,87 @@
+import React from "react";
+import { ChevronLeft } from "#/assets/chevron-left";
+import { ChevronRight } from "#/assets/chevron-right";
+import { ImagePreview } from "./image-preview";
+import { cn } from "#/utils/utils";
+
+// Scroll offsets can be fractional, so edge checks allow one pixel of slack.
+const SCROLL_EDGE_TOLERANCE = 1;
+
+interface ImageCarouselProps {
+  /** Preview size; defaults to "small". */
+  size?: "small" | "large";
+  /** Image sources, rendered in order. */
+  images: string[];
+  /** Called with the index of the image whose remove button was clicked. */
+  onRemove: (index: number) => void;
+}
+
+/**
+ * Horizontally scrollable strip of image previews.
+ *
+ * Chevron indicators are rendered on both sides whenever the strip overflows;
+ * each one is highlighted while there is more content in its direction.
+ */
+export function ImageCarousel({
+  size = "small",
+  images,
+  onRemove,
+}: ImageCarouselProps) {
+  const scrollContainerRef = React.useRef<HTMLDivElement>(null);
+  const [isScrollable, setIsScrollable] = React.useState(false);
+  const [isAtStart, setIsAtStart] = React.useState(true);
+  const [isAtEnd, setIsAtEnd] = React.useState(false);
+
+  // Derives all three scroll flags from the container's current geometry.
+  const syncScrollState = React.useCallback((container: HTMLDivElement) => {
+    const { scrollLeft, clientWidth, scrollWidth } = container;
+    setIsScrollable(scrollWidth > clientWidth);
+    setIsAtStart(scrollLeft <= 0);
+    setIsAtEnd(
+      scrollLeft + clientWidth >= scrollWidth - SCROLL_EDGE_TOLERANCE,
+    );
+  }, []);
+
+  // Adding or removing images changes the overflow, so re-measure each time.
+  React.useEffect(() => {
+    const scrollContainer = scrollContainerRef.current;
+    if (scrollContainer) syncScrollState(scrollContainer);
+  }, [images, syncScrollState]);
+
+  const handleScroll = (event: React.UIEvent<HTMLDivElement>) => {
+    syncScrollState(event.currentTarget);
+  };
+
+  return (
+    <div className="relative">
+      {isScrollable && (
+        <div className="absolute right-full transform top-1/2 -translate-y-1/2">
+          <ChevronLeft active={!isAtStart} />
+        </div>
+      )}
+      <div
+        ref={scrollContainerRef}
+        onScroll={handleScroll}
+        className={cn(
+          "flex overflow-x-auto",
+          size === "small" && "gap-2",
+          size === "large" && "gap-4",
+        )}
+      >
+        {images.map((src, index) => (
+          <ImagePreview
+            key={index}
+            size={size}
+            src={src}
+            onRemove={() => onRemove(index)}
+          />
+        ))}
+      </div>
+      {isScrollable && (
+        <div className="absolute left-full transform top-1/2 -translate-y-1/2">
+          <ChevronRight active={!isAtEnd} />
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/image-preview.tsx b/frontend/src/components/image-preview.tsx
new file mode 100644
index 000000000000..910fc57024b8
--- /dev/null
+++ b/frontend/src/components/image-preview.tsx
@@ -0,0 +1,45 @@
+import CloseIcon from "#/assets/close.svg?react";
+import { cn } from "#/utils/utils";
+
+interface ImagePreviewProps {
+  /** Source URL of the image to show. */
+  src: string;
+  /** Called when the remove button is clicked. */
+  onRemove: () => void;
+  /** Thumbnail size: "small" is 62px square, "large" is 100px square. */
+  size?: "small" | "large";
+}
+
+/** Square image thumbnail with a remove button in its top-right corner. */
+export function ImagePreview({
+  src,
+  onRemove,
+  size = "small",
+}: ImagePreviewProps) {
+  return (
+    <div data-testid="image-preview" className="relative w-fit shrink-0">
+      <img
+        role="img"
+        src={src}
+        alt=""
+        className={cn(
+          "rounded object-cover",
+          size === "small" && "w-[62px] h-[62px]",
+          size === "large" && "w-[100px] h-[100px]",
+        )}
+      />
+      {/* The button only contains an icon, so it needs an accessible name. */}
+      <button
+        type="button"
+        aria-label="Remove image"
+        onClick={onRemove}
+        className={cn(
+          "bg-neutral-400 rounded-full w-3 h-3 flex items-center justify-center",
+          "absolute right-[3px] top-[3px]",
+        )}
+      >
+        <CloseIcon width={10} height={10} />
+      </button>
+    </div>
+  );
+}
diff --git a/frontend/src/components/interactive-chat-box.tsx b/frontend/src/components/interactive-chat-box.tsx
new file mode 100644
index 000000000000..640a6e1ad417
--- /dev/null
+++ b/frontend/src/components/interactive-chat-box.tsx
@@ -0,0 +1,87 @@
+import React from "react";
+import { UploadImageInput } from "./upload-image-input";
+import { ChatInput } from "./chat-input";
+import { cn } from "#/utils/utils";
+import { ImageCarousel } from "./image-carousel";
+
+interface InteractiveChatBoxProps {
+  /** Forwarded to the chat input as `disabled`. */
+  isDisabled?: boolean;
+  /** Which action button the chat input shows; defaults to "submit". */
+  mode?: "stop" | "submit";
+  /** Called with the message and the currently attached image files. */
+  onSubmit: (message: string, images: File[]) => void;
+  onStop: () => void;
+}
+
+/**
+ * Chat input combined with an image picker and a preview strip for the
+ * attached images. The attachments are cleared after every submit.
+ */
+export function InteractiveChatBox({
+  isDisabled,
+  mode = "submit",
+  onSubmit,
+  onStop,
+}: InteractiveChatBoxProps) {
+  const [images, setImages] = React.useState<File[]>([]);
+  const [previewUrls, setPreviewUrls] = React.useState<string[]>([]);
+
+  // Object URLs stay allocated until they are revoked, so create them once
+  // per change of the image list (not on every render) and release the old
+  // set when the list changes again or the component unmounts.
+  React.useEffect(() => {
+    const urls = images.map((image) => URL.createObjectURL(image));
+    setPreviewUrls(urls);
+
+    return () => {
+      // Optional call: not every DOM implementation used for tests provides
+      // URL.revokeObjectURL.
+      urls.forEach((url) => URL.revokeObjectURL?.(url));
+    };
+  }, [images]);
+
+  const handleUpload = (files: File[]) => {
+    setImages((prevImages) => [...prevImages, ...files]);
+  };
+
+  const handleRemoveImage = (index: number) => {
+    setImages((prevImages) => prevImages.filter((_, i) => i !== index));
+  };
+
+  const handleSubmit = (message: string) => {
+    onSubmit(message, images);
+    setImages([]);
+  };
+
+  return (
+    <div
+      data-testid="interactive-chat-box"
+      className="flex flex-col gap-[10px]"
+    >
+      {images.length > 0 && (
+        <ImageCarousel
+          size="small"
+          images={previewUrls}
+          onRemove={handleRemoveImage}
+        />
+      )}
+
+      <div
+        className={cn(
+          "flex items-end gap-1",
+          "bg-neutral-700 border border-neutral-600 rounded-lg px-2 py-[10px]",
+        )}
+      >
+        <UploadImageInput onUpload={handleUpload} />
+        <ChatInput
+          disabled={isDisabled}
+          button={mode}
+          placeholder="What do you want to build?"
+          onSubmit={handleSubmit}
+          onStop={onStop}
+        />
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/components/modals/security/invariant/Invariant.tsx b/frontend/src/components/modals/security/invariant/Invariant.tsx
index 8372ae47437d..afb6bf2cb3ec 100644
--- a/frontend/src/components/modals/security/invariant/Invariant.tsx
+++ b/frontend/src/components/modals/security/invariant/Invariant.tsx
@@ -171,7 +171,7 @@ function SecurityInvariant(): JSX.Element {
           {logs.map((log: SecurityAnalyzerLog, index: number) => (
             <div
               key={index}
-              className={`mb-2 p-2 rounded-lg ${log.confirmed_changed && log.is_confirmed === "confirmed" ? "border-green-800" : "border-red-800"}`}
+              className={`mb-2 p-2 rounded-lg ${log.confirmed_changed && log.confirmation_state === "confirmed" ? "border-green-800" : "border-red-800"}`}
               style={{
                 backgroundColor: "rgba(128, 128, 128, 0.2)",
                 borderWidth: log.confirmed_changed ? "2px" : "0",
@@ -179,7 +179,7 @@ function SecurityInvariant(): JSX.Element {
             >
               <p className="text-sm relative break-words">
                 {log.content}
-                {(log.is_confirmed === "awaiting_confirmation" ||
+                {(log.confirmation_state === "awaiting_confirmation" ||
                   log.confirmed_changed) && (
                   <IoAlertCircle className="absolute top-0 right-0" />
                 )}
diff --git a/frontend/src/components/upload-image-input.tsx b/frontend/src/components/upload-image-input.tsx
new file mode 100644
index 000000000000..e97d1f427f34
--- /dev/null
+++ b/frontend/src/components/upload-image-input.tsx
@@ -0,0 +1,43 @@
+import Clip from "#/assets/clip.svg?react";
+
+interface UploadImageInputProps {
+  /** Called with the selected image files (never with an empty list). */
+  onUpload: (files: File[]) => void;
+  /** Custom clickable content; a clip icon is shown when omitted. */
+  label?: React.ReactNode;
+}
+
+/**
+ * Hidden multi-file input wrapped in a clickable label.
+ *
+ * The `accept` attribute is only a hint to the file picker, so the selection
+ * is filtered again here and only files with an `image/*` MIME type are
+ * passed on.
+ */
+export function UploadImageInput({ onUpload, label }: UploadImageInputProps) {
+  const handleUpload = (event: React.ChangeEvent<HTMLInputElement>) => {
+    const input = event.target;
+    const selected = input.files ? Array.from(input.files) : [];
+    const images = selected.filter((file) => file.type.startsWith("image/"));
+
+    // Clear the selection so that picking the same file again (for example
+    // after its preview was removed) still triggers a change event.
+    input.value = "";
+
+    if (images.length > 0) onUpload(images);
+  };
+
+  return (
+    <label className="cursor-pointer">
+      {label || <Clip data-testid="default-label" width={24} height={24} />}
+      <input
+        data-testid="upload-image-input"
+        type="file"
+        accept="image/*"
+        multiple
+        hidden
+        onChange={handleUpload}
+      />
+    </label>
+  );
+}
diff --git a/frontend/src/routes/_oh._index/route.tsx b/frontend/src/routes/_oh._index/route.tsx
index 023bf181618e..d1e8b170ab4e 100644
--- a/frontend/src/routes/_oh._index/route.tsx
+++ b/frontend/src/routes/_oh._index/route.tsx
@@ -7,7 +7,6 @@ import {
   useRouteLoaderData,
 } from "@remix-run/react";
 import React from "react";
-import { useDispatch, useSelector } from "react-redux";
 import { SuggestionBox } from "./suggestion-box";
 import { TaskForm } from "./task-form";
 import { HeroHeading } from "./hero-heading";
@@ -20,29 +19,10 @@ import ModalButton from "#/components/buttons/ModalButton";
 import GitHubLogo from "#/assets/branding/github-logo.svg?react";
 import { ConnectToGitHubModal } from "#/components/modals/connect-to-github-modal";
 import { ModalBackdrop } from "#/components/modals/modal-backdrop";
-import store, { RootState } from "#/store";
-import { removeFile, setInitialQuery } from "#/state/initial-query-slice";
+import store from "#/store";
+import { setInitialQuery } from "#/state/initial-query-slice";
 import { clientLoader as rootClientLoader } from "#/routes/_oh";
-import { UploadedFilePreview } from "./uploaded-file-preview";
-
-interface AttachedFilesSliderProps {
-  files: string[];
-  onRemove: (file: string) => void;
-}
-
-function AttachedFilesSlider({ files, onRemove }: AttachedFilesSliderProps) {
-  return (
-    <div className="flex gap-2 overflow-auto">
-      {files.map((file, index) => (
-        <UploadedFilePreview
-          key={index}
-          file={file}
-          onRemove={() => onRemove(file)}
-        />
-      ))}
-    </div>
-  );
-}
+import OpenHands from "#/api/open-hands";
 
 interface GitHubAuthProps {
   onConnectToGitHub: () => void;
@@ -70,6 +50,18 @@ function GitHubAuth({
 }
 
 export const clientLoader = async ({ request }: ClientLoaderFunctionArgs) => {
+  let isSaas = false;
+  let githubClientId: string | null = null;
+
+  try {
+    const config = await OpenHands.getConfig();
+    isSaas = config.APP_MODE === "saas";
+    githubClientId = config.GITHUB_CLIENT_ID;
+  } catch (error) {
+    isSaas = false;
+    githubClientId = null;
+  }
+
   const token = localStorage.getItem("token");
   if (token) return redirect("/app");
 
@@ -83,11 +75,10 @@ export const clientLoader = async ({ request }: ClientLoaderFunctionArgs) => {
   }
 
   let githubAuthUrl: string | null = null;
-  if (window.__APP_MODE__ === "saas") {
-    const clientId = window.__GITHUB_CLIENT_ID__;
+  if (isSaas) {
     const requestUrl = new URL(request.url);
     const redirectUri = `${requestUrl.origin}/oauth/github/callback`;
-    githubAuthUrl = `https://github.com/login/oauth/authorize?client_id=${clientId}&redirect_uri=${encodeURIComponent(redirectUri)}&scope=repo,user,workflow`;
+    githubAuthUrl = `https://github.com/login/oauth/authorize?client_id=${githubClientId}&redirect_uri=${encodeURIComponent(redirectUri)}&scope=repo,user,workflow`;
   }
 
   return json({ repositories, githubAuthUrl });
@@ -107,10 +98,6 @@ function Home() {
   const [connectToGitHubModalOpen, setConnectToGitHubModalOpen] =
     React.useState(false);
   const [importedFile, setImportedFile] = React.useState<File | null>(null);
-  const textareaRef = React.useRef<HTMLTextAreaElement>(null);
-
-  const dispatch = useDispatch();
-  const { files } = useSelector((state: RootState) => state.initalQuery);
 
   const handleConnectToGitHub = () => {
     if (githubAuthUrl) {
@@ -125,16 +112,7 @@ function Home() {
       <HeroHeading />
       <div className="flex flex-col gap-16 w-[600px] items-center">
         <div className="flex flex-col gap-2 w-full">
-          <TaskForm
-            importedProjectZip={importedFile}
-            textareaRef={textareaRef}
-          />
-          {files.length > 0 && (
-            <AttachedFilesSlider
-              files={files}
-              onRemove={(file) => dispatch(removeFile(file))}
-            />
-          )}
+          <TaskForm importedProjectZip={importedFile} />
         </div>
         <div className="flex gap-4 w-full">
           <SuggestionBox
@@ -170,8 +148,6 @@ function Home() {
                       if (event.target.files) {
                         const zip = event.target.files[0];
                         setImportedFile(zip);
-                        // focus on the task form
-                        textareaRef.current?.focus();
                       } else {
                         // TODO: handle error
                       }
diff --git a/frontend/src/routes/_oh._index/task-form.tsx b/frontend/src/routes/_oh._index/task-form.tsx
index 4f6a68dd9986..72171ef04099 100644
--- a/frontend/src/routes/_oh._index/task-form.tsx
+++ b/frontend/src/routes/_oh._index/task-form.tsx
@@ -1,106 +1,32 @@
 import React from "react";
 import { Form, useNavigation } from "@remix-run/react";
 import { useDispatch, useSelector } from "react-redux";
-import Send from "#/assets/send.svg?react";
-import Clip from "#/assets/clip.svg?react";
-import { cn } from "#/utils/utils";
 import { RootState } from "#/store";
-import { addFile, setImportedProjectZip } from "#/state/initial-query-slice";
+import {
+  addFile,
+  removeFile,
+  setImportedProjectZip,
+} from "#/state/initial-query-slice";
 import { SuggestionBubble } from "#/components/suggestion-bubble";
 import { SUGGESTIONS } from "#/utils/suggestions";
 import { convertImageToBase64 } from "#/utils/convert-image-to-base-64";
-
-const convertZipToBase64 = async (file: File) => {
-  const reader = new FileReader();
-
-  return new Promise<string>((resolve) => {
-    reader.onload = () => {
-      resolve(reader.result as string);
-    };
-    reader.readAsDataURL(file);
-  });
-};
-
-interface MainTextareaInputProps {
-  disabled: boolean;
-  placeholder: string;
-  value: string;
-  onChange: (e: React.ChangeEvent<HTMLTextAreaElement>) => void;
-  formRef: React.RefObject<HTMLFormElement>;
-}
-
-const MainTextareaInput = React.forwardRef<
-  HTMLTextAreaElement,
-  MainTextareaInputProps
->(({ disabled, placeholder, value, onChange, formRef }, ref) => {
-  const adjustHeight = () => {
-    const MAX_LINES = 15;
-
-    // ref can either be a callback ref or a MutableRefObject
-    const textarea = typeof ref === "function" ? null : ref?.current;
-    if (textarea) {
-      textarea.style.height = "auto"; // Reset to auto to recalculate scroll height
-      const { scrollHeight } = textarea;
-
-      // Calculate based on line height and max lines
-      const lineHeight = parseInt(
-        window.getComputedStyle(textarea).lineHeight,
-        10,
-      );
-      const maxHeight = lineHeight * MAX_LINES;
-
-      textarea.style.height = `${Math.min(scrollHeight, maxHeight)}px`;
-    }
-  };
-
-  React.useEffect(() => {
-    adjustHeight();
-  }, [value]);
-
-  return (
-    <textarea
-      ref={ref}
-      disabled={disabled}
-      name="q"
-      rows={1}
-      placeholder={placeholder}
-      onChange={onChange}
-      onKeyDown={(e) => {
-        if (e.key === "Enter" && !e.shiftKey) {
-          e.preventDefault();
-          formRef.current?.requestSubmit();
-        }
-      }}
-      value={value}
-      className={cn(
-        "bg-[#404040] placeholder:text-[#A3A3A3] border border-[#525252] w-full rounded-lg px-4 py-[18px] text-[17px] leading-5",
-        "pr-[calc(16px+24px)]", // 24px for the send button
-        "focus:bg-[#525252]",
-        "resize-none",
-      )}
-    />
-  );
-});
-
-MainTextareaInput.displayName = "MainTextareaInput";
-
-const getRandomKey = (obj: Record<string, string>) => {
-  const keys = Object.keys(obj);
-  const randomKey = keys[Math.floor(Math.random() * keys.length)];
-
-  return randomKey;
-};
+import { ChatInput } from "#/components/chat-input";
+import { UploadImageInput } from "#/components/upload-image-input";
+import { ImageCarousel } from "#/components/image-carousel";
+import { getRandomKey } from "#/utils/get-random-key";
+import { convertZipToBase64 } from "#/utils/convert-zip-to-base64";
+import { AttachImageLabel } from "#/components/attach-image-label";
+import { cn } from "#/utils/utils";
 
 interface TaskFormProps {
   importedProjectZip: File | null;
-  textareaRef?: React.RefObject<HTMLTextAreaElement>;
 }
 
-export function TaskForm({ importedProjectZip, textareaRef }: TaskFormProps) {
+export function TaskForm({ importedProjectZip }: TaskFormProps) {
   const dispatch = useDispatch();
   const navigation = useNavigation();
 
-  const { selectedRepository } = useSelector(
+  const { selectedRepository, files } = useSelector(
     (state: RootState) => state.initalQuery,
   );
 
@@ -114,6 +40,7 @@ export function TaskForm({ importedProjectZip, textareaRef }: TaskFormProps) {
   const [suggestion, setSuggestion] = React.useState(
     getRandomKey(hasLoadedProject ? SUGGESTIONS.repo : SUGGESTIONS["non-repo"]),
   );
+  const [inputIsFocused, setInputIsFocused] = React.useState(false);
 
   React.useEffect(() => {
     // Display a suggestion based on whether a repository is selected
@@ -140,10 +67,6 @@ export function TaskForm({ importedProjectZip, textareaRef }: TaskFormProps) {
     setText(value);
   };
 
-  const handleChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
-    setText(e.target.value);
-  };
-
   const handleSubmitForm = async () => {
     // This is handled on top of the form submission
     if (importedProjectZip) {
@@ -153,6 +76,14 @@ export function TaskForm({ importedProjectZip, textareaRef }: TaskFormProps) {
     }
   };
 
+  const placeholder = React.useMemo(() => {
+    if (selectedRepository) {
+      return `What would you like to change in ${selectedRepository}?`;
+    }
+
+    return "What do you want to build?";
+  }, [selectedRepository]);
+
   return (
     <div className="flex flex-col gap-2 w-full">
       <Form
@@ -167,53 +98,46 @@ export function TaskForm({ importedProjectZip, textareaRef }: TaskFormProps) {
           onClick={onClickSuggestion}
           onRefresh={onRefreshSuggestion}
         />
-        <div className="relative w-full">
-          <MainTextareaInput
-            ref={textareaRef}
-            disabled={navigation.state === "submitting"}
-            placeholder={
-              selectedRepository
-                ? `What would you like to change in ${selectedRepository}?`
-                : "What do you want to build?"
-            }
-            onChange={handleChange}
+        <div
+          className={cn(
+            "border border-neutral-600 px-4 py-[17px] rounded-lg text-[17px] leading-5 w-full",
+            inputIsFocused ? "bg-neutral-600" : "bg-neutral-700",
+          )}
+        >
+          <ChatInput
+            name="q"
+            onSubmit={() => {
+              formRef.current?.requestSubmit();
+            }}
+            onChange={(message) => setText(message)}
+            onFocus={() => setInputIsFocused(true)}
+            onBlur={() => setInputIsFocused(false)}
+            placeholder={placeholder}
             value={text}
-            formRef={formRef}
+            maxRows={15}
+            showButton={!!text}
+            className="text-[17px] leading-5"
+            disabled={navigation.state === "submitting"}
           />
-          {!!text && (
-            <button
-              type="submit"
-              aria-label="Submit"
-              className="absolute right-4 top-4"
-              disabled={navigation.state === "loading"}
-            >
-              <Send width={24} height={24} />
-            </button>
-          )}
         </div>
       </Form>
-      <label className="flex self-start items-center text-[#A3A3A3] text-xs leading-[18px] -tracking-[0.08px] cursor-pointer">
-        <Clip width={16} height={16} />
-        Attach images
-        <input
-          hidden
-          type="file"
-          accept="image/*"
-          id="file-input"
-          multiple
-          onChange={(event) => {
-            if (event.target.files) {
-              Array.from(event.target.files).forEach((file) => {
-                convertImageToBase64(file).then((base64) => {
-                  dispatch(addFile(base64));
-                });
-              });
-            } else {
-              // TODO: handle error
-            }
-          }}
+      <UploadImageInput
+        onUpload={async (uploadedFiles) => {
+          const promises = uploadedFiles.map(convertImageToBase64);
+          const base64Images = await Promise.all(promises);
+          base64Images.forEach((base64) => {
+            dispatch(addFile(base64));
+          });
+        }}
+        label={<AttachImageLabel />}
+      />
+      {files.length > 0 && (
+        <ImageCarousel
+          size="large"
+          images={files}
+          onRemove={(index) => dispatch(removeFile(index))}
         />
-      </label>
+      )}
     </div>
   );
 }
diff --git a/frontend/src/routes/_oh._index/uploaded-file-preview.tsx b/frontend/src/routes/_oh._index/uploaded-file-preview.tsx
deleted file mode 100644
index 4dc0a760c75e..000000000000
--- a/frontend/src/routes/_oh._index/uploaded-file-preview.tsx
+++ /dev/null
@@ -1,23 +0,0 @@
-interface UploadedFilePreviewProps {
-  file: string; // base64
-  onRemove: () => void;
-}
-
-export function UploadedFilePreview({
-  file,
-  onRemove,
-}: UploadedFilePreviewProps) {
-  return (
-    <div className="relative flex-shrink-0">
-      <button
-        type="button"
-        aria-label="Remove"
-        onClick={onRemove}
-        className="absolute right-1 top-1 text-[#A3A3A3] hover:text-danger"
-      >
-        &times;
-      </button>
-      <img src={file} alt="" className="w-16 h-16 aspect-auto rounded" />
-    </div>
-  );
-}
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index ee6a1b1a0071..46b6aad85130 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -1,6 +1,5 @@
 import { addAssistantMessage, addUserMessage } from "#/state/chatSlice";
 import { setCode, setActiveFilepath } from "#/state/codeSlice";
-import { appendInput } from "#/state/commandSlice";
 import { appendJupyterInput } from "#/state/jupyterSlice";
 import {
   ActionSecurityRisk,
@@ -56,21 +55,12 @@ const messageActions = {
     if (message.args.thought) {
       store.dispatch(addAssistantMessage(message.args.thought));
     }
-    if (
-      !message.args.is_confirmed ||
-      message.args.is_confirmed !== "rejected"
-    ) {
-      store.dispatch(appendInput(message.args.command));
-    }
   },
   [ActionType.RUN_IPYTHON]: (message: ActionMessage) => {
     if (message.args.thought) {
       store.dispatch(addAssistantMessage(message.args.thought));
     }
-    if (
-      !message.args.is_confirmed ||
-      message.args.is_confirmed !== "rejected"
-    ) {
+    if (message.args.confirmation_state !== "rejected") {
       store.dispatch(appendJupyterInput(message.args.code));
     }
   },
@@ -98,7 +88,7 @@ export function handleActionMessage(message: ActionMessage) {
   if (
     (message.action === ActionType.RUN ||
       message.action === ActionType.RUN_IPYTHON) &&
-    message.args.is_confirmed === "awaiting_confirmation"
+    message.args.confirmation_state === "awaiting_confirmation"
   ) {
     if (message.args.thought) {
       store.dispatch(addAssistantMessage(message.args.thought));
diff --git a/frontend/src/state/initial-query-slice.ts b/frontend/src/state/initial-query-slice.ts
index 320282243bec..7575363301cd 100644
--- a/frontend/src/state/initial-query-slice.ts
+++ b/frontend/src/state/initial-query-slice.ts
@@ -21,8 +21,8 @@ export const selectedFilesSlice = createSlice({
     addFile(state, action: PayloadAction<string>) {
       state.files.push(action.payload);
     },
-    removeFile(state, action: PayloadAction<string>) {
-      state.files = state.files.filter((file) => file !== action.payload);
+    removeFile(state, action: PayloadAction<number>) {
+      state.files.splice(action.payload, 1);
     },
     clearFiles(state) {
       state.files = [];
diff --git a/frontend/src/state/securityAnalyzerSlice.ts b/frontend/src/state/securityAnalyzerSlice.ts
index c56f53f9cfc9..22ecb0995966 100644
--- a/frontend/src/state/securityAnalyzerSlice.ts
+++ b/frontend/src/state/securityAnalyzerSlice.ts
@@ -11,7 +11,7 @@ export type SecurityAnalyzerLog = {
   id: number;
   content: string;
   security_risk: ActionSecurityRisk;
-  is_confirmed?: "awaiting_confirmation" | "confirmed" | "rejected";
+  confirmation_state?: "awaiting_confirmation" | "confirmed" | "rejected";
   confirmed_changed: boolean;
 };
 
@@ -32,20 +32,20 @@ export const securityAnalyzerSlice = createSlice({
           action.payload.args.content ||
           action.payload.message,
         security_risk: action.payload.args.security_risk as ActionSecurityRisk,
-        is_confirmed: action.payload.args.is_confirmed,
+        confirmation_state: action.payload.args.confirmation_state,
         confirmed_changed: false,
       };
 
       const existingLog = state.logs.find(
         (stateLog) =>
           stateLog.id === log.id ||
-          (stateLog.is_confirmed === "awaiting_confirmation" &&
+          (stateLog.confirmation_state === "awaiting_confirmation" &&
             stateLog.content === log.content),
       );
 
       if (existingLog) {
-        if (existingLog.is_confirmed !== log.is_confirmed) {
-          existingLog.is_confirmed = log.is_confirmed;
+        if (existingLog.confirmation_state !== log.confirmation_state) {
+          existingLog.confirmation_state = log.confirmation_state;
           existingLog.confirmed_changed = true;
         }
       } else {
diff --git a/frontend/src/types/core/actions.ts b/frontend/src/types/core/actions.ts
index b19657b614fe..a94c2da3ef5c 100644
--- a/frontend/src/types/core/actions.ts
+++ b/frontend/src/types/core/actions.ts
@@ -12,7 +12,7 @@ export interface CommandAction extends OpenHandsActionEvent<"run"> {
   source: "agent";
   args: {
     command: string;
-    is_confirmed: "confirmed" | "rejected" | "awaiting_confirmation";
+    confirmation_state: "confirmed" | "rejected" | "awaiting_confirmation";
     thought: string;
     hidden?: boolean;
   };
@@ -32,7 +32,7 @@ export interface IPythonAction extends OpenHandsActionEvent<"run_ipython"> {
   source: "agent";
   args: {
     code: string;
-    is_confirmed: "confirmed" | "rejected" | "awaiting_confirmation";
+    confirmation_state: "confirmed" | "rejected" | "awaiting_confirmation";
     kernel_init_code: string;
     thought: string;
   };
diff --git a/frontend/src/utils/convert-zip-to-base64.ts b/frontend/src/utils/convert-zip-to-base64.ts
new file mode 100644
index 000000000000..34b57acdf19a
--- /dev/null
+++ b/frontend/src/utils/convert-zip-to-base64.ts
@@ -0,0 +1,22 @@
+/**
+ * Read a file and resolve with its contents as a base64-encoded data URL.
+ *
+ * @param file The file to read.
+ * @returns A promise resolving to the string produced by
+ *   `FileReader.readAsDataURL` (a `data:` URL, not bare base64).
+ * @throws Rejects with the reader's error if the file cannot be read.
+ */
+export const convertZipToBase64 = async (file: File) => {
+  const reader = new FileReader();
+
+  return new Promise<string>((resolve, reject) => {
+    reader.onload = () => {
+      resolve(reader.result as string);
+    };
+    // Without an error handler the promise would never settle on a failed read.
+    reader.onerror = () => {
+      reject(reader.error);
+    };
+    reader.readAsDataURL(file);
+  });
+};
diff --git a/frontend/src/utils/get-random-key.ts b/frontend/src/utils/get-random-key.ts
new file mode 100644
index 000000000000..7667fb8b1ee4
--- /dev/null
+++ b/frontend/src/utils/get-random-key.ts
@@ -0,0 +1,10 @@
+/**
+ * Pick one of the object's own enumerable keys at random.
+ *
+ * Returns `undefined` at runtime when the object has no keys.
+ */
+export const getRandomKey = (obj: Record<string, string>) => {
+  const candidates = Object.keys(obj);
+  const pick = Math.floor(Math.random() * candidates.length);
+  return candidates[pick];
+};
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index cacd68353732..c8342ca11f70 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -113,14 +113,16 @@ def action_to_str(self, action: Action) -> str:
         return ''
 
     def get_action_message(self, action: Action) -> Message | None:
-        if (
-            isinstance(action, AgentDelegateAction)
-            or isinstance(action, CmdRunAction)
-            or isinstance(action, IPythonRunCellAction)
-            or isinstance(action, MessageAction)
-            or isinstance(action, FileEditAction)
-            or (isinstance(action, AgentFinishAction) and action.source == 'agent')
-        ):
+        if isinstance(
+            action,
+            (
+                AgentDelegateAction,
+                CmdRunAction,
+                IPythonRunCellAction,
+                MessageAction,
+                FileEditAction,
+            ),
+        ) or (isinstance(action, AgentFinishAction) and action.source == 'agent'):
             content = [TextContent(text=self.action_to_str(action))]
 
             if (
@@ -139,7 +141,9 @@ def get_observation_message(self, obs: Observation) -> Message | None:
         max_message_chars = self.llm.config.max_message_chars
         obs_prefix = 'OBSERVATION:\n'
         if isinstance(obs, CmdOutputObservation):
-            text = obs_prefix + truncate_content(obs.content, max_message_chars)
+            text = obs_prefix + truncate_content(
+                obs.content + obs.interpreter_details, max_message_chars
+            )
             text += (
                 f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
             )
diff --git a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py b/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py
index d35acb63c810..6fc679aec449 100644
--- a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py
+++ b/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -89,11 +89,7 @@ def action_to_str(self, action: Action) -> str:
         return ''
 
     def get_action_message(self, action: Action) -> Message | None:
-        if (
-            isinstance(action, CmdRunAction)
-            or isinstance(action, IPythonRunCellAction)
-            or isinstance(action, MessageAction)
-        ):
+        if isinstance(action, (CmdRunAction, IPythonRunCellAction, MessageAction)):
             content = [TextContent(text=self.action_to_str(action))]
 
             if (
@@ -112,7 +108,9 @@ def get_action_message(self, action: Action) -> Message | None:
     def get_observation_message(self, obs: Observation) -> Message | None:
         max_message_chars = self.llm.config.max_message_chars
         if isinstance(obs, CmdOutputObservation):
-            text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
+            text = 'OBSERVATION:\n' + truncate_content(
+                obs.content + obs.interpreter_details, max_message_chars
+            )
             text += (
                 f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
             )
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 55ca61ddddee..946f6a7d327f 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -132,10 +132,6 @@ def update_state_before_step(self):
     async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
-        if 'llm_completions' not in self.state.extra_data:
-            self.state.extra_data['llm_completions'] = []
-        self.state.extra_data['llm_completions'].extend(self.agent.llm.llm_completions)
-        self.agent.llm.llm_completions.clear()
 
     async def report_error(self, message: str, exception: Exception | None = None):
         """Reports an error to the user and sends the exception to the LLM next step, in the hope it can self-correct.
@@ -224,8 +220,8 @@ async def _handle_observation(self, observation: Observation):
         """
         if (
             self._pending_action
-            and hasattr(self._pending_action, 'is_confirmed')
-            and self._pending_action.is_confirmed
+            and hasattr(self._pending_action, 'confirmation_state')
+            and self._pending_action.confirmation_state
             == ActionConfirmationStatus.AWAITING_CONFIRMATION
         ):
             return
@@ -330,9 +326,10 @@ async def set_agent_state_to(self, new_state: AgentState):
             if hasattr(self._pending_action, 'thought'):
                 self._pending_action.thought = ''  # type: ignore[union-attr]
             if new_state == AgentState.USER_CONFIRMED:
-                self._pending_action.is_confirmed = ActionConfirmationStatus.CONFIRMED  # type: ignore[attr-defined]
+                confirmation_state = ActionConfirmationStatus.CONFIRMED
             else:
-                self._pending_action.is_confirmed = ActionConfirmationStatus.REJECTED  # type: ignore[attr-defined]
+                confirmation_state = ActionConfirmationStatus.REJECTED
+            self._pending_action.confirmation_state = confirmation_state  # type: ignore[attr-defined]
             self.event_stream.add_event(self._pending_action, EventSource.AGENT)
 
         self.state.agent_state = new_state
@@ -453,13 +450,15 @@ async def _step(self) -> None:
             if self.state.confirmation_mode and (
                 type(action) is CmdRunAction or type(action) is IPythonRunCellAction
             ):
-                action.is_confirmed = ActionConfirmationStatus.AWAITING_CONFIRMATION
+                action.confirmation_state = (
+                    ActionConfirmationStatus.AWAITING_CONFIRMATION
+                )
             self._pending_action = action
 
         if not isinstance(action, NullAction):
             if (
-                hasattr(action, 'is_confirmed')
-                and action.is_confirmed
+                hasattr(action, 'confirmation_state')
+                and action.confirmation_state
                 == ActionConfirmationStatus.AWAITING_CONFIRMATION
             ):
                 await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION)
diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 8b968e3f4c88..230d5f2e81ac 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -33,8 +33,7 @@ def is_stuck(self):
                 (isinstance(event, MessageAction) and event.source == EventSource.USER)
                 or
                 # there might be some NullAction or NullObservation in the history at least for now
-                isinstance(event, NullAction)
-                or isinstance(event, NullObservation)
+                isinstance(event, (NullAction, NullObservation))
             )
         ]
 
diff --git a/openhands/core/cli.py b/openhands/core/cli.py
index 73b11f30e8d1..6a2620790f6e 100644
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -114,6 +114,7 @@ async def main():
         sid=sid,
         plugins=agent_cls.sandbox_plugins,
     )
+    await runtime.connect()
 
     controller = AgentController(
         agent=agent,
diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py
index 12e2f4afa3ee..ac07b70e0ba6 100644
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@@ -40,6 +40,7 @@ class LLMConfig:
         disable_vision: If model is vision capable, this option allows to disable image processing (useful for cost reduction).
         caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
         log_completions: Whether to log LLM completions to the state.
+        log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
         draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985).
     """
 
@@ -73,6 +74,7 @@ class LLMConfig:
     disable_vision: bool | None = None
     caching_prompt: bool = True
     log_completions: bool = False
+    log_completions_folder: str | None = None
     draft_editor: Optional['LLMConfig'] = None
 
     def defaults_to_dict(self) -> dict:
diff --git a/openhands/core/main.py b/openhands/core/main.py
index 3934349f0c4e..110856d6e66f 100644
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -122,6 +122,7 @@ async def run_controller(
 
     if runtime is None:
         runtime = create_runtime(config, sid=sid)
+        await runtime.connect()
 
     event_stream = runtime.event_stream
     # restore cli session if enabled
diff --git a/openhands/events/action/commands.py b/openhands/events/action/commands.py
index 94fe1d5f5d96..5a7e1fef5257 100644
--- a/openhands/events/action/commands.py
+++ b/openhands/events/action/commands.py
@@ -28,7 +28,7 @@ class CmdRunAction(Action):
     hidden: bool = False
     action: str = ActionType.RUN
     runnable: ClassVar[bool] = True
-    is_confirmed: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
+    confirmation_state: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
     security_risk: ActionSecurityRisk | None = None
 
     @property
@@ -49,7 +49,7 @@ class IPythonRunCellAction(Action):
     thought: str = ''
     action: str = ActionType.RUN_IPYTHON
     runnable: ClassVar[bool] = True
-    is_confirmed: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
+    confirmation_state: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
     security_risk: ActionSecurityRisk | None = None
     kernel_init_code: str = ''  # code to run in the kernel (if the kernel is restarted)
 
diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py
index 90a3690b589b..a182168e694a 100644
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@@ -13,6 +13,7 @@ class CmdOutputObservation(Observation):
     exit_code: int = 0
     hidden: bool = False
     observation: str = ObservationType.RUN
+    interpreter_details: str = ''
 
     @property
     def error(self) -> bool:
diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py
index 36883ae436ae..ee15ab6955e8 100644
--- a/openhands/events/serialization/event.py
+++ b/openhands/events/serialization/event.py
@@ -93,7 +93,7 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
     if 'args' in d:
         d['args'].pop('blocking', None)
         d['args'].pop('keep_prompt', None)
-        d['args'].pop('is_confirmed', None)
+        d['args'].pop('confirmation_state', None)
 
     if 'extras' in d:
         remove_fields(d['extras'], DELETE_FROM_MEMORY_EXTRAS)
diff --git a/openhands/events/serialization/utils.py b/openhands/events/serialization/utils.py
index fbc91e5c1484..de448e01429b 100644
--- a/openhands/events/serialization/utils.py
+++ b/openhands/events/serialization/utils.py
@@ -11,7 +11,7 @@ def remove_fields(obj, fields: set[str]):
                 del obj[field]
         for _, value in obj.items():
             remove_fields(value, fields)
-    elif isinstance(obj, list) or isinstance(obj, tuple):
+    elif isinstance(obj, (list, tuple)):
         for item in obj:
             remove_fields(item, fields)
     elif hasattr(obj, '__dataclass_fields__'):
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 698f4c4a86db..9eb3a08aa990 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -1,4 +1,6 @@
 import copy
+import json
+import os
 import time
 import warnings
 from functools import partial
@@ -45,11 +47,9 @@
 # remove this when we gemini and deepseek are supported
 CACHE_PROMPT_SUPPORTED_MODELS = [
     'claude-3-5-sonnet-20240620',
+    'claude-3-5-sonnet-20241022',
     'claude-3-haiku-20240307',
     'claude-3-opus-20240229',
-    'anthropic/claude-3-opus-20240229',
-    'anthropic/claude-3-haiku-20240307',
-    'anthropic/claude-3-5-sonnet-20240620',
 ]
 
 
@@ -79,11 +79,6 @@ def __init__(
         self.cost_metric_supported: bool = True
         self.config: LLMConfig = copy.deepcopy(config)
 
-        # list of LLM completions (for logging purposes). Each completion is a dict with the following keys:
-        # - 'messages': list of messages
-        # - 'response': response from the LLM
-        self.llm_completions: list[dict[str, Any]] = []
-
         # litellm actually uses base Exception here for unknown model
         self.model_info: ModelInfo | None = None
         try:
@@ -97,6 +92,13 @@ def __init__(
         except Exception as e:
             logger.warning(f'Could not get model info for {config.model}:\n{e}')
 
+        if self.config.log_completions:
+            if self.config.log_completions_folder is None:
+                raise RuntimeError(
+                    'log_completions_folder is required when log_completions is enabled'
+                )
+            os.makedirs(self.config.log_completions_folder, exist_ok=True)
+
         # Set the max tokens in an LM-specific way if not set
         if self.config.max_input_tokens is None:
             if (
@@ -196,14 +198,24 @@ def wrapper(*args, **kwargs):
 
                 # log for evals or other scripts that need the raw completion
                 if self.config.log_completions:
-                    self.llm_completions.append(
-                        {
-                            'messages': messages,
-                            'response': resp,
-                            'timestamp': time.time(),
-                            'cost': self._completion_cost(resp),
-                        }
+                    assert self.config.log_completions_folder is not None
+                    log_file = os.path.join(
+                        self.config.log_completions_folder,
+                        # use the metric model name (for draft editor); '/' -> '__' keeps it one path component
+                        f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json',
                     )
+                    with open(log_file, 'w') as f:
+                        json.dump(
+                            {
+                                'messages': messages,
+                                'response': resp,
+                                'args': args,
+                                'kwargs': kwargs,
+                                'timestamp': time.time(),
+                                'cost': self._completion_cost(resp),
+                            },
+                            f,
+                        )
 
                 message_back: str = resp['choices'][0]['message']['content']
 
@@ -259,7 +271,10 @@ def is_caching_prompt_active(self) -> bool:
             self.config.caching_prompt is True
             and self.model_info is not None
             and self.model_info.get('supports_prompt_caching', False)
-            and self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
+            and (
+                self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
+                or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
+            )
         )
 
     def _post_completion(self, response: ModelResponse) -> None:
@@ -399,7 +414,6 @@ def __repr__(self):
 
     def reset(self):
         self.metrics.reset()
-        self.llm_completions = []
 
     def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
         if isinstance(messages, Message):
diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py
index 719d206e5a1b..2da1372532da 100644
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -9,16 +9,13 @@
 import asyncio
 import io
 import os
-import re
 import shutil
-import subprocess
 import tempfile
 import time
 from contextlib import asynccontextmanager
 from pathlib import Path
 from zipfile import ZipFile
 
-import pexpect
 from fastapi import Depends, FastAPI, HTTPException, Request, UploadFile
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
@@ -40,6 +37,7 @@
 from openhands.events.observation import (
     CmdOutputObservation,
     ErrorObservation,
+    FatalErrorObservation,
     FileReadObservation,
     FileWriteObservation,
     IPythonRunCellObservation,
@@ -53,8 +51,9 @@
     JupyterPlugin,
     Plugin,
 )
-from openhands.runtime.utils import split_bash_commands
+from openhands.runtime.utils.bash import BashSession
 from openhands.runtime.utils.files import insert_lines, read_lines
+from openhands.runtime.utils.runtime_init import init_user_and_working_directory
 from openhands.utils.async_utils import wait_all
 
 
@@ -66,7 +65,6 @@ class ActionRequest(BaseModel):
 INIT_COMMANDS = [
     'git config --global user.name "openhands" && git config --global user.email "openhands@all-hands.dev" && alias git="git --no-pager"',
 ]
-SOFT_TIMEOUT_SECONDS = 5
 
 SESSION_API_KEY = os.environ.get('SESSION_API_KEY')
 api_key_header = APIKeyHeader(name='X-Session-API-Key', auto_error=False)
@@ -92,12 +90,20 @@ def __init__(
         browsergym_eval_env: str | None,
     ) -> None:
         self.plugins_to_load = plugins_to_load
+        self._initial_pwd = work_dir
         self.username = username
         self.user_id = user_id
-        self.pwd = work_dir  # current PWD
-        self._initial_pwd = work_dir
-        self._init_user(self.username, self.user_id)
-        self._init_bash_shell(self.pwd, self.username)
+        _updated_user_id = init_user_and_working_directory(
+            username=username, user_id=self.user_id, initial_pwd=work_dir
+        )
+        if _updated_user_id is not None:
+            self.user_id = _updated_user_id
+
+        self.bash_session = BashSession(
+            work_dir=work_dir,
+            username=username,
+        )
+
         self.lock = asyncio.Lock()
         self.plugins: dict[str, Plugin] = {}
         self.browser = BrowserEnv(browsergym_eval_env)
@@ -132,129 +138,10 @@ async def _init_plugin(self, plugin: Plugin):
 
         if isinstance(plugin, JupyterPlugin):
             await self.run_ipython(
-                IPythonRunCellAction(code=f'import os; os.chdir("{self.pwd}")')
-            )
-
-    def _init_user(self, username: str, user_id: int) -> None:
-        """Create working directory and user if not exists.
-        It performs the following steps effectively:
-        * Creates the Working Directory:
-            - Uses mkdir -p to create the directory.
-            - Sets ownership to username:root.
-            - Adjusts permissions to be readable and writable by group and others.
-        * User Verification and Creation:
-            - Checks if the user exists using id -u.
-            - If the user exists with the correct UID, it skips creation.
-            - If the UID differs, it logs a warning and updates self.user_id.
-            - If the user doesn't exist, it proceeds to create the user.
-        * Sudo Configuration:
-            - Appends %sudo ALL=(ALL) NOPASSWD:ALL to /etc/sudoers to grant
-              passwordless sudo access to the sudo group.
-            - Adds the user to the sudo group with the useradd command, handling
-              UID conflicts by incrementing the UID if necessary.
-        """
-
-        # First create the working directory, independent of the user
-        logger.info(f'Client working directory: {self.initial_pwd}')
-        command = f'umask 002; mkdir -p {self.initial_pwd}'
-        output = subprocess.run(command, shell=True, capture_output=True)
-        out_str = output.stdout.decode()
-
-        command = f'chown -R {username}:root {self.initial_pwd}'
-        output = subprocess.run(command, shell=True, capture_output=True)
-        out_str += output.stdout.decode()
-
-        command = f'chmod g+rw {self.initial_pwd}'
-        output = subprocess.run(command, shell=True, capture_output=True)
-        out_str += output.stdout.decode()
-        logger.debug(f'Created working directory. Output: [{out_str}]')
-
-        # Skip root since it is already created
-        if username == 'root':
-            return
-
-        # Check if the username already exists
-        existing_user_id = -1
-        try:
-            result = subprocess.run(
-                f'id -u {username}', shell=True, check=True, capture_output=True
-            )
-            existing_user_id = int(result.stdout.decode().strip())
-
-            # The user ID already exists, skip setup
-            if existing_user_id == user_id:
-                logger.debug(
-                    f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
-                )
-            else:
-                logger.warning(
-                    f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
-                )
-                self.user_id = existing_user_id
-            return
-        except subprocess.CalledProcessError as e:
-            # Returncode 1 indicates, that the user does not exist yet
-            if e.returncode == 1:
-                logger.debug(
-                    f'User `{username}` does not exist. Proceeding with user creation.'
-                )
-            else:
-                logger.error(
-                    f'Error checking user `{username}`, skipping setup:\n{e}\n'
+                IPythonRunCellAction(
+                    code=f'import os; os.chdir("{self.bash_session.pwd}")'
                 )
-                raise
-
-        # Add sudoer
-        sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
-        output = subprocess.run(sudoer_line, shell=True, capture_output=True)
-        if output.returncode != 0:
-            raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
-        logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
-
-        command = (
-            f'useradd -rm -d /home/{username} -s /bin/bash '
-            f'-g root -G sudo -u {user_id} {username}'
-        )
-        output = subprocess.run(command, shell=True, capture_output=True)
-        if output.returncode == 0:
-            logger.debug(
-                f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
             )
-        else:
-            raise RuntimeError(
-                f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
-            )
-
-    def _init_bash_shell(self, work_dir: str, username: str) -> None:
-        self.shell = pexpect.spawn(
-            f'su {username}',
-            encoding='utf-8',
-            codec_errors='replace',
-            echo=False,
-        )
-        self.__bash_PS1 = (
-            r'[PEXPECT_BEGIN]\n'
-            r'$(which python >/dev/null 2>&1 && echo "[Python Interpreter: $(which python)]\n")'
-            r'\u@\h:\w\n'
-            r'[PEXPECT_END]'
-        )
-
-        # This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
-        self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'
-        # Set umask to allow group write permissions
-        self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
-        self.shell.expect(self.__bash_expect_regex)
-
-        self.shell.sendline(
-            f'if [ ! -d "{work_dir}" ]; then mkdir -p "{work_dir}"; fi && cd "{work_dir}"'
-        )
-        self.shell.expect(self.__bash_expect_regex)
-        logger.debug(
-            f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
-        )
-        # Ensure the group has write permissions on the working directory
-        self.shell.sendline(f'chmod g+rw "{work_dir}"')
-        self.shell.expect(self.__bash_expect_regex)
 
     async def _init_bash_commands(self):
         logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
@@ -262,7 +149,8 @@ async def _init_bash_commands(self):
             action = CmdRunAction(command=command)
             action.timeout = 300
             logger.debug(f'Executing init command: {command}')
-            obs: CmdOutputObservation = await self.run(action)
+            obs = await self.run(action)
+            assert isinstance(obs, CmdOutputObservation)
             logger.debug(
                 f'Init command outputs (exit code: {obs.exit_code}): {obs.content}'
             )
@@ -270,156 +158,6 @@ async def _init_bash_commands(self):
 
         logger.info('Bash init commands completed')
 
-    def _get_bash_prompt_and_update_pwd(self):
-        ps1 = self.shell.after
-        if ps1 == pexpect.EOF:
-            logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
-            raise RuntimeError('Bash shell EOF')
-        if ps1 == pexpect.TIMEOUT:
-            logger.warning('Bash shell timeout')
-            return ''
-
-        # begin at the last occurrence of '[PEXPECT_BEGIN]'.
-        # In multi-line bash commands, the prompt will be repeated
-        # and the matched regex captures all of them
-        # - we only want the last one (newest prompt)
-        _begin_pos = ps1.rfind('[PEXPECT_BEGIN]')
-        if _begin_pos != -1:
-            ps1 = ps1[_begin_pos:]
-
-        # parse the ps1 to get username, hostname, and working directory
-        matched = re.match(self.__bash_expect_regex, ps1)
-        assert (
-            matched is not None
-        ), f'Failed to parse bash prompt: {ps1}. This should not happen.'
-        other_info, username, hostname, working_dir = matched.groups()
-        working_dir = working_dir.rstrip()
-        self.pwd = os.path.expanduser(working_dir)
-
-        # re-assemble the prompt
-        # ignore the hostname AND use 'openhands-workspace'
-        prompt = f'{other_info.strip()}\n{username}@openhands-workspace:{working_dir} '
-        if username == 'root':
-            prompt += '#'
-        else:
-            prompt += '$'
-        return prompt + ' '
-
-    def _execute_bash(
-        self,
-        command: str,
-        timeout: int,
-        keep_prompt: bool = True,
-        kill_on_timeout: bool = True,
-    ) -> tuple[str, int]:
-        logger.debug(f'Executing command: {command}')
-        self.shell.sendline(command)
-        return self._continue_bash(
-            timeout=timeout, keep_prompt=keep_prompt, kill_on_timeout=kill_on_timeout
-        )
-
-    def _interrupt_bash(
-        self,
-        action_timeout: int | None,
-        interrupt_timeout: int | None = None,
-        max_retries: int = 2,
-    ) -> tuple[str, int]:
-        interrupt_timeout = interrupt_timeout or 1  # default timeout for SIGINT
-        # try to interrupt the bash shell use SIGINT
-        while max_retries > 0:
-            self.shell.sendintr()  # send SIGINT to the shell
-            logger.debug('Sent SIGINT to bash. Waiting for output...')
-            try:
-                self.shell.expect(self.__bash_expect_regex, timeout=interrupt_timeout)
-                output = self.shell.before
-                logger.debug(f'Received output after SIGINT: {output}')
-                exit_code = 130  # SIGINT
-
-                _additional_msg = ''
-                if action_timeout is not None:
-                    _additional_msg = (
-                        f'Command timed out after {action_timeout} seconds. '
-                    )
-                output += (
-                    '\r\n\r\n'
-                    + f'[{_additional_msg}SIGINT was sent to interrupt the command.]'
-                )
-                return output, exit_code
-            except pexpect.TIMEOUT as e:
-                logger.warning(f'Bash pexpect.TIMEOUT while waiting for SIGINT: {e}')
-                max_retries -= 1
-
-        # fall back to send control-z
-        logger.error(
-            'Failed to get output after SIGINT. Max retries reached. Sending control-z...'
-        )
-        self.shell.sendcontrol('z')
-        self.shell.expect(self.__bash_expect_regex)
-        output = self.shell.before
-        logger.debug(f'Received output after control-z: {output}')
-        # Try to kill the job
-        self.shell.sendline('kill -9 %1')
-        self.shell.expect(self.__bash_expect_regex)
-        logger.debug(f'Received output after killing job %1: {self.shell.before}')
-        output += self.shell.before
-
-        _additional_msg = ''
-        if action_timeout is not None:
-            _additional_msg = f'Command timed out after {action_timeout} seconds. '
-        output += (
-            '\r\n\r\n'
-            + f'[{_additional_msg}SIGINT was sent to interrupt the command, but failed. The command was killed.]'
-        )
-
-        # Try to get the exit code again
-        self.shell.sendline('echo $?')
-        self.shell.expect(self.__bash_expect_regex)
-        _exit_code_output = self.shell.before
-        exit_code = self._parse_exit_code(_exit_code_output)
-
-        return output, exit_code
-
-    def _parse_exit_code(self, output: str) -> int:
-        try:
-            exit_code = int(output.strip().split()[0])
-        except Exception:
-            logger.error('Error getting exit code from bash script')
-            # If we try to run an invalid shell script the output sometimes includes error text
-            # rather than the error code - we assume this is an error
-            exit_code = 2
-        return exit_code
-
-    def _continue_bash(
-        self,
-        timeout: int,
-        keep_prompt: bool = True,
-        kill_on_timeout: bool = True,
-    ) -> tuple[str, int]:
-        logger.debug(f'Continuing bash with timeout={timeout}')
-        try:
-            self.shell.expect(self.__bash_expect_regex, timeout=timeout)
-
-            output = self.shell.before
-
-            # Get exit code
-            self.shell.sendline('echo $?')
-            logger.debug('Requesting exit code...')
-            self.shell.expect(self.__bash_expect_regex, timeout=timeout)
-            _exit_code_output = self.shell.before
-            exit_code = self._parse_exit_code(_exit_code_output)
-        except pexpect.TIMEOUT as e:
-            logger.warning(f'Bash pexpect.TIMEOUT while executing bash command: {e}')
-            if kill_on_timeout:
-                output, exit_code = self._interrupt_bash(action_timeout=timeout)
-            else:
-                output = self.shell.before or ''
-                exit_code = -1
-        finally:
-            bash_prompt = self._get_bash_prompt_and_update_pwd()
-            if keep_prompt:
-                output += '\r\n' + bash_prompt
-        return output, exit_code
-
     async def run_action(self, action) -> Observation:
         action_type = action.action
         logger.debug(f'Running action:\n{action}')
@@ -427,51 +165,10 @@ async def run_action(self, action) -> Observation:
         logger.debug(f'Action output:\n{observation}')
         return observation
 
-    async def run(self, action: CmdRunAction) -> CmdOutputObservation:
-        try:
-            assert (
-                action.timeout is not None
-            ), f'Timeout argument is required for CmdRunAction: {action}'
-            commands = split_bash_commands(action.command)
-            all_output = ''
-            for command in commands:
-                if command == '':
-                    output, exit_code = self._continue_bash(
-                        timeout=SOFT_TIMEOUT_SECONDS,
-                        keep_prompt=action.keep_prompt,
-                        kill_on_timeout=False,
-                    )
-                elif command.lower() == 'ctrl+c':
-                    output, exit_code = self._interrupt_bash(
-                        action_timeout=None,  # intentionally None
-                    )
-                else:
-                    output, exit_code = self._execute_bash(
-                        command,
-                        timeout=SOFT_TIMEOUT_SECONDS
-                        if not action.blocking
-                        else action.timeout,
-                        keep_prompt=action.keep_prompt,
-                        kill_on_timeout=False if not action.blocking else True,
-                    )
-                if all_output:
-                    # previous output already exists with prompt "user@hostname:working_dir #""
-                    # we need to add the command to the previous output,
-                    # so model knows the following is the output of another action)
-                    all_output = all_output.rstrip() + ' ' + command + '\r\n'
-
-                all_output += str(output) + '\r\n'
-                if exit_code != 0:
-                    break
-            return CmdOutputObservation(
-                command_id=-1,
-                content=all_output.rstrip('\r\n'),
-                command=action.command,
-                hidden=action.hidden,
-                exit_code=exit_code,
-            )
-        except UnicodeDecodeError:
-            raise RuntimeError('Command output could not be decoded as utf-8')
+    async def run(
+        self, action: CmdRunAction
+    ) -> CmdOutputObservation | FatalErrorObservation:
+        return self.bash_session.run(action)
 
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
         if 'jupyter' in self.plugins:
@@ -479,21 +176,27 @@ async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
             # This is used to make AgentSkills in Jupyter aware of the
             # current working directory in Bash
             jupyter_pwd = getattr(self, '_jupyter_pwd', None)
-            if self.pwd != jupyter_pwd:
-                logger.debug(f'{self.pwd} != {jupyter_pwd} -> reset Jupyter PWD')
-                reset_jupyter_pwd_code = f'import os; os.chdir("{self.pwd}")'
+            if self.bash_session.pwd != jupyter_pwd:
+                logger.debug(
+                    f'{self.bash_session.pwd} != {jupyter_pwd} -> reset Jupyter PWD'
+                )
+                reset_jupyter_pwd_code = (
+                    f'import os; os.chdir("{self.bash_session.pwd}")'
+                )
                 _aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
                 _reset_obs: IPythonRunCellObservation = await _jupyter_plugin.run(
                     _aux_action
                 )
                 logger.debug(
-                    f'Changed working directory in IPython to: {self.pwd}. Output: {_reset_obs}'
+                    f'Changed working directory in IPython to: {self.bash_session.pwd}. Output: {_reset_obs}'
                 )
-                self._jupyter_pwd = self.pwd
+                self._jupyter_pwd = self.bash_session.pwd
 
             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             obs.content = obs.content.rstrip()
-            obs.content += f'\n[Jupyter current working directory: {self.pwd}]'
+            obs.content += (
+                f'\n[Jupyter current working directory: {self.bash_session.pwd}]'
+            )
             obs.content += f'\n[Jupyter Python interpreter: {_jupyter_plugin.python_interpreter_path}]'
             return obs
         else:
@@ -501,15 +204,6 @@ async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
                 'JupyterRequirement not found. Unable to run IPython action.'
             )
 
-    def _get_working_directory(self):
-        # NOTE: this is part of initialization, so we hard code the timeout
-        result, exit_code = self._execute_bash('pwd', timeout=60, keep_prompt=False)
-        if exit_code != 0:
-            raise RuntimeError(
-                f'Failed to get working directory (exit code: {exit_code}): {result}'
-            )
-        return result.strip()
-
     def _resolve_path(self, path: str, working_dir: str) -> str:
         filepath = Path(path)
         if not filepath.is_absolute():
@@ -519,7 +213,7 @@ def _resolve_path(self, path: str, working_dir: str) -> str:
     async def read(self, action: FileReadAction) -> Observation:
         # NOTE: the client code is running inside the sandbox,
         # so there's no need to check permission
-        working_dir = self._get_working_directory()
+        working_dir = self.bash_session.workdir
         filepath = self._resolve_path(action.path, working_dir)
         try:
             with open(filepath, 'r', encoding='utf-8') as file:
@@ -539,7 +233,7 @@ async def read(self, action: FileReadAction) -> Observation:
         return FileReadObservation(path=filepath, content=code_view)
 
     async def write(self, action: FileWriteAction) -> Observation:
-        working_dir = self._get_working_directory()
+        working_dir = self.bash_session.workdir
         filepath = self._resolve_path(action.path, working_dir)
 
         insert = action.content.split('\n')
@@ -600,7 +294,7 @@ async def browse_interactive(self, action: BrowseInteractiveAction) -> Observati
         return await browse(action, self.browser)
 
     def close(self):
-        self.shell.close()
+        self.bash_session.close()
         self.browser.close()
 
 
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 2646c06768f2..0d752d71be28 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -140,8 +140,9 @@ def run_action(self, action: Action) -> Observation:
         if not action.runnable:
             return NullObservation('')
         if (
-            hasattr(action, 'is_confirmed')
-            and action.is_confirmed == ActionConfirmationStatus.AWAITING_CONFIRMATION
+            hasattr(action, 'confirmation_state')
+            and action.confirmation_state
+            == ActionConfirmationStatus.AWAITING_CONFIRMATION
         ):
             return NullObservation('')
         action_type = action.action  # type: ignore[attr-defined]
@@ -152,8 +153,8 @@ def run_action(self, action: Action) -> Observation:
                 f'Action {action_type} is not supported in the current runtime.'
             )
         if (
-            hasattr(action, 'is_confirmed')
-            and action.is_confirmed == ActionConfirmationStatus.REJECTED
+            getattr(action, 'confirmation_state', None)
+            == ActionConfirmationStatus.REJECTED
         ):
             return UserRejectObservation(
                 'Action has been rejected by the user! Waiting for further user input.'
@@ -171,6 +172,10 @@ def __enter__(self) -> 'Runtime':
     def __exit__(self, exc_type, exc_value, traceback) -> None:
         self.close()
 
+    @abstractmethod
+    async def connect(self) -> None:
+        """Connect to the runtime backend; every concrete runtime implements this."""
+
     # ====================================================================
     # Action execution
     # ====================================================================
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 358e3c090790..1cf409692257 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import threading
+from functools import lru_cache
 from typing import Callable
 from zipfile import ZipFile
 
@@ -153,7 +154,6 @@ def __init__(
         self.session = requests.Session()
         self.status_message_callback = status_message_callback
 
-        self.send_status_message('STATUS$STARTING_RUNTIME')
         self.docker_client: docker.DockerClient = self._init_docker_client()
         self.base_container_image = self.config.sandbox.base_container_image
         self.runtime_container_image = self.config.sandbox.runtime_container_image
@@ -174,31 +174,7 @@ def __init__(
         self.skip_container_logs = (
             os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true'
         )
-        if self.runtime_container_image is None:
-            if self.base_container_image is None:
-                raise ValueError(
-                    'Neither runtime container image nor base container image is set'
-                )
-            logger.info('Preparing container, this might take a few minutes...')
-            self.send_status_message('STATUS$STARTING_CONTAINER')
-            self.runtime_container_image = build_runtime_image(
-                self.base_container_image,
-                self.runtime_builder,
-                platform=self.config.sandbox.platform,
-                extra_deps=self.config.sandbox.runtime_extra_deps,
-                force_rebuild=self.config.sandbox.force_rebuild_runtime,
-            )
-
-        if not attach_to_existing:
-            self._init_container(
-                sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox,  # e.g. /workspace
-                mount_dir=self.config.workspace_mount_path,  # e.g. /opt/openhands/_test_workspace
-                plugins=plugins,
-            )
-        else:
-            self._attach_to_container()
 
-        # Will initialize both the event stream and the env vars
         self.init_base_runtime(
             config,
             event_stream,
@@ -209,11 +185,39 @@ def __init__(
             attach_to_existing,
         )
 
+    async def connect(self):
+        self.send_status_message('STATUS$STARTING_RUNTIME')
+        if not self.attach_to_existing:
+            if self.runtime_container_image is None:
+                if self.base_container_image is None:
+                    raise ValueError(
+                        'Neither runtime container image nor base container image is set'
+                    )
+                logger.info('Preparing container, this might take a few minutes...')
+                self.send_status_message('STATUS$STARTING_CONTAINER')
+                self.runtime_container_image = build_runtime_image(
+                    self.base_container_image,
+                    self.runtime_builder,
+                    platform=self.config.sandbox.platform,
+                    extra_deps=self.config.sandbox.runtime_extra_deps,
+                    force_rebuild=self.config.sandbox.force_rebuild_runtime,
+                )
+
+            self._init_container(
+                sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox,  # e.g. /workspace
+                mount_dir=self.config.workspace_mount_path,  # e.g. /opt/openhands/_test_workspace
+                plugins=self.plugins,
+            )
+
+        else:
+            self._attach_to_container()
+
         logger.info('Waiting for client to become ready...')
         self.send_status_message('STATUS$WAITING_FOR_CLIENT')
-
         self._wait_until_alive()
-        self.setup_initial_env()
+
+        if not self.attach_to_existing:
+            self.setup_initial_env()
 
         logger.info(
             f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}'
@@ -221,6 +225,7 @@ def __init__(
         self.send_status_message(' ')
 
     @staticmethod
+    @lru_cache(maxsize=1)
     def _init_docker_client() -> docker.DockerClient:
         try:
             return docker.from_env()
@@ -327,7 +332,7 @@ def _init_container(
                 f'Error: Instance {self.container_name} FAILED to start container!\n'
             )
             logger.exception(e)
-            self.close(close_client=False)
+            self.close()
             raise e
 
     def _attach_to_container(self):
@@ -388,11 +393,10 @@ def _wait_until_alive(self):
             logger.error(msg)
             raise RuntimeError(msg)
 
-    def close(self, close_client: bool = True, rm_all_containers: bool = True):
+    def close(self, rm_all_containers: bool = True):
         """Closes the EventStreamRuntime and associated objects
 
         Parameters:
-        - close_client (bool): Whether to close the DockerClient
         - rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix
         """
 
@@ -402,6 +406,9 @@ def close(self, close_client: bool = True, rm_all_containers: bool = True):
         if self.session:
             self.session.close()
 
+        if self.attach_to_existing:
+            return
+
         try:
             containers = self.docker_client.containers.list(all=True)
             for container in containers:
@@ -426,9 +433,6 @@ def close(self, close_client: bool = True, rm_all_containers: bool = True):
         except docker.errors.NotFound:  # yes, this can happen!
             pass
 
-        if close_client:
-            self.docker_client.close()
-
     def run_action(self, action: Action) -> Observation:
         if isinstance(action, FileEditAction):
             return self.edit(action)
@@ -441,8 +445,8 @@ def run_action(self, action: Action) -> Observation:
             if not action.runnable:
                 return NullObservation('')
             if (
-                hasattr(action, 'is_confirmed')
-                and action.is_confirmed
+                hasattr(action, 'confirmation_state')
+                and action.confirmation_state
                 == ActionConfirmationStatus.AWAITING_CONFIRMATION
             ):
                 return NullObservation('')
@@ -454,8 +458,8 @@ def run_action(self, action: Action) -> Observation:
                     f'Action {action_type} is not supported in the current runtime.'
                 )
             if (
-                hasattr(action, 'is_confirmed')
-                and action.is_confirmed == ActionConfirmationStatus.REJECTED
+                getattr(action, 'confirmation_state', None)
+                == ActionConfirmationStatus.REJECTED
             ):
                 return UserRejectObservation(
                     'Action has been rejected by the user! Waiting for further user input.'
diff --git a/openhands/runtime/impl/modal/modal_runtime.py b/openhands/runtime/impl/modal/modal_runtime.py
index 2aae1b99c97f..be4603a802c4 100644
--- a/openhands/runtime/impl/modal/modal_runtime.py
+++ b/openhands/runtime/impl/modal/modal_runtime.py
@@ -1,7 +1,6 @@
 import os
 import tempfile
 import threading
-import uuid
 from pathlib import Path
 from typing import Callable, Generator
 
@@ -12,11 +11,19 @@
 from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
-from openhands.runtime.client.runtime import EventStreamRuntime, LogBuffer
+from openhands.runtime.impl.eventstream.eventstream_runtime import (
+    EventStreamRuntime,
+    LogBuffer,
+)
 from openhands.runtime.plugins import PluginRequirement
+from openhands.runtime.utils.command import get_remote_startup_command
 from openhands.runtime.utils.runtime_build import (
     prep_build_folder,
 )
+from openhands.utils.async_utils import call_sync_from_async
+
+# FIXME: this will not work in HA mode. We need a better way to track IDs
+MODAL_RUNTIME_IDS: dict[str, str] = {}
 
 
 # Modal's log generator returns strings, but the upstream LogBuffer expects bytes.
@@ -60,6 +67,7 @@ class ModalRuntime(EventStreamRuntime):
     """
 
     container_name_prefix = 'openhands-sandbox-'
+    sandbox: modal.Sandbox | None
 
     def __init__(
         self,
@@ -69,11 +77,13 @@ def __init__(
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
         status_message_callback: Callable | None = None,
+        attach_to_existing: bool = False,
     ):
         assert config.modal_api_token_id, 'Modal API token id is required'
         assert config.modal_api_token_secret, 'Modal API token secret is required'
 
         self.config = config
+        self.sandbox = None
 
         self.modal_client = modal.Client.from_credentials(
             config.modal_api_token_id, config.modal_api_token_secret
@@ -92,18 +102,11 @@ def __init__(
         self.container_port = 3000
 
         self.session = requests.Session()
-        self.instance_id = (
-            sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
-        )
         self.status_message_callback = status_message_callback
-
-        self.send_status_message('STATUS$STARTING_RUNTIME')
         self.base_container_image_id = self.config.sandbox.base_container_image
         self.runtime_container_image_id = self.config.sandbox.runtime_container_image
         self.action_semaphore = threading.Semaphore(1)  # Ensure one action at a time
 
-        logger.info(f'ModalRuntime `{self.instance_id}`')
-
         # Buffer for container logs
         self.log_buffer: LogBuffer | None = None
 
@@ -112,32 +115,67 @@ def __init__(
                 f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
             )
 
+        self.init_base_runtime(
+            config,
+            event_stream,
+            sid,
+            plugins,
+            env_vars,
+            status_message_callback,
+            attach_to_existing,
+        )
+
+    async def connect(self):
+        """Start a new Modal sandbox, or attach to the one recorded for this sid.
+
+        Raises:
+            Exception: If no sandbox is available after the start/attach step.
+        """
+        self.send_status_message('STATUS$STARTING_RUNTIME')
+
+        logger.info(f'ModalRuntime `{self.sid}`')
+
         self.image = self._get_image_definition(
             self.base_container_image_id,
             self.runtime_container_image_id,
             self.config.sandbox.runtime_extra_deps,
         )
 
-        self.sandbox = self._init_sandbox(
-            sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox,
-            plugins=plugins,
-        )
+        if self.attach_to_existing:
+            if self.sid in MODAL_RUNTIME_IDS:
+                sandbox_id = MODAL_RUNTIME_IDS[self.sid]
+                logger.info(f'Attaching to existing Modal sandbox: {sandbox_id}')
+                self.sandbox = modal.Sandbox.from_id(
+                    sandbox_id, client=self.modal_client
+                )
+        else:
+            self.send_status_message('STATUS$PREPARING_CONTAINER')
+            await call_sync_from_async(
+                self._init_sandbox,
+                sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox,
+                plugins=self.plugins,
+            )
 
-        # Will initialize both the event stream and the env vars
-        self.init_base_runtime(
-            config, event_stream, sid, plugins, env_vars, status_message_callback
-        )
+            self.send_status_message('STATUS$CONTAINER_STARTED')
+
+        # Validate before first use: the attach branch leaves `self.sandbox` unset
+        # when this sid has no entry in MODAL_RUNTIME_IDS.
+        if self.sandbox is None:
+            raise Exception('Sandbox not initialized')
+        self.log_buffer = ModalLogBuffer(self.sandbox)
+        tunnel = self.sandbox.tunnels()[self.container_port]
+        self.api_url = tunnel.url
+        logger.info(f'Container started. Server url: {self.api_url}')
 
-        logger.info('Waiting for client to become ready...')
-        self.send_status_message('STATUS$WAITING_FOR_CLIENT')
+        if not self.attach_to_existing:
+            logger.info('Waiting for client to become ready...')
+            self.send_status_message('STATUS$WAITING_FOR_CLIENT')
 
         self._wait_until_alive()
         self.setup_initial_env()
 
-        logger.info(
-            f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}'
-        )
-        self.send_status_message(' ')
+        if not self.attach_to_existing:
+            self.send_status_message(' ')
 
     def _get_image_definition(
         self,
@@ -185,10 +216,9 @@ def _init_sandbox(
         self,
         sandbox_workspace_dir: str,
         plugins: list[PluginRequirement] | None = None,
-    ) -> modal.Sandbox:
+    ):
         try:
             logger.info('Preparing to start container...')
-            self.send_status_message('STATUS$PREPARING_CONTAINER')
             plugin_args = []
             if plugins is not None and len(plugins) > 0:
                 plugin_args.append('--plugins')
@@ -212,29 +242,16 @@ def _init_sandbox(
             env_secret = modal.Secret.from_dict(environment)
 
             logger.debug(f'Sandbox workspace: {sandbox_workspace_dir}')
-            sandbox_start_cmd: list[str] = [
-                '/openhands/micromamba/bin/micromamba',
-                'run',
-                '-n',
-                'openhands',
-                'poetry',
-                'run',
-                'python',
-                '-u',
-                '-m',
-                'openhands.runtime.client.client',
-                str(self.container_port),
-                '--working-dir',
+            sandbox_start_cmd = get_remote_startup_command(
+                self.container_port,
                 sandbox_workspace_dir,
-                *plugin_args,
-                '--username',
                 'openhands' if self.config.run_as_openhands else 'root',
-                '--user-id',
-                str(self.config.sandbox.user_id),
-                *browsergym_args,
-            ]
-
-            sandbox = modal.Sandbox.create(
+                self.config.sandbox.user_id,
+                plugin_args,
+                browsergym_args,
+            )
+            logger.debug(f'Starting container with command: {sandbox_start_cmd}')
+            self.sandbox = modal.Sandbox.create(
                 *sandbox_start_cmd,
                 secrets=[env_secret],
                 workdir='/openhands/code',
@@ -244,18 +261,11 @@ def _init_sandbox(
                 client=self.modal_client,
                 timeout=60 * 60,
             )
+            MODAL_RUNTIME_IDS[self.sid] = self.sandbox.object_id
+            logger.info('Container started')
 
-            tunnel = sandbox.tunnels()[self.container_port]
-            self.api_url = tunnel.url
-
-            self.log_buffer = ModalLogBuffer(sandbox)
-            logger.info(f'Container started. Server url: {self.api_url}')
-            self.send_status_message('STATUS$CONTAINER_STARTED')
-            return sandbox
         except Exception as e:
-            logger.error(
-                f'Error: Instance {self.instance_id} FAILED to start container!\n'
-            )
+            logger.error(f'Error: Instance {self.sid} FAILED to start container!\n')
             logger.exception(e)
             self.close()
             raise e
@@ -271,5 +281,5 @@ def close(self):
         if self.session:
             self.session.close()
 
-        if self.sandbox:
+        if not self.attach_to_existing and self.sandbox:
             self.sandbox.terminate()
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 40779f2abd12..8c25401af9cd 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -31,6 +31,7 @@
 from openhands.runtime.base import Runtime
 from openhands.runtime.builder.remote import RemoteRuntimeBuilder
 from openhands.runtime.plugins import PluginRequirement
+from openhands.runtime.utils.command import get_remote_startup_command
 from openhands.runtime.utils.request import (
     is_404_error,
     is_503_error,
@@ -77,11 +78,6 @@ def __init__(
         self.runtime_id: str | None = None
         self.runtime_url: str | None = None
 
-        self.sid = sid
-
-        self._start_or_attach_to_runtime(plugins, attach_to_existing)
-
-        # Initialize the eventstream and env vars
         super().__init__(
             config,
             event_stream,
@@ -91,15 +87,21 @@ def __init__(
             status_message_callback,
             attach_to_existing,
         )
+
+    async def connect(self):
+        """Run `_start_or_attach_to_runtime`, `_wait_until_alive` and `setup_initial_env`.
+
+        NOTE(review): none of the three calls is awaited, so they run synchronously here.
+        """
+        self._start_or_attach_to_runtime()
+        self._wait_until_alive()
         self.setup_initial_env()
 
-    def _start_or_attach_to_runtime(
-        self, plugins: list[PluginRequirement] | None, attach_to_existing: bool = False
-    ):
+    def _start_or_attach_to_runtime(self):
         existing_runtime = self._check_existing_runtime()
         if existing_runtime:
             logger.info(f'Using existing runtime with ID: {self.runtime_id}')
-        elif attach_to_existing:
+        elif self.attach_to_existing:
             raise RuntimeError('Could not find existing runtime to attach to.')
         else:
             self.send_status_message('STATUS$STARTING_CONTAINER')
@@ -113,7 +111,7 @@ def _start_or_attach_to_runtime(
                     f'Running remote runtime with image: {self.config.sandbox.runtime_container_image}'
                 )
                 self.container_image = self.config.sandbox.runtime_container_image
-            self._start_runtime(plugins)
+            self._start_runtime()
         assert (
             self.runtime_id is not None
         ), 'Runtime ID is not set. This should never happen.'
@@ -197,28 +195,27 @@ def _build_runtime(self):
         if response.status_code != 200 or not response.json()['exists']:
             raise RuntimeError(f'Container image {self.container_image} does not exist')
 
-    def _start_runtime(self, plugins: list[PluginRequirement] | None):
+    def _start_runtime(self):
         # Prepare the request body for the /start endpoint
-        plugin_arg = ''
-        if plugins is not None and len(plugins) > 0:
-            plugin_arg = f'--plugins {" ".join([plugin.name for plugin in plugins])} '
-        browsergym_arg = (
-            f'--browsergym-eval-env {self.config.sandbox.browsergym_eval_env}'
-            if self.config.sandbox.browsergym_eval_env is not None
-            else ''
+        plugin_args = []
+        if self.plugins is not None and len(self.plugins) > 0:
+            plugin_args = ['--plugins'] + [plugin.name for plugin in self.plugins]
+        browsergym_args = []
+        if self.config.sandbox.browsergym_eval_env is not None:
+            browsergym_args = [
+                '--browsergym-eval-env'
+            ] + self.config.sandbox.browsergym_eval_env.split(' ')
+        command = get_remote_startup_command(
+            self.port,
+            self.config.workspace_mount_path_in_sandbox,
+            'openhands' if self.config.run_as_openhands else 'root',
+            self.config.sandbox.user_id,
+            plugin_args,
+            browsergym_args,
         )
         start_request = {
             'image': self.container_image,
-            'command': (
-                f'/openhands/micromamba/bin/micromamba run -n openhands '
-                'poetry run '
-                f'python -u -m openhands.runtime.action_execution_server {self.port} '
-                f'--working-dir {self.config.workspace_mount_path_in_sandbox} '
-                f'{plugin_arg}'
-                f'--username {"openhands" if self.config.run_as_openhands else "root"} '
-                f'--user-id {self.config.sandbox.user_id} '
-                f'{browsergym_arg}'
-            ),
+            'command': command,
             'working_dir': '/openhands/code/',
             'environment': {'DEBUG': 'true'} if self.config.debug else {},
             'runtime_id': self.sid,
@@ -295,11 +292,7 @@ def _wait_until_alive(self):
                 logger.info(
                     f'Runtime pod not found. Count: {not_found_count} / {max_not_found_count}'
                 )
-            elif (
-                pod_status == 'Failed'
-                or pod_status == 'Unknown'
-                or pod_status == 'Not Found'
-            ):
+            elif pod_status in ('Failed', 'Unknown', 'Not Found'):
                 # clean up the runtime
                 self.close()
                 raise RuntimeError(
@@ -325,7 +318,7 @@ def _wait_until_alive(self):
             raise RuntimeError(msg)
 
     def close(self, timeout: int = 10):
-        if self.config.sandbox.keep_remote_runtime_alive:
+        if self.config.sandbox.keep_remote_runtime_alive or self.attach_to_existing:
             self.session.close()
             return
         if self.runtime_id:
diff --git a/openhands/runtime/utils/__init__.py b/openhands/runtime/utils/__init__.py
index 71d9d4198c22..622a0b609573 100644
--- a/openhands/runtime/utils/__init__.py
+++ b/openhands/runtime/utils/__init__.py
@@ -1,7 +1,6 @@
-from openhands.runtime.utils.bash import split_bash_commands
 from openhands.runtime.utils.system import (
     display_number_matrix,
     find_available_tcp_port,
 )
 
-__all__ = ['display_number_matrix', 'find_available_tcp_port', 'split_bash_commands']
+__all__ = ['display_number_matrix', 'find_available_tcp_port']
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index 15ce3572cb9e..fba16787c6dc 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -1,6 +1,18 @@
+import os
+import re
+
 import bashlex
+import pexpect
 
 from openhands.core.logger import openhands_logger as logger
+from openhands.events.action import CmdRunAction
+from openhands.events.event import EventSource
+from openhands.events.observation import (
+    CmdOutputObservation,
+    FatalErrorObservation,
+)
+
+SOFT_TIMEOUT_SECONDS = 5
 
 
 def split_bash_commands(commands):
@@ -52,3 +64,305 @@ def split_bash_commands(commands):
             result.append(remaining)
             logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}')
     return result
+
+
+class BashSession:
+    """A class that maintains a pexpect process and provides a simple interface for running commands and interacting with the shell."""
+
+    def __init__(self, work_dir: str, username: str):
+        """Spawn `su {username}` under pexpect and initialize the shell in `work_dir`."""
+        self._pwd = work_dir
+
+        self.shell = pexpect.spawn(
+            f'su {username}',
+            encoding='utf-8',
+            codec_errors='replace',
+            echo=False,
+        )
+        self._init_bash_shell(work_dir)
+
+    def close(self):
+        """Close the underlying pexpect process."""
+        self.shell.close()
+
+    @property
+    def pwd(self):
+        """Working directory cached from `work_dir` or the last parsed prompt."""
+        return self._pwd
+
+    @property
+    def workdir(self):
+        """Working directory obtained by running `pwd` in the shell."""
+        return self._get_working_directory()
+
+    def _get_working_directory(self):
+        """Run `pwd` and return its stripped output; raise RuntimeError on a non-zero exit code."""
+        # NOTE: this is part of initialization, so we hard code the timeout
+        result, exit_code = self._execute_bash('pwd', timeout=60, keep_prompt=False)
+        if exit_code != 0:
+            raise RuntimeError(
+                f'Failed to get working directory (exit code: {exit_code}): {result}'
+            )
+        return result.strip()
+
+    def _init_bash_shell(self, work_dir: str):
+        """Set the umask and the sentinel PS1, then create, enter and chmod `work_dir`."""
+        self.__bash_PS1 = (
+            r'[PEXPECT_BEGIN]\n'
+            r'$(which python >/dev/null 2>&1 && echo "[Python Interpreter: $(which python)]\n")'
+            r'\u@\h:\w\n'
+            r'[PEXPECT_END]'
+        )
+
+        # This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
+        self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'
+        # Set umask to allow group write permissions
+        self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
+        self.shell.expect(self.__bash_expect_regex)
+
+        self.shell.sendline(
+            f'if [ ! -d "{work_dir}" ]; then mkdir -p "{work_dir}"; fi && cd "{work_dir}"'
+        )
+        self.shell.expect(self.__bash_expect_regex)
+        logger.debug(
+            f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
+        )
+        # Ensure the group has write permissions on the working directory
+        self.shell.sendline(f'chmod g+rw "{work_dir}"')
+        self.shell.expect(self.__bash_expect_regex)
+
+    def _get_bash_prompt_and_update_pwd(self):
+        """Parse the prompt matched by the last `expect` and refresh `self._pwd`.
+
+        Returns the re-assembled prompt, or '' if the last `expect` timed out.
+        Raises RuntimeError if the shell reached EOF.
+        """
+        ps1 = self.shell.after
+        if ps1 == pexpect.EOF:
+            logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
+            raise RuntimeError('Bash shell EOF')
+        if ps1 == pexpect.TIMEOUT:
+            logger.warning('Bash shell timeout')
+            return ''
+
+        # begin at the last occurrence of '[PEXPECT_BEGIN]'.
+        # In multi-line bash commands, the prompt will be repeated
+        # and the matched regex captures all of them
+        # - we only want the last one (newest prompt)
+        _begin_pos = ps1.rfind('[PEXPECT_BEGIN]')
+        if _begin_pos != -1:
+            ps1 = ps1[_begin_pos:]
+
+        # parse the ps1 to get username, hostname, and working directory
+        matched = re.match(self.__bash_expect_regex, ps1)
+        assert (
+            matched is not None
+        ), f'Failed to parse bash prompt: {ps1}. This should not happen.'
+        other_info, username, hostname, working_dir = matched.groups()
+        working_dir = working_dir.rstrip()
+        self._pwd = os.path.expanduser(working_dir)
+
+        # re-assemble the prompt
+        # ignore the hostname AND use 'openhands-workspace'
+        prompt = f'{other_info.strip()}\n{username}@openhands-workspace:{working_dir} '
+        if username == 'root':
+            prompt += '#'
+        else:
+            prompt += '$'
+        return prompt + ' '
+
+    def _execute_bash(
+        self,
+        command: str,
+        timeout: int,
+        keep_prompt: bool = True,
+        kill_on_timeout: bool = True,
+    ) -> tuple[str, int]:
+        """Send `command` to the shell and collect its result via `_continue_bash`."""
+        logger.debug(f'Executing command: {command}')
+        self.shell.sendline(command)
+        return self._continue_bash(
+            timeout=timeout, keep_prompt=keep_prompt, kill_on_timeout=kill_on_timeout
+        )
+
+    def _interrupt_bash(
+        self,
+        action_timeout: int | None,
+        interrupt_timeout: int | None = None,
+        max_retries: int = 2,
+    ) -> tuple[str, int]:
+        """Interrupt the running command with SIGINT, falling back to control-z and `kill -9 %1`.
+
+        `action_timeout` is only used to word the note appended to the output.
+        Returns (output, exit_code); exit_code is 130 when SIGINT got the prompt back.
+        """
+        interrupt_timeout = interrupt_timeout or 1  # default timeout for SIGINT
+        # try to interrupt the bash shell using SIGINT
+        while max_retries > 0:
+            self.shell.sendintr()  # send SIGINT to the shell
+            logger.debug('Sent SIGINT to bash. Waiting for output...')
+            try:
+                self.shell.expect(self.__bash_expect_regex, timeout=interrupt_timeout)
+                output = self.shell.before
+                logger.debug(f'Received output after SIGINT: {output}')
+                exit_code = 130  # SIGINT
+
+                _additional_msg = ''
+                if action_timeout is not None:
+                    _additional_msg = (
+                        f'Command timed out after {action_timeout} seconds. '
+                    )
+                output += (
+                    '\r\n\r\n'
+                    + f'[{_additional_msg}SIGINT was sent to interrupt the command.]'
+                )
+                return output, exit_code
+            except pexpect.TIMEOUT as e:
+                logger.warning(f'Bash pexpect.TIMEOUT while waiting for SIGINT: {e}')
+                max_retries -= 1
+
+        # fall back to send control-z
+        logger.error(
+            'Failed to get output after SIGINT. Max retries reached. Sending control-z...'
+        )
+        self.shell.sendcontrol('z')
+        self.shell.expect(self.__bash_expect_regex)
+        output = self.shell.before
+        logger.debug(f'Received output after control-z: {output}')
+        # Try to kill the job
+        self.shell.sendline('kill -9 %1')
+        self.shell.expect(self.__bash_expect_regex)
+        logger.debug(f'Received output after killing job %1: {self.shell.before}')
+        output += self.shell.before
+
+        _additional_msg = ''
+        if action_timeout is not None:
+            _additional_msg = f'Command timed out after {action_timeout} seconds. '
+        output += (
+            '\r\n\r\n'
+            + f'[{_additional_msg}SIGINT was sent to interrupt the command, but failed. The command was killed.]'
+        )
+
+        # Try to get the exit code again
+        self.shell.sendline('echo $?')
+        self.shell.expect(self.__bash_expect_regex)
+        _exit_code_output = self.shell.before
+        exit_code = self._parse_exit_code(_exit_code_output)
+
+        return output, exit_code
+
+    def _parse_exit_code(self, output: str) -> int:
+        """Parse the first whitespace-separated token of `output` as an int; return 2 on failure."""
+        try:
+            exit_code = int(output.strip().split()[0])
+        except Exception:
+            logger.error('Error getting exit code from bash script')
+            # If we try to run an invalid shell script the output sometimes includes error text
+            # rather than the error code - we assume this is an error
+            exit_code = 2
+        return exit_code
+
+    def _continue_bash(
+        self,
+        timeout: int,
+        keep_prompt: bool = True,
+        kill_on_timeout: bool = True,
+    ) -> tuple[str, int]:
+        """Wait for the next prompt and return (output, exit_code).
+
+        On timeout the command is interrupted when `kill_on_timeout` is set;
+        otherwise the partial output is returned with exit_code -1.
+        """
+        # Bind both results up front so the `finally` block below never hits an
+        # unbound `output` when an exception other than TIMEOUT propagates.
+        output = ''
+        exit_code = -1
+        logger.debug(f'Continuing bash with timeout={timeout}')
+        try:
+            self.shell.expect(self.__bash_expect_regex, timeout=timeout)
+
+            output = self.shell.before
+
+            # Get exit code
+            self.shell.sendline('echo $?')
+            logger.debug('Requesting exit code...')
+            self.shell.expect(self.__bash_expect_regex, timeout=timeout)
+            _exit_code_output = self.shell.before
+            exit_code = self._parse_exit_code(_exit_code_output)
+        except pexpect.TIMEOUT as e:
+            logger.warning(f'Bash pexpect.TIMEOUT while executing bash command: {e}')
+            if kill_on_timeout:
+                output, exit_code = self._interrupt_bash(action_timeout=timeout)
+            else:
+                output = self.shell.before or ''
+                exit_code = -1
+        finally:
+            bash_prompt = self._get_bash_prompt_and_update_pwd()
+            if keep_prompt:
+                output += '\r\n' + bash_prompt
+        return output, exit_code
+
+    def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservation:
+        """Run each command split out of `action.command` and merge the results.
+
+        Stops at the first command with a non-zero exit code. Returns a
+        FatalErrorObservation if a UnicodeDecodeError is raised.
+        """
+        try:
+            assert (
+                action.timeout is not None
+            ), f'Timeout argument is required for CmdRunAction: {action}'
+            commands = split_bash_commands(action.command)
+            all_output = ''
+            python_interpreter = ''
+            # Defined up front in case `commands` turns out to be empty.
+            exit_code = 0
+            for command in commands:
+                if command == '':
+                    output, exit_code = self._continue_bash(
+                        timeout=SOFT_TIMEOUT_SECONDS,
+                        keep_prompt=action.keep_prompt,
+                        kill_on_timeout=False,
+                    )
+                elif command.lower() == 'ctrl+c':
+                    output, exit_code = self._interrupt_bash(
+                        action_timeout=None,  # intentionally None
+                    )
+                else:
+                    output, exit_code = self._execute_bash(
+                        command,
+                        timeout=SOFT_TIMEOUT_SECONDS
+                        if not action.blocking
+                        else action.timeout,
+                        keep_prompt=action.keep_prompt,
+                        kill_on_timeout=bool(action.blocking),
+                    )
+                    # Get rid of the python interpreter string from each line of the output.
+                    # We need it only once at the end.
+                    parts = output.rsplit('[Python Interpreter: ', 1)
+                    output = parts[0]
+                    if len(parts) == 2:
+                        python_interpreter = '[Python Interpreter: ' + parts[1]
+                if all_output:
+                    # previous output already exists so we add a newline
+                    all_output += '\r\n'
+
+                # If the command originated with the agent, append the command that was run...
+                if action.source == EventSource.AGENT:
+                    all_output += command + '\r\n'
+
+                all_output += str(output)
+                if exit_code != 0:
+                    break
+            return CmdOutputObservation(
+                command_id=-1,
+                content=all_output.rstrip('\r\n'),
+                command=action.command,
+                hidden=action.hidden,
+                exit_code=exit_code,
+                interpreter_details=python_interpreter,
+            )
+        except UnicodeDecodeError as e:
+            return FatalErrorObservation(
+                f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}'
+            )
diff --git a/openhands/runtime/utils/command.py b/openhands/runtime/utils/command.py
new file mode 100644
index 000000000000..1617ec20f36f
--- /dev/null
+++ b/openhands/runtime/utils/command.py
@@ -0,0 +1,32 @@
+def get_remote_startup_command(
+    port: int,
+    sandbox_workspace_dir: str,
+    username: str,
+    user_id: int,
+    plugin_args: list[str],
+    browsergym_args: list[str],
+):
+    """Build the argv list that launches the action execution server.
+
+    The server module is run with `python -u -m` through `poetry run`, inside
+    the `openhands` micromamba environment. `plugin_args` and `browsergym_args`
+    are spliced in as-is: the former right after the working directory, the
+    latter at the very end.
+    """
+    launcher = [
+        '/openhands/micromamba/bin/micromamba',
+        'run',
+        '-n',
+        'openhands',
+        'poetry',
+        'run',
+        'python',
+        '-u',
+        '-m',
+        'openhands.runtime.action_execution_server',
+    ]
+    server_args = [str(port), '--working-dir', sandbox_workspace_dir]
+    server_args.extend(plugin_args)
+    server_args.extend(['--username', username, '--user-id', str(user_id)])
+    server_args.extend(browsergym_args)
+    return launcher + server_args
diff --git a/openhands/runtime/utils/runtime_init.py b/openhands/runtime/utils/runtime_init.py
new file mode 100644
index 000000000000..9d42f5483a46
--- /dev/null
+++ b/openhands/runtime/utils/runtime_init.py
@@ -0,0 +1,109 @@
+import subprocess
+
+from openhands.core.logger import openhands_logger as logger
+
+
+def init_user_and_working_directory(
+    username: str, user_id: int, initial_pwd: str
+) -> int | None:
+    """Create working directory and user if not exists.
+    It performs the following steps effectively:
+    * Creates the Working Directory:
+        - Uses mkdir -p to create the directory.
+        - Sets ownership to username:root.
+        - Adds read and write permission for the group (chmod g+rw).
+    * User Verification and Creation:
+        - Checks if the user exists using id -u.
+        - If the user exists with the correct UID, it skips creation.
+        - If the UID differs, it logs a warning and returns the existing UID.
+        - If the user doesn't exist, it proceeds to create the user.
+    * Sudo Configuration:
+        - Appends %sudo ALL=(ALL) NOPASSWD:ALL to /etc/sudoers to grant
+            passwordless sudo access to the sudo group.
+        - Creates the user with useradd (primary group root, supplementary
+            group sudo, the requested UID); a useradd failure is raised.
+
+    Args:
+        username (str): The username to create.
+        user_id (int): The user ID to assign to the user.
+        initial_pwd (str): The initial working directory to create.
+
+    Returns:
+        int | None: The existing UID if the user already exists with a different UID, None otherwise.
+
+    Raises:
+        subprocess.CalledProcessError: If `id -u` exits with a code other than 1.
+        RuntimeError: If updating /etc/sudoers or running useradd fails.
+    """
+
+    # First create the working directory, independent of the user
+    logger.info(f'Client working directory: {initial_pwd}')
+    command = f'umask 002; mkdir -p {initial_pwd}'
+    output = subprocess.run(command, shell=True, capture_output=True)
+    out_str = output.stdout.decode()
+
+    # NOTE(review): this runs before the user-creation step below, and its exit
+    # code is not checked — confirm that is intended when the user is new.
+    command = f'chown -R {username}:root {initial_pwd}'
+    output = subprocess.run(command, shell=True, capture_output=True)
+    out_str += output.stdout.decode()
+
+    command = f'chmod g+rw {initial_pwd}'
+    output = subprocess.run(command, shell=True, capture_output=True)
+    out_str += output.stdout.decode()
+    logger.debug(f'Created working directory. Output: [{out_str}]')
+
+    # Skip root since it is already created
+    if username == 'root':
+        return None
+
+    # Check if the username already exists
+    existing_user_id = -1
+    try:
+        result = subprocess.run(
+            f'id -u {username}', shell=True, check=True, capture_output=True
+        )
+        existing_user_id = int(result.stdout.decode().strip())
+
+        # The user already has the requested UID, skip setup
+        if existing_user_id == user_id:
+            logger.debug(
+                f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
+            )
+        else:
+            logger.warning(
+                f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
+            )
+            return existing_user_id
+        return None
+    except subprocess.CalledProcessError as e:
+        # Return code 1 indicates that the user does not exist yet
+        if e.returncode == 1:
+            logger.debug(
+                f'User `{username}` does not exist. Proceeding with user creation.'
+            )
+        else:
+            logger.error(f'Error checking user `{username}`, skipping setup:\n{e}\n')
+            raise
+
+    # Add sudoer
+    sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
+    output = subprocess.run(sudoer_line, shell=True, capture_output=True)
+    if output.returncode != 0:
+        raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
+    logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
+
+    command = (
+        f'useradd -rm -d /home/{username} -s /bin/bash '
+        f'-g root -G sudo -u {user_id} {username}'
+    )
+    output = subprocess.run(command, shell=True, capture_output=True)
+    if output.returncode == 0:
+        logger.debug(
+            f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
+        )
+    else:
+        raise RuntimeError(
+            f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
+        )
+    return None
diff --git a/openhands/runtime/utils/shutdown_listener.py b/openhands/runtime/utils/shutdown_listener.py
index 9941c9f27372..3aedd2672270 100644
--- a/openhands/runtime/utils/shutdown_listener.py
+++ b/openhands/runtime/utils/shutdown_listener.py
@@ -4,6 +4,7 @@
 
 import asyncio
 import signal
+import threading
 import time
 from types import FrameType
 
@@ -29,8 +30,11 @@ def _register_signal_handlers():
     if _should_exit is not None:
         return
     _should_exit = False
-    for sig in HANDLED_SIGNALS:
-        _register_signal_handler(sig)
+
+    # Signal handlers can only be registered from the main thread of the main interpreter
+    if threading.current_thread() is threading.main_thread():
+        for sig in HANDLED_SIGNALS:
+            _register_signal_handler(sig)
 
 
 def should_exit() -> bool:
diff --git a/openhands/security/invariant/analyzer.py b/openhands/security/invariant/analyzer.py
index 9d8b280716a7..7d6548be9d37 100644
--- a/openhands/security/invariant/analyzer.py
+++ b/openhands/security/invariant/analyzer.py
@@ -9,6 +9,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action.action import (
     Action,
+    ActionConfirmationStatus,
     ActionSecurityRisk,
 )
 from openhands.events.event import Event, EventSource
@@ -137,8 +138,9 @@ async def should_confirm(self, event: Event) -> bool:
         return (
             risk is not None
             and risk < self.settings.get('RISK_SEVERITY', ActionSecurityRisk.MEDIUM)
-            and hasattr(event, 'is_confirmed')
-            and event.is_confirmed == 'awaiting_confirmation'
+            and hasattr(event, 'confirmation_state')
+            and event.confirmation_state
+            == ActionConfirmationStatus.AWAITING_CONFIRMATION
         )
 
     async def confirm(self, event: Event) -> None:
diff --git a/openhands/server/listen.py b/openhands/server/listen.py
index a0820312059d..9647b7bc8437 100644
--- a/openhands/server/listen.py
+++ b/openhands/server/listen.py
@@ -34,6 +34,7 @@
 from fastapi.security import HTTPBearer
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
+from starlette.middleware.base import BaseHTTPMiddleware
 
 import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
 from openhands.controller.agent import Agent
@@ -64,6 +65,9 @@
 file_store = get_file_store(config.file_store, config.file_store_path)
 session_manager = SessionManager(config, file_store)
 
+GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip()
+GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip()
+
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
@@ -81,6 +85,25 @@ async def lifespan(app: FastAPI):
     allow_headers=['*'],
 )
 
+
+class NoCacheMiddleware(BaseHTTPMiddleware):
+    """
+    Middleware that disables caching for every route except those under `/assets` by adding no-cache headers
+    """
+
+    async def dispatch(self, request, call_next):
+        response = await call_next(request)
+        if not request.url.path.startswith('/assets'):
+            response.headers['Cache-Control'] = (
+                'no-cache, no-store, must-revalidate, max-age=0'
+            )
+            response.headers['Pragma'] = 'no-cache'
+            response.headers['Expires'] = '0'
+        return response
+
+
+app.add_middleware(NoCacheMiddleware)
+
 security_scheme = HTTPBearer()
 
 
@@ -180,9 +203,14 @@ async def attach_session(request: Request, call_next):
     Returns:
         Response: The response from the next middleware or route handler.
     """
-    if request.url.path.startswith('/api/options/') or not request.url.path.startswith(
-        '/api/'
-    ):
+    non_authed_paths = [
+        '/api/options/',
+        '/api/github/callback',
+        '/api/authenticate',
+    ]
+    if any(
+        request.url.path.startswith(path) for path in non_authed_paths
+    ) or not request.url.path.startswith('/api/'):
         response = await call_next(request)
         return response
 
@@ -211,16 +239,18 @@ async def attach_session(request: Request, call_next):
             content={'error': 'Invalid token'},
         )
 
-    request.state.conversation = await call_sync_from_async(
-        session_manager.attach_to_conversation, request.state.sid
+    request.state.conversation = await session_manager.attach_to_conversation(
+        request.state.sid
     )
     if request.state.conversation is None:
         return JSONResponse(
             status_code=status.HTTP_404_NOT_FOUND,
             content={'error': 'Session not found'},
         )
-
-    response = await call_next(request)
+    try:
+        response = await call_next(request)
+    finally:
+        await session_manager.detach_from_conversation(request.state.conversation)
     return response
 
 
@@ -254,7 +284,7 @@ async def websocket_endpoint(websocket: WebSocket):
         ```
     - Run a command:
         ```json
-        {"action": "run", "args": {"command": "ls -l", "thought": "", "is_confirmed": "confirmed"}}
+        {"action": "run", "args": {"command": "ls -l", "thought": "", "confirmation_state": "confirmed"}}
         ```
     - Run an IPython command:
         ```json
@@ -762,16 +792,16 @@ class AuthCode(BaseModel):
     code: str
 
 
-@app.post('/github/callback')
+@app.post('/api/github/callback')
 def github_callback(auth_code: AuthCode):
     # Prepare data for the token exchange request
     data = {
-        'client_id': os.getenv('GITHUB_CLIENT_ID'),
-        'client_secret': os.getenv('GITHUB_CLIENT_SECRET'),
+        'client_id': GITHUB_CLIENT_ID,
+        'client_secret': GITHUB_CLIENT_SECRET,
         'code': auth_code.code,
     }
 
-    logger.info(f'Exchanging code for token: {data}')
+    logger.info('Exchanging code for GitHub token')
 
     headers = {'Accept': 'application/json'}
     response = requests.post(
@@ -779,6 +809,7 @@ def github_callback(auth_code: AuthCode):
     )
 
     if response.status_code != 200:
+        logger.error(f'Failed to exchange code for token: {response.text}')
         return JSONResponse(
             status_code=status.HTTP_400_BAD_REQUEST,
             content={'error': 'Failed to exchange code for token'},
@@ -802,7 +833,7 @@ class User(BaseModel):
     login: str  # GitHub login handle
 
 
-@app.post('/authenticate')
+@app.post('/api/authenticate')
 def authenticate(user: User | None = None):
     waitlist = os.getenv('GITHUB_USER_LIST_FILE')
 
@@ -827,4 +858,13 @@ def authenticate(user: User | None = None):
     )
 
 
-app.mount('/', StaticFiles(directory='./frontend/build', html=True), name='dist')
+class SPAStaticFiles(StaticFiles):
+    async def get_response(self, path: str, scope):
+        try:
+            return await super().get_response(path, scope)
+        except Exception:
+            # FIXME: narrowing this to `except HTTPException` doesn't work for some reason
+            return await super().get_response('index.html', scope)
+
+
+app.mount('/', SPAStaticFiles(directory='./frontend/build', html=True), name='dist')
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index c408cfd92152..49415091cdc4 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -14,7 +14,6 @@
 from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
 from openhands.storage.files import FileStore
-from openhands.utils.async_utils import call_sync_from_async
 
 
 class AgentSession:
@@ -88,6 +87,7 @@ def _start_thread(self, *args):
         try:
             asyncio.run(self._start(*args), debug=True)
-        except RuntimeError:
+        except RuntimeError as e:
+            logger.error(f'Error starting session: {e}', exc_info=True)
             logger.info('Session Finished')
 
     async def _start(
@@ -103,8 +103,7 @@ async def _start(
     ):
         self.loop = asyncio.get_running_loop()
         self._create_security_analyzer(config.security.security_analyzer)
-        await call_sync_from_async(
-            self._create_runtime,
+        await self._create_runtime(
             runtime_name=runtime_name,
             config=config,
             agent=agent,
@@ -157,7 +156,7 @@ def _create_security_analyzer(self, security_analyzer: str | None):
                 security_analyzer, SecurityAnalyzer
             )(self.event_stream)
 
-    def _create_runtime(
+    async def _create_runtime(
         self,
         runtime_name: str,
         config: AppConfig,
@@ -177,15 +176,16 @@ def _create_runtime(
 
         logger.info(f'Initializing runtime `{runtime_name}` now...')
         runtime_cls = get_runtime_cls(runtime_name)
+        self.runtime = runtime_cls(
+            config=config,
+            event_stream=self.event_stream,
+            sid=self.sid,
+            plugins=agent.sandbox_plugins,
+            status_message_callback=status_message_callback,
+        )
 
         try:
-            self.runtime = runtime_cls(
-                config=config,
-                event_stream=self.event_stream,
-                sid=self.sid,
-                plugins=agent.sandbox_plugins,
-                status_message_callback=status_message_callback,
-            )
+            await self.runtime.connect()
         except Exception as e:
             logger.error(f'Runtime initialization failed: {e}', exc_info=True)
             raise
diff --git a/openhands/server/session/conversation.py b/openhands/server/session/conversation.py
index a15ec22f3296..ad880840e5df 100644
--- a/openhands/server/session/conversation.py
+++ b/openhands/server/session/conversation.py
@@ -1,9 +1,12 @@
+import asyncio
+
 from openhands.core.config import AppConfig
 from openhands.events.stream import EventStream
 from openhands.runtime import get_runtime_cls
 from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
 from openhands.storage.files import FileStore
+from openhands.utils.async_utils import call_sync_from_async
 
 
 class Conversation:
@@ -34,3 +37,9 @@ def __init__(
             sid=self.sid,
             attach_to_existing=True,
         )
+
+    async def connect(self):
+        await self.runtime.connect()
+
+    async def disconnect(self):
+        asyncio.create_task(call_sync_from_async(self.runtime.close))
diff --git a/openhands/server/session/manager.py b/openhands/server/session/manager.py
index 5cc46fff4495..6c650feee73a 100644
--- a/openhands/server/session/manager.py
+++ b/openhands/server/session/manager.py
@@ -1,5 +1,6 @@
 import asyncio
 import time
+from dataclasses import dataclass, field
 from typing import Optional
 
 from fastapi import WebSocket
@@ -13,16 +14,15 @@
 from openhands.storage.files import FileStore
 
 
+@dataclass
 class SessionManager:
-    _sessions: dict[str, Session] = {}
+    config: AppConfig
+    file_store: FileStore
     cleanup_interval: int = 300
     session_timeout: int = 600
+    _sessions: dict[str, Session] = field(default_factory=dict)
     _session_cleanup_task: Optional[asyncio.Task] = None
 
-    def __init__(self, config: AppConfig, file_store: FileStore):
-        self.config = config
-        self.file_store = file_store
-
     async def __aenter__(self):
         if not self._session_cleanup_task:
             self._session_cleanup_task = asyncio.create_task(self._cleanup_sessions())
@@ -46,10 +46,15 @@ def get_session(self, sid: str) -> Session | None:
             return None
         return self._sessions.get(sid)
 
-    def attach_to_conversation(self, sid: str) -> Conversation | None:
+    async def attach_to_conversation(self, sid: str) -> Conversation | None:
         if not session_exists(sid, self.file_store):
             return None
-        return Conversation(sid, file_store=self.file_store, config=self.config)
+        c = Conversation(sid, file_store=self.file_store, config=self.config)
+        await c.connect()
+        return c
+
+    async def detach_from_conversation(self, conversation: Conversation):
+        await conversation.disconnect()
 
     async def send(self, sid: str, data: dict[str, object]) -> bool:
         """Sends data to the client."""
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 000000000000..177e3d7a4065
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,111 @@
+{
+  "name": "OpenHands",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "dependencies": {
+        "react-textarea-autosize": "^8.5.4"
+      }
+    },
+    "node_modules/@babel/runtime": {
+      "version": "7.25.9",
+      "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.25.9.tgz",
+      "integrity": "sha512-4zpTHZ9Cm6L9L+uIqghQX8ZXg8HKFcjYO3qHoO8zTmRm6HQUJ8SSJ+KRvbMBZn0EGVlT4DRYeQ/6hjlyXBh+Kg==",
+      "dependencies": {
+        "regenerator-runtime": "^0.14.0"
+      },
+      "engines": {
+        "node": ">=6.9.0"
+      }
+    },
+    "node_modules/js-tokens": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
+      "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
+      "peer": true
+    },
+    "node_modules/loose-envify": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
+      "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
+      "peer": true,
+      "dependencies": {
+        "js-tokens": "^3.0.0 || ^4.0.0"
+      },
+      "bin": {
+        "loose-envify": "cli.js"
+      }
+    },
+    "node_modules/react": {
+      "version": "18.3.1",
+      "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
+      "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
+      "peer": true,
+      "dependencies": {
+        "loose-envify": "^1.1.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/react-textarea-autosize": {
+      "version": "8.5.4",
+      "resolved": "https://registry.npmjs.org/react-textarea-autosize/-/react-textarea-autosize-8.5.4.tgz",
+      "integrity": "sha512-eSSjVtRLcLfFwFcariT77t9hcbVJHQV76b51QjQGarQIHml2+gM2lms0n3XrhnDmgK5B+/Z7TmQk5OHNzqYm/A==",
+      "dependencies": {
+        "@babel/runtime": "^7.20.13",
+        "use-composed-ref": "^1.3.0",
+        "use-latest": "^1.2.1"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "peerDependencies": {
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0"
+      }
+    },
+    "node_modules/regenerator-runtime": {
+      "version": "0.14.1",
+      "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz",
+      "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw=="
+    },
+    "node_modules/use-composed-ref": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/use-composed-ref/-/use-composed-ref-1.3.0.tgz",
+      "integrity": "sha512-GLMG0Jc/jiKov/3Ulid1wbv3r54K9HlMW29IWcDFPEqFkSO2nS0MuefWgMJpeHQ9YJeXDL3ZUF+P3jdXlZX/cQ==",
+      "peerDependencies": {
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0"
+      }
+    },
+    "node_modules/use-isomorphic-layout-effect": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/use-isomorphic-layout-effect/-/use-isomorphic-layout-effect-1.1.2.tgz",
+      "integrity": "sha512-49L8yCO3iGT/ZF9QttjwLF/ZD9Iwto5LnH5LmEdk/6cFmXddqi2ulF0edxTwjj+7mqvpVVGQWvbXZdn32wRSHA==",
+      "peerDependencies": {
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/use-latest": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/use-latest/-/use-latest-1.2.1.tgz",
+      "integrity": "sha512-xA+AVm/Wlg3e2P/JiItTziwS7FK92LWrDB0p+hgXloIMuVCeJJ8v6f0eeHyPZaJrM+usM1FkFfbNCrJGs8A/zw==",
+      "dependencies": {
+        "use-isomorphic-layout-effect": "^1.1.1"
+      },
+      "peerDependencies": {
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 000000000000..7e2bb145af35
--- /dev/null
+++ b/package.json
@@ -0,0 +1,5 @@
+{
+  "dependencies": {
+    "react-textarea-autosize": "^8.5.4"
+  }
+}
diff --git a/tests/runtime/conftest.py b/tests/runtime/conftest.py
index 7affbfc0408f..c291fb84afba 100644
--- a/tests/runtime/conftest.py
+++ b/tests/runtime/conftest.py
@@ -16,6 +16,7 @@
 from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
 from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
 from openhands.storage import get_file_store
+from openhands.utils.async_utils import call_async_from_sync
 
 TEST_IN_CI = os.getenv('TEST_IN_CI', 'False').lower() in ['true', '1', 'yes']
 TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream').lower()
@@ -124,7 +125,7 @@ def cleanup():
 
 
 # Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
-def get_box_classes():
+def get_runtime_classes():
     runtime = TEST_RUNTIME
     if runtime.lower() == 'eventstream':
         return [EventStreamRuntime]
@@ -161,8 +162,8 @@ def runtime_setup_session():
 
 # This assures that all tests run together per runtime, not alternating between them,
 # which cause errors (especially outside GitHub actions).
-@pytest.fixture(scope='module', params=get_box_classes())
-def box_class(request):
+@pytest.fixture(scope='module', params=get_runtime_classes())
+def runtime_cls(request):
     time.sleep(1)
     return request.param
 
@@ -202,7 +203,7 @@ def base_container_image(request):
 
 def _load_runtime(
     temp_dir,
-    box_class,
+    runtime_cls,
     run_as_openhands: bool = True,
     enable_auto_lint: bool = False,
     base_container_image: str | None = None,
@@ -252,12 +253,13 @@ def _load_runtime(
     file_store = get_file_store(config.file_store, config.file_store_path)
     event_stream = EventStream(sid, file_store)
 
-    runtime = box_class(
+    runtime = runtime_cls(
         config=config,
         event_stream=event_stream,
         sid=sid,
         plugins=plugins,
     )
+    call_async_from_sync(runtime.connect)
     time.sleep(2)
     return runtime
 
diff --git a/tests/runtime/test_bash.py b/tests/runtime/test_bash.py
index cfe480102d79..3673dd927c68 100644
--- a/tests/runtime/test_bash.py
+++ b/tests/runtime/test_bash.py
@@ -29,8 +29,8 @@ def _run_cmd_action(runtime, custom_command: str, keep_prompt=True):
     return obs
 
 
-def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_bash_command_pexcept(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         # We set env var PS1="\u@\h:\w $"
         # and construct the PEXCEPT prompt base on it.
@@ -58,8 +58,8 @@ def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
         _close_test_runtime(runtime)
 
 
-def test_bash_timeout_and_keyboard_interrupt(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_bash_timeout_and_keyboard_interrupt(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         action = CmdRunAction(command='python -c "import time; time.sleep(10)"')
         action.timeout = 1
@@ -97,14 +97,14 @@ def test_bash_timeout_and_keyboard_interrupt(temp_dir, box_class, run_as_openhan
         obs = runtime.run_action(action)
         assert isinstance(obs, CmdOutputObservation)
         assert obs.exit_code == 0
-        assert '/workspace' in obs.content
+        assert '/workspace' in obs.interpreter_details
 
     finally:
         _close_test_runtime(runtime)
 
 
-def test_bash_pexcept_eof(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_bash_pexcept_eof(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         action = CmdRunAction(command='python3 -m http.server 8080')
         action.timeout = 1
@@ -121,7 +121,7 @@ def test_bash_pexcept_eof(temp_dir, box_class, run_as_openhands):
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
         assert isinstance(obs, CmdOutputObservation)
         assert obs.exit_code == 0
-        assert '/workspace' in obs.content
+        assert '/workspace' in obs.interpreter_details
 
         # run it again!
         action = CmdRunAction(command='python3 -m http.server 8080')
@@ -139,13 +139,13 @@ def test_bash_pexcept_eof(temp_dir, box_class, run_as_openhands):
         obs = runtime.run_action(action)
         assert isinstance(obs, CmdOutputObservation)
         assert obs.exit_code == 0
-        assert '/workspace' in obs.content
+        assert '/workspace' in obs.interpreter_details
     finally:
         _close_test_runtime(runtime)
 
 
-def test_process_resistant_to_one_sigint(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_process_resistant_to_one_sigint(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         # Create a bash script that ignores SIGINT up to 1 times
         script_content = """
@@ -190,15 +190,15 @@ def test_process_resistant_to_one_sigint(temp_dir, box_class, run_as_openhands):
         obs = runtime.run_action(action)
         assert isinstance(obs, CmdOutputObservation)
         assert obs.exit_code == 0
-        assert '/workspace' in obs.content
+        assert '/workspace' in obs.interpreter_details
         assert 'resistant_script.sh' in obs.content
 
     finally:
         _close_test_runtime(runtime)
 
 
-def test_process_resistant_to_multiple_sigint(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_process_resistant_to_multiple_sigint(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         # Create a bash script that ignores SIGINT up to 2 times
         script_content = """
@@ -243,15 +243,15 @@ def test_process_resistant_to_multiple_sigint(temp_dir, box_class, run_as_openha
         obs = runtime.run_action(action)
         assert isinstance(obs, CmdOutputObservation)
         assert obs.exit_code == 0
-        assert '/workspace' in obs.content
+        assert '/workspace' in obs.interpreter_details
         assert 'resistant_script.sh' in obs.content
 
     finally:
         _close_test_runtime(runtime)
 
 
-def test_multiline_commands(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_multiline_commands(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         # single multiline command
         obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"')
@@ -264,14 +264,14 @@ def test_multiline_commands(temp_dir, box_class):
         assert 'hello\r\nworld' in obs.content
 
         # test whitespace
-        obs = _run_cmd_action(runtime, 'echo -e "\\n\\n\\n"')
+        obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"')
         assert obs.exit_code == 0, 'The exit code should be 0.'
         assert '\r\n\r\n\r\n' in obs.content
     finally:
         _close_test_runtime(runtime)
 
 
-def test_multiple_multiline_commands(temp_dir, box_class, run_as_openhands):
+def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands):
     cmds = [
         'ls -l',
         'echo -e "hello\nworld"',
@@ -301,7 +301,7 @@ def test_multiple_multiline_commands(temp_dir, box_class, run_as_openhands):
     ]
     joined_cmds = '\n'.join(cmds)
 
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         obs = _run_cmd_action(runtime, joined_cmds)
         assert obs.exit_code == 0, 'The exit code should be 0.'
@@ -312,14 +312,13 @@ def test_multiple_multiline_commands(temp_dir, box_class, run_as_openhands):
         assert 'hello -v' in obs.content
         assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
         assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
-        assert 'hello\r\nworld "\r\n' in obs.content
     finally:
         _close_test_runtime(runtime)
 
 
-def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
+def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
     """Test that the PS2 sign is not added to the output of a multiline command."""
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
         assert obs.exit_code == 0, 'The exit code should be 0.'
@@ -330,7 +329,7 @@ def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
         _close_test_runtime(runtime)
 
 
-def test_multiline_command_loop(temp_dir, box_class):
+def test_multiline_command_loop(temp_dir, runtime_cls):
     # https://github.com/All-Hands-AI/OpenHands/issues/3143
     init_cmd = """
 mkdir -p _modules && \
@@ -348,7 +347,7 @@ def test_multiline_command_loop(temp_dir, box_class):
 done
 echo "success"
 """
-    runtime = _load_runtime(temp_dir, box_class)
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         obs = _run_cmd_action(runtime, init_cmd)
         assert obs.exit_code == 0, 'The exit code should be 0.'
@@ -361,8 +360,8 @@ def test_multiline_command_loop(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_cmd_run(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_cmd_run(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         obs = _run_cmd_action(runtime, 'ls -l /openhands/workspace')
         assert obs.exit_code == 0
@@ -398,8 +397,8 @@ def test_cmd_run(temp_dir, box_class, run_as_openhands):
         _close_test_runtime(runtime)
 
 
-def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_run_as_user_correct_home_dir(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         obs = _run_cmd_action(runtime, 'cd ~ && pwd')
         assert obs.exit_code == 0
@@ -411,8 +410,8 @@ def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
         _close_test_runtime(runtime)
 
 
-def test_multi_cmd_run_in_single_line(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         obs = _run_cmd_action(runtime, 'pwd && ls -l')
         assert obs.exit_code == 0
@@ -422,8 +421,8 @@ def test_multi_cmd_run_in_single_line(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_stateful_cmd(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_stateful_cmd(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     sandbox_dir = _get_sandbox_folder(runtime)
     try:
         obs = _run_cmd_action(runtime, 'mkdir -p test')
@@ -439,8 +438,8 @@ def test_stateful_cmd(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_failed_cmd(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_failed_cmd(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         obs = _run_cmd_action(runtime, 'non_existing_command')
         assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
@@ -454,8 +453,8 @@ def _create_test_file(host_temp_dir):
         f.write('Hello, World!')
 
 
-def test_copy_single_file(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_copy_single_file(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         sandbox_dir = _get_sandbox_folder(runtime)
         sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')
@@ -484,8 +483,8 @@ def _create_host_test_dir_with_files(test_dir):
         f.write('File 2 content')
 
 
-def test_copy_directory_recursively(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_copy_directory_recursively(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
 
     sandbox_dir = _get_sandbox_folder(runtime)
     try:
@@ -513,8 +512,8 @@ def test_copy_directory_recursively(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_copy_to_non_existent_directory(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_copy_to_non_existent_directory(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         sandbox_dir = _get_sandbox_folder(runtime)
         _create_test_file(temp_dir)
@@ -529,8 +528,8 @@ def test_copy_to_non_existent_directory(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_overwrite_existing_file(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_overwrite_existing_file(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         sandbox_dir = _get_sandbox_folder(runtime)
 
@@ -557,8 +556,8 @@ def test_overwrite_existing_file(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_copy_non_existent_file(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_copy_non_existent_file(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     try:
         sandbox_dir = _get_sandbox_folder(runtime)
         with pytest.raises(FileNotFoundError):
@@ -573,8 +572,8 @@ def test_copy_non_existent_file(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_copy_from_directory(temp_dir, box_class):
-    runtime: Runtime = _load_runtime(temp_dir, box_class)
+def test_copy_from_directory(temp_dir, runtime_cls):
+    runtime: Runtime = _load_runtime(temp_dir, runtime_cls)
     sandbox_dir = _get_sandbox_folder(runtime)
     try:
         temp_dir_copy = os.path.join(temp_dir, 'test_dir')
@@ -593,10 +592,10 @@ def test_copy_from_directory(temp_dir, box_class):
         _close_test_runtime(runtime)
 
 
-def test_keep_prompt(box_class, temp_dir):
+def test_keep_prompt(runtime_cls, temp_dir):
     runtime = _load_runtime(
         temp_dir,
-        box_class=box_class,
+        runtime_cls=runtime_cls,
         run_as_openhands=False,
     )
     try:
@@ -604,13 +603,13 @@ def test_keep_prompt(box_class, temp_dir):
 
         obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt')
         assert obs.exit_code == 0
-        assert 'root@' in obs.content
+        assert 'root@' in obs.interpreter_details
 
         obs = _run_cmd_action(
             runtime, f'cat {sandbox_dir}/test_file.txt', keep_prompt=False
         )
         assert obs.exit_code == 0
-        assert 'root@' not in obs.content
+        assert 'root@' not in obs.interpreter_details
     finally:
         _close_test_runtime(runtime)
 
@@ -619,13 +618,13 @@ def test_keep_prompt(box_class, temp_dir):
     TEST_IN_CI != 'True',
     reason='This test is not working in WSL (file ownership)',
 )
-def test_git_operation(box_class):
+def test_git_operation(runtime_cls):
     # do not mount workspace, since workspace mount by tests will be owned by root
     # while the user_id we get via os.getuid() is different from root
     # which causes permission issues
     runtime = _load_runtime(
         temp_dir=None,
-        box_class=box_class,
+        runtime_cls=runtime_cls,
         # Need to use non-root user to expose issues
         run_as_openhands=True,
     )
@@ -671,8 +670,8 @@ def test_git_operation(box_class):
         _close_test_runtime(runtime)
 
 
-def test_python_version(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_python_version(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         obs = runtime.run_action(CmdRunAction(command='python --version'))
 
diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py
index 264d7c480320..f24e37cd06bf 100644
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@@ -22,8 +22,8 @@
 PY3_FOR_TESTING = '/openhands/micromamba/bin/micromamba run -n openhands python3'
 
 
-def test_simple_browse(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
 
     # Test browse
     action_cmd = CmdRunAction(
@@ -68,10 +68,10 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands):
     _close_test_runtime(runtime)
 
 
-def test_browsergym_eval_env(box_class, temp_dir):
+def test_browsergym_eval_env(runtime_cls, temp_dir):
     runtime = _load_runtime(
         temp_dir,
-        box_class=box_class,
+        runtime_cls=runtime_cls,
         run_as_openhands=False,  # need root permission to access file
         base_container_image='xingyaoww/od-eval-miniwob:v1.0',
         browsergym_eval_env='browsergym/miniwob.choose-list',
diff --git a/tests/runtime/test_edit.py b/tests/runtime/test_edit.py
index 27c3a265cad9..b1508af226c1 100644
--- a/tests/runtime/test_edit.py
+++ b/tests/runtime/test_edit.py
@@ -31,8 +31,8 @@ def index():
     TEST_IN_CI != 'True',
     reason='This test requires LLM to run.',
 )
-def test_edit_from_scratch(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_edit_from_scratch(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         action = FileEditAction(
             content=ORGINAL,
@@ -71,8 +71,8 @@ def index():
     TEST_IN_CI != 'True',
     reason='This test requires LLM to run.',
 )
-def test_edit(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_edit(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         action = FileEditAction(
             content=ORGINAL,
@@ -130,8 +130,8 @@ def test_edit(temp_dir, box_class, run_as_openhands):
     TEST_IN_CI != 'True',
     reason='This test requires LLM to run.',
 )
-def test_edit_long_file(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_edit_long_file(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     try:
         action = FileEditAction(
             content=ORIGINAL_LONG,
diff --git a/tests/runtime/test_env_vars.py b/tests/runtime/test_env_vars.py
index 79773457a56c..de65bf8101ed 100644
--- a/tests/runtime/test_env_vars.py
+++ b/tests/runtime/test_env_vars.py
@@ -13,9 +13,9 @@
 # ============================================================================================================================
 
 
-def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
+def test_env_vars_os_environ(temp_dir, runtime_cls, run_as_openhands):
     with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
-        runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+        runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
 
         obs: CmdOutputObservation = runtime.run_action(CmdRunAction(command='env'))
         print(obs)
@@ -27,13 +27,13 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
         assert obs.exit_code == 0, 'The exit code should be 0.'
         assert (
             obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
-        ), f'Output: [{obs.content}] for {box_class}'
+        ), f'Output: [{obs.content}] for {runtime_cls}'
 
         _close_test_runtime(runtime)
 
 
-def test_env_vars_runtime_operations(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_env_vars_runtime_operations(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
 
     # Test adding single env var
     runtime.add_env_vars({'QUUX': 'abc"def'})
@@ -67,10 +67,10 @@ def test_env_vars_runtime_operations(temp_dir, box_class):
     _close_test_runtime(runtime)
 
 
-def test_env_vars_added_by_config(temp_dir, box_class):
+def test_env_vars_added_by_config(temp_dir, runtime_cls):
     runtime = _load_runtime(
         temp_dir,
-        box_class,
+        runtime_cls,
         runtime_startup_env_vars={'ADDED_ENV_VAR': 'added_value'},
     )
 
diff --git a/tests/runtime/test_images.py b/tests/runtime/test_images.py
index e5f3ded04a8c..1dd7e295c415 100644
--- a/tests/runtime/test_images.py
+++ b/tests/runtime/test_images.py
@@ -11,7 +11,7 @@
 # ============================================================================================================================
 
 
-def test_bash_python_version(temp_dir, box_class, base_container_image):
+def test_bash_python_version(temp_dir, runtime_cls, base_container_image):
     """Make sure Python is available in bash."""
     if base_container_image not in [
         'python:3.12-bookworm',
@@ -19,7 +19,7 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
         pytest.skip('This test is only for python-related images')
 
     runtime = _load_runtime(
-        temp_dir, box_class, base_container_image=base_container_image
+        temp_dir, runtime_cls, base_container_image=base_container_image
     )
 
     action = CmdRunAction(command='which python')
@@ -45,7 +45,7 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
     _close_test_runtime(runtime)
 
 
-def test_nodejs_22_version(temp_dir, box_class, base_container_image):
+def test_nodejs_22_version(temp_dir, runtime_cls, base_container_image):
     """Make sure Node.js is available in bash."""
     if base_container_image not in [
         'node:22-bookworm',
@@ -53,7 +53,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
         pytest.skip('This test is only for nodejs-related images')
 
     runtime = _load_runtime(
-        temp_dir, box_class, base_container_image=base_container_image
+        temp_dir, runtime_cls, base_container_image=base_container_image
     )
 
     action = CmdRunAction(command='node --version')
@@ -66,7 +66,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
     _close_test_runtime(runtime)
 
 
-def test_go_version(temp_dir, box_class, base_container_image):
+def test_go_version(temp_dir, runtime_cls, base_container_image):
     """Make sure Go is available in bash."""
     if base_container_image not in [
         'golang:1.23-bookworm',
@@ -74,7 +74,7 @@ def test_go_version(temp_dir, box_class, base_container_image):
         pytest.skip('This test is only for go-related images')
 
     runtime = _load_runtime(
-        temp_dir, box_class, base_container_image=base_container_image
+        temp_dir, runtime_cls, base_container_image=base_container_image
     )
 
     action = CmdRunAction(command='go version')
diff --git a/tests/runtime/test_ipython.py b/tests/runtime/test_ipython.py
index c16baa7b0e82..afd66e4bd5f2 100644
--- a/tests/runtime/test_ipython.py
+++ b/tests/runtime/test_ipython.py
@@ -28,8 +28,8 @@
 # ============================================================================================================================
 
 
-def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
 
     sandbox_dir = _get_sandbox_folder(runtime)
 
@@ -102,8 +102,8 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
     TEST_IN_CI != 'True',
     reason='This test is not working in WSL (file ownership)',
 )
-def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+def test_ipython_multi_user(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
 
     # Test run ipython
     # get username
@@ -174,8 +174,8 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
     _close_test_runtime(runtime)
 
 
-def test_ipython_simple(temp_dir, box_class):
-    runtime = _load_runtime(temp_dir, box_class)
+def test_ipython_simple(temp_dir, runtime_cls):
+    runtime = _load_runtime(temp_dir, runtime_cls)
     sandbox_dir = _get_sandbox_folder(runtime)
 
     # Test run ipython
@@ -198,9 +198,9 @@ def test_ipython_simple(temp_dir, box_class):
     _close_test_runtime(runtime)
 
 
-def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
+def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
     """Make sure that cd in bash also update the current working directory in ipython."""
-    runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
     sandbox_dir = _get_sandbox_folder(runtime)
 
     # It should error out since pymsgbox is not installed
diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py
index df8fba1e02ff..87ea93531ccd 100644
--- a/tests/unit/test_action_serialization.py
+++ b/tests/unit/test_action_serialization.py
@@ -49,7 +49,7 @@ def serialization_deserialization(
     if 'args' in original_memory_dict:
         original_memory_dict['args'].pop('keep_prompt', None)
         original_memory_dict['args'].pop('blocking', None)
-        original_memory_dict['args'].pop('is_confirmed', None)
+        original_memory_dict['args'].pop('confirmation_state', None)
 
     # the rest should match
     assert (
@@ -103,7 +103,7 @@ def test_cmd_run_action_serialization_deserialization():
             'thought': '',
             'keep_prompt': True,
             'hidden': False,
-            'is_confirmed': ActionConfirmationStatus.CONFIRMED,
+            'confirmation_state': ActionConfirmationStatus.CONFIRMED,
         },
     }
     serialization_deserialization(original_action_dict, CmdRunAction)
diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py
index 545df2f5f209..ae636ddf562b 100644
--- a/tests/unit/test_observation_serialization.py
+++ b/tests/unit/test_observation_serialization.py
@@ -52,6 +52,7 @@ def test_observation_event_props_serialization_deserialization():
             'command': 'ls -l',
             'command_id': 3,
             'hidden': False,
+            'interpreter_details': '',
         },
         'content': 'foo.txt',
     }
@@ -66,6 +67,7 @@ def test_command_output_observation_serialization_deserialization():
             'command': 'ls -l',
             'command_id': 3,
             'hidden': False,
+            'interpreter_details': '',
         },
         'message': 'Command `ls -l` executed with exit code 0.',
         'content': 'foo.txt',
diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py
index ccfa6c6080e1..4f0aa4dc39be 100644
--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@@ -222,7 +222,7 @@ def test_unsafe_bash_command(temp_dir: str):
                         arguments={
                             'code': "print('hello')",
                             'kernel_init_code': '',
-                            'is_confirmed': ActionConfirmationStatus.CONFIRMED,
+                            'confirmation_state': ActionConfirmationStatus.CONFIRMED,
                         },
                     ),
                 ),
@@ -267,7 +267,7 @@ def test_unsafe_bash_command(temp_dir: str):
                             'command': 'ls',
                             'hidden': False,
                             'keep_prompt': True,
-                            'is_confirmed': ActionConfirmationStatus.CONFIRMED,
+                            'confirmation_state': ActionConfirmationStatus.CONFIRMED,
                         },
                     ),
                 ),