diff --git a/docs/modules/usage/how-to/evaluation-harness.md b/docs/modules/usage/how-to/evaluation-harness.md index 32717675e3d0..daf144d11e88 100644 --- a/docs/modules/usage/how-to/evaluation-harness.md +++ b/docs/modules/usage/how-to/evaluation-harness.md @@ -134,9 +134,11 @@ To create an evaluation workflow for your benchmark, follow these steps: 4. Create a function to process each instance: ```python + from openhands.utils.async_utils import call_async_from_sync def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput: config = get_config(instance, metadata) runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance) instruction = get_instruction(instance, metadata) diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py index 81c7455e0041..2c896939a751 100644 --- a/evaluation/EDA/run_infer.py +++ b/evaluation/EDA/run_infer.py @@ -23,6 +23,7 @@ from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller from openhands.events.action import MessageAction +from openhands.utils.async_utils import call_async_from_sync game = None @@ -119,6 +120,7 @@ def process_instance( # Here's how you can run the agent (similar to the `main` function) and get the final task state runtime = create_runtime(config) + call_async_from_sync(runtime.connect) state: State | None = asyncio.run( run_controller( diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py index f0ea180f4ba0..d6fcc62e0798 100644 --- a/evaluation/agent_bench/run_infer.py +++ b/evaluation/agent_bench/run_infer.py @@ -33,6 +33,7 @@ from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync def get_config( @@ -210,6 +211,7 @@ def process_instance( # ============================================= runtime: Runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance=instance) diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py index c59de4f441f8..fa1bb9534a83 100644 --- a/evaluation/aider_bench/run_infer.py +++ b/evaluation/aider_bench/run_infer.py @@ -33,6 +33,7 @@ from openhands.events.action import CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync # Configure visibility of unit tests to the Agent. USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true' @@ -207,6 +208,7 @@ def process_instance( # ============================================= runtime: Runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance=instance) diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py index 9dcff6d6ef42..4535ccba4e4e 100644 --- a/evaluation/biocoder/run_infer.py +++ b/evaluation/biocoder/run_infer.py @@ -30,6 +30,7 @@ from openhands.events.action import CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': functools.partial( @@ -275,7 +276,7 @@ def process_instance( instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class] runtime = create_runtime(config) - + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance) # Here's how you can run the agent (similar to the `main` function) and get the final task state diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py index 0fba8c5c6456..adb498cd2eb1 100644 --- a/evaluation/bird/run_infer.py +++ b/evaluation/bird/run_infer.py @@ -33,6 +33,7 @@ from openhands.events.action import CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync def codeact_user_response(state: State) -> str: @@ -403,6 +404,7 @@ def execute_sql(db_path, sql): instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class] runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance) # Here's how you can run the agent (similar to the `main` function) and get the final task state diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py index f5794d3ad3cb..c02cd0aee737 100644 --- a/evaluation/gaia/run_infer.py +++ b/evaluation/gaia/run_infer.py @@ -29,6 +29,7 @@ from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data') @@ -142,6 +143,7 @@ def process_instance( logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'}) runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance) # Here's how you can run the agent (similar to the `main` function) and get the final task state diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py index ac49a8078d5b..873cb7f89694 100644 --- a/evaluation/gorilla/run_infer.py +++ b/evaluation/gorilla/run_infer.py @@ -25,6 +25,7 @@ from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller from openhands.events.action import MessageAction +from openhands.utils.async_utils import call_async_from_sync AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, @@ -81,6 +82,7 @@ def process_instance( # Here's how you can run the agent (similar to the `main` function) and get the final task state runtime = create_runtime(config) + call_async_from_sync(runtime.connect) state: State | None = asyncio.run( run_controller( config=config, diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py index fe7ff4bf1c18..8fd4034c9d5e 100644 --- a/evaluation/gpqa/run_infer.py +++ b/evaluation/gpqa/run_infer.py @@ -48,6 +48,7 @@ MessageAction, ) from openhands.events.observation import Observation +from openhands.utils.async_utils import call_async_from_sync ACTION_FORMAT = """ < AppConfig: config = AppConfig( default_agent=metadata.agent_class, @@ -49,6 +51,14 @@ def get_config( workspace_base=None, workspace_mount_path=None, ) + if metadata.llm_config.log_completions: + metadata.llm_config.log_completions_folder = os.path.join( + metadata.eval_output_dir, 'llm_completions', instance_id + ) + logger.info( + f'Logging LLM completions for instance {instance_id} to ' + f'{metadata.llm_config.log_completions_folder}' + ) config.set_llm_config(metadata.llm_config) return config @@ -58,7 +68,7 @@ def process_instance( metadata: EvalMetadata, reset_logger: bool = True, ) -> EvalOutput: - config = get_config(metadata) + config = get_config(metadata, instance.instance_id) # Setup the logger properly, so you can run multi-processing to parallelize the evaluation if reset_logger: @@ -92,6 +102,7 @@ def process_instance( # ============================================= runtime: Runtime = create_runtime(config) + call_async_from_sync(runtime.connect) test_class.initialize_runtime(runtime) diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py index b07af7b08cdf..5b7d35f21130 100644 --- a/evaluation/logic_reasoning/run_infer.py +++ b/evaluation/logic_reasoning/run_infer.py @@ -30,6 +30,7 @@ ) from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, @@ -202,6 +203,7 @@ def process_instance( instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class] runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance) # Here's how you can run the agent (similar to the `main` function) and get the final task state diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index ccd36f2d389b..9c2aaf1e0963 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -35,6 +35,7 @@ BROWSER_EVAL_GET_GOAL_ACTION, BROWSER_EVAL_GET_REWARDS_ACTION, ) +from openhands.utils.async_utils import call_async_from_sync SUPPORTED_AGENT_CLS = {'BrowsingAgent'} @@ -127,6 +128,7 @@ def process_instance( logger.info(f'Starting evaluation for instance {env_id}.') runtime = create_runtime(config) + call_async_from_sync(runtime.connect) task_str = initialize_runtime(runtime) state: State | None = asyncio.run( run_controller( diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py index 80127420a431..8017b194d8d8 100644 --- a/evaluation/mint/run_infer.py +++ b/evaluation/mint/run_infer.py @@ -33,6 +33,7 @@ ) from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]): @@ -176,6 +177,7 @@ def process_instance( ) runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime) state: State | None = asyncio.run( diff --git a/evaluation/mint/tasks/reasoning.py b/evaluation/mint/tasks/reasoning.py index 6640c6e1ee6a..08cf320c359f 100644 --- a/evaluation/mint/tasks/reasoning.py +++ b/evaluation/mint/tasks/reasoning.py @@ -131,11 +131,9 @@ def extract_options(self, prompt: str) -> dict: def compare_two_numbers(p, gt): - if isinstance(p, int) or isinstance(p, float): + if isinstance(p, (int, float)): pass - elif isinstance(p, list) or isinstance(p, bool) or isinstance(p, str): - return False - elif isinstance(p, tuple) or isinstance(p, complex) or isinstance(p, dict): + elif isinstance(p, (bool, complex, dict, list, str, tuple)): return False else: raise ValueError(p) @@ -227,8 +225,8 @@ def extract_answer(self, solution: str) -> Any: prediction = prediction.replace('°', '') # Detect the boolean keyword in the generation - if prediction in ['true', 'yes', 'false', 'no']: - if prediction == 'true' or prediction == 'yes': + if prediction in ('true', 'yes', 'false', 'no'): + if prediction in ('true', 'yes'): prediction = 'True' else: prediction = 'False' @@ -342,7 +340,7 @@ def success(self, solution: str) -> bool: answer_type = self._answer_type gt = self.extract_answer(self.reference) - if isinstance(prediction, (str, int, float)) or isinstance(prediction, list): + if isinstance(prediction, (str, int, float, list)): # Comparing prediction against the reference if answer_type in ['bool', 'option', 'Option']: cur_correct = int(prediction == f'({gt})') or int(prediction == gt) diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py index 0f1ba0ab957a..deec068f3392 100644 --- a/evaluation/ml_bench/run_infer.py +++ b/evaluation/ml_bench/run_infer.py @@ -42,6 +42,7 @@ from openhands.events.action import CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync config = load_app_config() @@ -233,6 +234,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class] runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime, instance) # Run the agent diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py index 14429fc9859d..cf6d71d3b3ee 100644 --- a/evaluation/swe_bench/eval_infer.py +++ b/evaluation/swe_bench/eval_infer.py @@ -28,6 +28,7 @@ from openhands.core.main import create_runtime from openhands.events.action import CmdRunAction from openhands.events.observation import CmdOutputObservation +from openhands.utils.async_utils import call_async_from_sync # TODO: migrate all swe-bench docker to ghcr.io/openhands DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/') @@ -128,7 +129,7 @@ def process_instance( ) runtime = create_runtime(config) - + call_async_from_sync(runtime.connect) # Get patch and save it to /tmp/patch.diff with tempfile.TemporaryDirectory() as temp_dir: # Patch file diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 18b7daac0108..9ac1e0cf6639 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -35,6 +35,7 @@ from openhands.events.serialization.event import event_to_dict from openhands.runtime.base import Runtime from openhands.runtime.utils.shutdown_listener import sleep_if_should_continue +from openhands.utils.async_utils import call_async_from_sync USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true' USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false').lower() == 'true' @@ -143,6 +144,14 @@ def get_config( workspace_base=None, workspace_mount_path=None, ) + if metadata.llm_config.log_completions: + metadata.llm_config.log_completions_folder = os.path.join( + metadata.eval_output_dir, 'llm_completions', instance['instance_id'] + ) + logger.info( + f'Logging LLM completions for instance {instance["instance_id"]} to ' + f'{metadata.llm_config.log_completions_folder}' + ) config.set_llm_config(metadata.llm_config) return config @@ -372,6 +381,7 @@ def process_instance( logger.info(f'Starting evaluation for instance {instance.instance_id}.') runtime = create_runtime(config) + call_async_from_sync(runtime.connect) try: initialize_runtime(runtime, instance) @@ -432,7 +442,6 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - llm_completions=state.extra_data.get('llm_completions', []), error=state.last_error if state and state.last_error else None, ) return output diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py index 85f565120ac9..5c2c53422785 100644 --- a/evaluation/toolqa/run_infer.py +++ b/evaluation/toolqa/run_infer.py @@ -26,6 +26,7 @@ from openhands.events.action import CmdRunAction, MessageAction from openhands.events.observation import CmdOutputObservation from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, @@ -103,6 +104,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'}) runtime = create_runtime(config) + call_async_from_sync(runtime.connect) initialize_runtime(runtime) # Here's how you can run the agent (similar to the `main` function) and get the final task state diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py index d184b5b98037..d33658f339d2 100644 --- a/evaluation/utils/shared.py +++ b/evaluation/utils/shared.py @@ -61,7 +61,6 @@ class EvalOutput(BaseModel): history: ( list[dict[str, Any]] | list[tuple[dict[str, Any], dict[str, Any]]] | None ) = None - llm_completions: list[dict[str, Any]] | None = None metrics: dict[str, Any] | None = None error: str | None = None diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py index 2291aa08c8d4..cfc2bdae493a 100644 --- a/evaluation/webarena/run_infer.py +++ b/evaluation/webarena/run_infer.py @@ -35,6 +35,7 @@ BROWSER_EVAL_GET_GOAL_ACTION, BROWSER_EVAL_GET_REWARDS_ACTION, ) +from openhands.utils.async_utils import call_async_from_sync SUPPORTED_AGENT_CLS = {'BrowsingAgent'} @@ -143,6 +144,7 @@ def process_instance( logger.info(f'Starting evaluation for instance {env_id}.') runtime = create_runtime(config) + call_async_from_sync(runtime.connect) task_str = initialize_runtime(runtime) state: State | None = asyncio.run( diff --git a/frontend/__tests__/components/chat/ChatInput.test.tsx b/frontend/__tests__/components/chat/ChatInput.test.tsx deleted file mode 100644 index 75b565f848ff..000000000000 --- a/frontend/__tests__/components/chat/ChatInput.test.tsx +++ /dev/null @@ -1,119 +0,0 @@ -import userEvent from "@testing-library/user-event"; -import { render, screen } from "@testing-library/react"; -import { describe, afterEach, vi, it, expect } from "vitest"; -import ChatInput from "#/components/chat/ChatInput"; - -describe.skip("ChatInput", () => { - afterEach(() => { - vi.clearAllMocks(); - }); - - const onSendMessage = vi.fn(); - - it("should render a textarea", () => { - render(); - expect(screen.getByRole("textbox")).toBeInTheDocument(); - }); - - it("should be able to be set as disabled", async () => { - const user = userEvent.setup(); - render(); - - const textarea = screen.getByRole("textbox"); - const button = screen.getByRole("button"); - - expect(textarea).not.toBeDisabled(); // user can still type - expect(button).toBeDisabled(); // user cannot submit - - await user.type(textarea, "Hello, world!"); - await user.keyboard("{Enter}"); - - expect(onSendMessage).not.toHaveBeenCalled(); - }); - - it("should render with a placeholder", () => { - render(); - - const textarea = screen.getByPlaceholderText( - /CHAT_INTERFACE\$INPUT_PLACEHOLDER/i, - ); - expect(textarea).toBeInTheDocument(); - }); - - it("should render a send button", () => { - render(); - expect(screen.getByRole("button")).toBeInTheDocument(); - }); - - it("should call sendChatMessage with the input when the send button is clicked", async () => { - const user = userEvent.setup(); - render(); - - const textarea = screen.getByRole("textbox"); - const button = screen.getByRole("button"); - - await user.type(textarea, "Hello, world!"); - await user.click(button); - - expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []); - // Additionally, check if it was called exactly once - expect(onSendMessage).toHaveBeenCalledTimes(1); - }); - - it("should be able to send a message when the enter key is pressed", async () => { - const user = userEvent.setup(); - render(); - const textarea = screen.getByRole("textbox"); - - await user.type(textarea, "Hello, world!"); - await user.keyboard("{Enter}"); - - expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []); - }); - - it("should NOT send a message when shift + enter is pressed", async () => { - const user = userEvent.setup(); - render(); - const textarea = screen.getByRole("textbox"); - - await user.type(textarea, "Hello, world!"); - await user.keyboard("{Shift>} {Enter}"); // Shift + Enter - - expect(onSendMessage).not.toHaveBeenCalled(); - }); - - it("should NOT send an empty message", async () => { - const user = userEvent.setup(); - render(); - const textarea = screen.getByRole("textbox"); - const button = screen.getByRole("button"); - - await user.type(textarea, " "); - - // with enter key - await user.keyboard("{Enter}"); - expect(onSendMessage).not.toHaveBeenCalled(); - - // with button click - await user.click(button); - expect(onSendMessage).not.toHaveBeenCalled(); - }); - - it("should clear the input message after sending a message", async () => { - const user = userEvent.setup(); - render(); - const textarea = screen.getByRole("textbox"); - const button = screen.getByRole("button"); - - await user.type(textarea, "Hello, world!"); - expect(textarea).toHaveValue("Hello, world!"); - - await user.click(button); - expect(textarea).toHaveValue(""); - }); - - // this is already implemented but need to figure out how to test it - it.todo( - "should NOT send a message when the enter key is pressed while composing", - ); -}); diff --git a/frontend/__tests__/components/chat/chat-input.test.tsx b/frontend/__tests__/components/chat/chat-input.test.tsx new file mode 100644 index 000000000000..e10e3d26f3ee --- /dev/null +++ b/frontend/__tests__/components/chat/chat-input.test.tsx @@ -0,0 +1,161 @@ +import userEvent from "@testing-library/user-event"; +import { render, screen } from "@testing-library/react"; +import { describe, afterEach, vi, it, expect } from "vitest"; +import { ChatInput } from "#/components/chat-input"; + +describe("ChatInput", () => { + const onSubmitMock = vi.fn(); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("should render a textarea", () => { + render(); + expect(screen.getByTestId("chat-input")).toBeInTheDocument(); + expect(screen.getByRole("textbox")).toBeInTheDocument(); + }); + + it("should call onSubmit when the user types and presses enter", async () => { + const user = userEvent.setup(); + render(); + const textarea = screen.getByRole("textbox"); + + await user.type(textarea, "Hello, world!"); + await user.keyboard("{Enter}"); + + expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!"); + }); + + it("should call onSubmit when pressing the submit button", async () => { + const user = userEvent.setup(); + render(); + const textarea = screen.getByRole("textbox"); + const button = screen.getByRole("button"); + + await user.type(textarea, "Hello, world!"); + await user.click(button); + + expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!"); + }); + + it("should not call onSubmit when the message is empty", async () => { + const user = userEvent.setup(); + render(); + const button = screen.getByRole("button"); + + await user.click(button); + expect(onSubmitMock).not.toHaveBeenCalled(); + + await user.keyboard("{Enter}"); + expect(onSubmitMock).not.toHaveBeenCalled(); + }); + + it("should disable submit", async () => { + const user = userEvent.setup(); + render(); + + const button = screen.getByRole("button"); + const textarea = screen.getByRole("textbox"); + + await user.type(textarea, "Hello, world!"); + + expect(button).toBeDisabled(); + await user.click(button); + expect(onSubmitMock).not.toHaveBeenCalled(); + + await user.keyboard("{Enter}"); + expect(onSubmitMock).not.toHaveBeenCalled(); + }); + + it("should render a placeholder", () => { + render( + , + ); + + const textarea = screen.getByPlaceholderText("Enter your message"); + expect(textarea).toBeInTheDocument(); + }); + + it("should create a newline instead of submitting when shift + enter is pressed", async () => { + const user = userEvent.setup(); + render(); + const textarea = screen.getByRole("textbox"); + + await user.type(textarea, "Hello, world!"); + await user.keyboard("{Shift>} {Enter}"); // Shift + Enter + + expect(onSubmitMock).not.toHaveBeenCalled(); + // expect(textarea).toHaveValue("Hello, world!\n"); + }); + + it("should clear the input message after sending a message", async () => { + const user = userEvent.setup(); + render(); + const textarea = screen.getByRole("textbox"); + const button = screen.getByRole("button"); + + await user.type(textarea, "Hello, world!"); + await user.keyboard("{Enter}"); + expect(textarea).toHaveValue(""); + + await user.type(textarea, "Hello, world!"); + await user.click(button); + expect(textarea).toHaveValue(""); + }); + + it("should hide the submit button", () => { + render(); + expect(screen.queryByRole("button")).not.toBeInTheDocument(); + }); + + it("should call onChange when the user types", async () => { + const user = userEvent.setup(); + const onChangeMock = vi.fn(); + render(); + const textarea = screen.getByRole("textbox"); + + await user.type(textarea, "Hello, world!"); + + expect(onChangeMock).toHaveBeenCalledTimes("Hello, world!".length); + }); + + it("should have set the passed value", () => { + render(); + const textarea = screen.getByRole("textbox"); + + expect(textarea).toHaveValue("Hello, world!"); + }); + + it("should display the stop button and trigger the callback", async () => { + const user = userEvent.setup(); + const onStopMock = vi.fn(); + render( + , + ); + const stopButton = screen.getByTestId("stop-button"); + + await user.click(stopButton); + expect(onStopMock).toHaveBeenCalledOnce(); + }); + + it("should call onFocus and onBlur when the textarea is focused and blurred", async () => { + const user = userEvent.setup(); + const onFocusMock = vi.fn(); + const onBlurMock = vi.fn(); + render( + , + ); + const textarea = screen.getByRole("textbox"); + + await user.click(textarea); + expect(onFocusMock).toHaveBeenCalledOnce(); + + await user.tab(); + expect(onBlurMock).toHaveBeenCalledOnce(); + }); +}); diff --git a/frontend/__tests__/components/image-preview.test.tsx b/frontend/__tests__/components/image-preview.test.tsx new file mode 100644 index 000000000000..ef39fb0f4777 --- /dev/null +++ b/frontend/__tests__/components/image-preview.test.tsx @@ -0,0 +1,32 @@ +import { render, screen } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { describe, expect, it, vi } from "vitest"; +import { ImagePreview } from "#/components/image-preview"; + +describe("ImagePreview", () => { + it("should render an image", () => { + render( + , + ); + const img = screen.getByRole("img"); + + expect(screen.getByTestId("image-preview")).toBeInTheDocument(); + expect(img).toHaveAttribute("src", "https://example.com/image.jpg"); + }); + + it("should call onRemove when the close button is clicked", async () => { + const user = userEvent.setup(); + const onRemoveMock = vi.fn(); + render( + , + ); + + const closeButton = screen.getByRole("button"); + await user.click(closeButton); + + expect(onRemoveMock).toHaveBeenCalledOnce(); + }); +}); diff --git a/frontend/__tests__/components/interactive-chat-box.test.tsx b/frontend/__tests__/components/interactive-chat-box.test.tsx new file mode 100644 index 000000000000..5d775682cb7d --- /dev/null +++ b/frontend/__tests__/components/interactive-chat-box.test.tsx @@ -0,0 +1,119 @@ +import { render, screen, within } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { afterEach, beforeAll, describe, expect, it, vi } from "vitest"; +import { InteractiveChatBox } from "#/components/interactive-chat-box"; + +describe("InteractiveChatBox", () => { + const onSubmitMock = vi.fn(); + const onStopMock = vi.fn(); + + beforeAll(() => { + global.URL.createObjectURL = vi + .fn() + .mockReturnValue("blob:http://example.com"); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("should render", () => { + render(); + + const chatBox = screen.getByTestId("interactive-chat-box"); + within(chatBox).getByTestId("chat-input"); + within(chatBox).getByTestId("upload-image-input"); + }); + + it("should display the image previews when images are uploaded", async () => { + const user = userEvent.setup(); + render(); + + const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }); + const input = screen.getByTestId("upload-image-input"); + + expect(screen.queryAllByTestId("image-preview")).toHaveLength(0); + + await user.upload(input, file); + expect(screen.queryAllByTestId("image-preview")).toHaveLength(1); + + const files = [ + new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }), + new File(["(⌐□_□)"], "chucknorris3.png", { type: "image/png" }), + ]; + + await user.upload(input, files); + expect(screen.queryAllByTestId("image-preview")).toHaveLength(3); + }); + + it("should remove the image preview when the close button is clicked", async () => { + const user = userEvent.setup(); + render(); + + const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }); + const input = screen.getByTestId("upload-image-input"); + + await user.upload(input, file); + expect(screen.queryAllByTestId("image-preview")).toHaveLength(1); + + const imagePreview = screen.getByTestId("image-preview"); + const closeButton = within(imagePreview).getByRole("button"); + await user.click(closeButton); + + expect(screen.queryAllByTestId("image-preview")).toHaveLength(0); + }); + + it("should call onSubmit with the message and images", async () => { + const user = userEvent.setup(); + render(); + + const textarea = within(screen.getByTestId("chat-input")).getByRole( + "textbox", + ); + const input = screen.getByTestId("upload-image-input"); + const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }); + + await user.upload(input, file); + await user.type(textarea, "Hello, world!"); + await user.keyboard("{Enter}"); + + expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!", [file]); + + // clear images after submission + expect(screen.queryAllByTestId("image-preview")).toHaveLength(0); + }); + + it("should disable the submit button", async () => { + const user = userEvent.setup(); + render( + , + ); + + const button = screen.getByRole("button"); + expect(button).toBeDisabled(); + + await user.click(button); + expect(onSubmitMock).not.toHaveBeenCalled(); + }); + + it("should display the stop button if set and call onStop when clicked", async () => { + const user = userEvent.setup(); + render( + , + ); + + const stopButton = screen.getByTestId("stop-button"); + expect(stopButton).toBeInTheDocument(); + + await user.click(stopButton); + expect(onStopMock).toHaveBeenCalledOnce(); + }); +}); diff --git a/frontend/__tests__/components/upload-image-input.test.tsx b/frontend/__tests__/components/upload-image-input.test.tsx new file mode 100644 index 000000000000..77f89ee8851a --- /dev/null +++ b/frontend/__tests__/components/upload-image-input.test.tsx @@ -0,0 +1,71 @@ +import { render, screen } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { UploadImageInput } from "#/components/upload-image-input"; + +describe("UploadImageInput", () => { + const user = userEvent.setup(); + const onUploadMock = vi.fn(); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("should render an input", () => { + render(); + expect(screen.getByTestId("upload-image-input")).toBeInTheDocument(); + }); + + it("should call onUpload when a file is selected", async () => { + render(); + + const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }); + const input = screen.getByTestId("upload-image-input"); + + await user.upload(input, file); + + expect(onUploadMock).toHaveBeenNthCalledWith(1, [file]); + }); + + it("should call onUpload when multiple files are selected", async () => { + render(); + + const files = [ + new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }), + new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }), + ]; + const input = screen.getByTestId("upload-image-input"); + + await user.upload(input, files); + + expect(onUploadMock).toHaveBeenNthCalledWith(1, files); + }); + + it("should not upload any file that is not an image", async () => { + render(); + + const file = new File(["(⌐□_□)"], "chucknorris.txt", { + type: "text/plain", + }); + const input = screen.getByTestId("upload-image-input"); + + await user.upload(input, file); + + expect(onUploadMock).not.toHaveBeenCalled(); + }); + + it("should render custom labels", () => { + const { rerender } = render(); + expect(screen.getByTestId("default-label")).toBeInTheDocument(); + + function CustomLabel() { + return Custom label; + } + rerender( + } />, + ); + + expect(screen.getByText("Custom label")).toBeInTheDocument(); + expect(screen.queryByTestId("default-label")).not.toBeInTheDocument(); + }); +}); diff --git a/frontend/src/api/open-hands.ts b/frontend/src/api/open-hands.ts index 38bda53e96d9..0ef84c0278c2 100644 --- a/frontend/src/api/open-hands.ts +++ b/frontend/src/api/open-hands.ts @@ -201,7 +201,7 @@ class OpenHands { static async getGitHubAccessToken( code: string, ): Promise { - const response = await fetch(`${OpenHands.BASE_URL}/github/callback`, { + const response = await fetch(`${OpenHands.BASE_URL}/api/github/callback`, { method: "POST", body: JSON.stringify({ code }), headers: { @@ -218,7 +218,7 @@ class OpenHands { * @returns Whether the user is authenticated */ static async isAuthenticated(login: string): Promise { - const response = await fetch(`${OpenHands.BASE_URL}/authenticate`, { + const response = await fetch(`${OpenHands.BASE_URL}/api/authenticate`, { method: "POST", body: JSON.stringify({ login }), headers: { diff --git a/frontend/src/assets/chevron-left.tsx b/frontend/src/assets/chevron-left.tsx new file mode 100644 index 000000000000..b2ae9abdde09 --- /dev/null +++ b/frontend/src/assets/chevron-left.tsx @@ -0,0 +1,28 @@ +interface ChevronLeftProps { + width?: number; + height?: number; + active?: boolean; +} + +export function ChevronLeft({ + width = 20, + height = 20, + active, +}: ChevronLeftProps) { + return ( + + + + ); +} diff --git a/frontend/src/assets/chevron-right.tsx b/frontend/src/assets/chevron-right.tsx new file mode 100644 index 000000000000..4d7119d4a99a --- /dev/null +++ b/frontend/src/assets/chevron-right.tsx @@ -0,0 +1,28 @@ +interface ChevronRightProps { + width?: number; + height?: number; + active?: boolean; +} + +export function ChevronRight({ + width = 20, + height = 20, + active, +}: ChevronRightProps) { + return ( + + + + ); +} diff --git a/frontend/src/assets/close.svg b/frontend/src/assets/close.svg new file mode 100644 index 000000000000..d43761a6f10a --- /dev/null +++ b/frontend/src/assets/close.svg @@ -0,0 +1,5 @@ + + + diff --git a/frontend/src/components/attach-image-label.tsx b/frontend/src/components/attach-image-label.tsx new file mode 100644 index 000000000000..f3b9c7ebc13a --- /dev/null +++ b/frontend/src/components/attach-image-label.tsx @@ -0,0 +1,10 @@ +import Clip from "#/assets/clip.svg?react"; + +export function AttachImageLabel() { + return ( +
+ + Attach images +
+ ); +} diff --git a/frontend/src/components/chat-input.tsx b/frontend/src/components/chat-input.tsx new file mode 100644 index 000000000000..0aa4348e1539 --- /dev/null +++ b/frontend/src/components/chat-input.tsx @@ -0,0 +1,108 @@ +import React from "react"; +import TextareaAutosize from "react-textarea-autosize"; +import ArrowSendIcon from "#/assets/arrow-send.svg?react"; +import { cn } from "#/utils/utils"; + +interface ChatInputProps { + name?: string; + button?: "submit" | "stop"; + disabled?: boolean; + placeholder?: string; + showButton?: boolean; + value?: string; + maxRows?: number; + onSubmit: (message: string) => void; + onStop?: () => void; + onChange?: (message: string) => void; + onFocus?: () => void; + onBlur?: () => void; + className?: React.HTMLAttributes["className"]; +} + +export function ChatInput({ + name, + button = "submit", + disabled, + placeholder, + showButton = true, + value, + maxRows = 4, + onSubmit, + onStop, + onChange, + onFocus, + onBlur, + className, +}: ChatInputProps) { + const textareaRef = React.useRef(null); + + const handleSubmitMessage = () => { + if (textareaRef.current?.value) { + onSubmit(textareaRef.current.value); + textareaRef.current.value = ""; + } + }; + + const handleKeyPress = (event: React.KeyboardEvent) => { + if (event.key === "Enter" && !event.shiftKey) { + event.preventDefault(); + handleSubmitMessage(); + } + }; + + const handleChange = (event: React.ChangeEvent) => { + onChange?.(event.target.value); + }; + + return ( +
+ + {showButton && ( + <> + {button === "submit" && ( + + )} + {button === "stop" && ( + + )} + + )} +
+ ); +} diff --git a/frontend/src/components/chat/ChatInput.tsx b/frontend/src/components/chat/ChatInput.tsx deleted file mode 100644 index a228f7fff703..000000000000 --- a/frontend/src/components/chat/ChatInput.tsx +++ /dev/null @@ -1,162 +0,0 @@ -import { Textarea } from "@nextui-org/react"; -import React from "react"; -import { useTranslation } from "react-i18next"; -import { useSelector } from "react-redux"; -import { I18nKey } from "#/i18n/declaration"; -import Clip from "#/assets/clip.svg?react"; -import { RootState } from "#/store"; -import AgentState from "#/types/AgentState"; -import { useSocket } from "#/context/socket"; -import { generateAgentStateChangeEvent } from "#/services/agentStateService"; -import { cn } from "#/utils/utils"; -import ArrowSendIcon from "#/assets/arrow-send.svg?react"; -import { convertImageToBase64 } from "#/utils/convert-image-to-base-64"; - -interface ChatInputProps { - disabled?: boolean; - onSendMessage: (message: string, image_urls: string[]) => void; -} - -function ChatInput({ disabled = false, onSendMessage }: ChatInputProps) { - const { send } = useSocket(); - const { t } = useTranslation(); - const { curAgentState } = useSelector((state: RootState) => state.agent); - - const [message, setMessage] = React.useState(""); - const [files, setFiles] = React.useState([]); - // This is true when the user is typing in an IME (e.g., Chinese, Japanese) - const [isComposing, setIsComposing] = React.useState(false); - - const handleSendChatMessage = async () => { - if (curAgentState === AgentState.RUNNING) { - send(generateAgentStateChangeEvent(AgentState.STOPPED)); - return; - } - - if (message.trim()) { - let base64images: string[] = []; - if (files.length > 0) { - base64images = await Promise.all( - files.map((file) => convertImageToBase64(file)), - ); - } - onSendMessage(message, base64images); - setMessage(""); - setFiles([]); - } - }; - - const onKeyPress = (event: React.KeyboardEvent) => { - if (event.key === "Enter" && !event.shiftKey && !isComposing) { - event.preventDefault(); // prevent a new line - if (!disabled) { - handleSendChatMessage(); - } - } - }; - - const handleFileChange = (event: React.ChangeEvent) => { - if (event.target.files) { - setFiles((prev) => [...prev, ...Array.from(event.target.files!)]); - } - }; - - const removeFile = (index: number) => { - setFiles((prevFiles) => prevFiles.filter((_, i) => i !== index)); - }; - - const handlePaste = (event: React.ClipboardEvent) => { - const clipboardItems = Array.from(event.clipboardData.items); - const pastedFiles: File[] = []; - clipboardItems.forEach((item) => { - if (item.type.startsWith("image/")) { - const file = item.getAsFile(); - if (file) { - pastedFiles.push(file); - } - } - }); - if (pastedFiles.length > 0) { - setFiles((prevFiles) => [...prevFiles, ...pastedFiles]); - event.preventDefault(); - } - }; - - return ( -
-