[Eval] DiscoveryBench OpenHands Integration #4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end smoke test: runs one simple task through the DummyAgent.
name: Run E2E test with dummy agent

# Triggers: every push to "main", and every pull request.
on:
  push:
    branches:
      - main
  pull_request:

# Runs triggered by a PR share one group, so a newer run cancels the one still
# in progress. A commit on main falls back to the run id, giving each such run
# its own unique group.
concurrency:
  group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
  cancel-in-progress: true
jobs:
  # Single job: install the toolchain, build the project, then run one scripted
  # task with the DummyAgent and fail the job if that command fails.
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # NOTE(review): this action is referenced by the mutable "main" branch;
      # consider pinning it to a release tag or commit SHA.
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: true
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: false
          swap-storage: true

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3

      # Poetry is installed before setup-python because the `cache: 'poetry'`
      # option below needs the poetry executable to be available.
      - name: Install poetry via pipx
        run: pipx install poetry

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'poetry'

      - name: Install Python dependencies using Poetry
        run: poetry install --without evaluation,llama-index

      - name: Build Environment
        run: make build

      # The pass/fail report lives in the same step as the command it judges:
      # every `run` step starts a fresh shell, so `$?` read from a later step
      # can never see this command's exit status.
      - name: Run tests
        env:
          # Quoted so the variable reaches the process as the string "True".
          SANDBOX_FORCE_REBUILD_RUNTIME: 'True'
        run: |
          if poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent; then
            echo "Test passed"
          else
            echo "Test failed"
            exit 1
          fi