Evaluate LLM Workflow and Post Comment #6

Workflow file for this run

.github/workflows/evaluation_pipeline.yml at 1a093ce

	# Manually dispatched workflow that runs the LLM evaluation test suite in
	# ./backend, aggregates the scores, publishes them as artifacts and, on
	# non-main refs, compares them with the latest main-branch scores and posts
	# the comparison on the associated pull request.
	name: Evaluate LLM Workflow and Post Comment

	permissions:
	  contents: read
	  # Write access is used by the sticky PR comment step below.
	  pull-requests: write
	  issues: write

	on: workflow_dispatch

	jobs:
	  evaluation-pipeline:
	    runs-on: ubuntu-latest
	    defaults:
	      run:
	        # Applies to `run:` steps only. Inputs of `uses:` steps (cache,
	        # upload/download artifact, PR comment) are resolved from the
	        # workspace root, so they spell out the backend/ prefix explicitly.
	        working-directory: ./backend
	    steps:
	      - uses: actions/checkout@v4

	      - name: Install Python
	        uses: actions/setup-python@v4
	        with:
	          # Quoted so the version is always passed as a string.
	          python-version: '3.11.6'

	      - name: Install poetry
	        uses: abatilo/actions-poetry@v2

	      - name: Setup a local virtual environment (if no poetry.toml file)
	        run: |
	          poetry config virtualenvs.create true --local
	          poetry config virtualenvs.in-project true --local

	      # The in-project virtualenv is created where poetry runs (./backend),
	      # so both the cached path and the hashed lock file must point there.
	      # NOTE(review): assumes poetry.lock sits next to pyproject.toml in
	      # backend/ -- confirm.
	      - name: Define a cache for the virtual environment based on the dependencies lock file
	        uses: actions/cache@v3
	        with:
	          path: ./backend/.venv
	          key: venv-${{ hashFiles('backend/poetry.lock') }}

	      - name: Install the project dependencies
	        run: poetry install --with dev --no-interaction --no-root

	      - name: Run eval pipeline test cases
	        env:
	          DATA_DIRECTORY: ./
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	        run: PYTHONPATH=. poetry run pytest tests/evaluation/test_query_flow.py --run-expensive

	      - name: Run score aggregation script
	        run: poetry run python tests/evaluation/aggregate_scores.py

	      # NOTE(review): actions/upload-artifact@v3 and actions/cache@v3 are
	      # old major versions; upgrading the uploader must be coordinated with
	      # the downloader action used below -- confirm the upgrade path.
	      - name: Save scores as artifact
	        uses: actions/upload-artifact@v3
	        with:
	          name: evaluation_scores.md
	          path: ./backend/scores.md

	      - name: Find associated pull request
	        uses: jwalton/gh-find-current-pr@v1
	        id: findPr
	        # Only run this on PRs
	        if: github.ref != 'refs/heads/main'
	        continue-on-error: true
	        with:
	          github-token: ${{ secrets.GITHUB_TOKEN }}

	      # The baseline is the artifact that "Save scores as artifact" uploads
	      # when this same workflow runs on main, so the workflow file name and
	      # artifact name must match the ones used in this file. It is extracted
	      # under backend/ because the compare step runs from ./backend.
	      - name: Download main branch scores
	        if: github.ref != 'refs/heads/main'
	        uses: dawidd6/action-download-artifact@v2
	        continue-on-error: true
	        with:
	          workflow: evaluation_pipeline.yml
	          branch: main
	          name: evaluation_scores.md
	          path: backend/main-scores

	      # Runs from ./backend: reads scores.md and main-scores/scores.md and
	      # is given comparison.md as its third argument.
	      - name: Compare scores and generate report
	        if: github.ref != 'refs/heads/main'
	        run: |
	          poetry run python tests/evaluation/compare_scores.py \
	            scores.md \
	            main-scores/scores.md \
	            comparison.md

	      # Skipped when no pull request number was found for this ref.
	      - name: Post comparison on PR
	        if: success() && steps.findPr.outputs.number
	        uses: marocchino/sticky-pull-request-comment@v2
	        continue-on-error: true
	        with:
	          number: ${{ steps.findPr.outputs.number }}
	          path: ./backend/comparison.md

	      - name: Upload comparison as artifact
	        if: github.ref != 'refs/heads/main'
	        uses: actions/upload-artifact@v3
	        with:
	          name: score-comparison
	          path: ./backend/comparison.md

	      - name: Upload test results as artifact
	        uses: actions/upload-artifact@v3
	        with:
	          name: test-results.csv
	          path: ./backend/test_results.csv

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Evaluate LLM Workflow and Post Comment #6

Workflow file

Evaluate LLM Workflow and Post Comment #6

Jobs

Run details

Workflow file for this run