Skip to content

Commit

Permalink
ML/LlamaIndex: Add software tests and CI configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Nov 5, 2024
1 parent 27c218a commit 20d28bf
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ updates:
schedule:
interval: "daily"

# Check the pip dependencies of the LlamaIndex example once per day.
- directory: "/topic/machine-learning/llama-index"
package-ecosystem: "pip"
schedule:
interval: "daily"

- directory: "/topic/machine-learning/mlops-mlflow"
package-ecosystem: "pip"
schedule:
Expand Down
82 changes: 82 additions & 0 deletions .github/workflows/ml-llamaindex.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
name: LlamaIndex

# Run on changes to this workflow, the LlamaIndex example, or the shared
# root-level requirements file.
#
# Path filters are matched against repository-relative paths, which never
# start with a slash, so the root requirements file is listed without one.
on:
  pull_request:
    branches: ~
    paths:
      - '.github/workflows/ml-llamaindex.yml'
      - 'topic/machine-learning/llama-index/**'
      - 'requirements.txt'
  push:
    branches: [ main ]
    paths:
      - '.github/workflows/ml-llamaindex.yml'
      - 'topic/machine-learning/llama-index/**'
      - 'requirements.txt'

  # Allow job to be triggered manually.
  workflow_dispatch:

  # Run job each night after CrateDB nightly has been published.
  schedule:
    - cron: '0 3 * * *'

# Runs are grouped per workflow and Git ref; a newer run in the same group
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
    # The display name spells out the matrix combination of each job.
    name: "
      Python: ${{ matrix.python-version }}
      CrateDB: ${{ matrix.cratedb-version }}
      on ${{ matrix.os }}"
    runs-on: ${{ matrix.os }}

    strategy:
      # Let the remaining matrix jobs finish even if one of them fails.
      fail-fast: false
      matrix:
        os:
          - 'ubuntu-latest'
        python-version:
          - '3.8'
          - '3.13'
        cratedb-version:
          - 'nightly'

    # CrateDB runs as a service container, published on the HTTP (4200)
    # and PostgreSQL wire protocol (5432) ports.
    services:
      cratedb:
        image: crate/crate:${{ matrix.cratedb-version }}
        ports:
          - '4200:4200'
          - '5432:5432'
        env:
          CRATE_HEAP_SIZE: '4g'

    # The OpenAI API key is taken from the repository secrets.
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:

      - name: Acquire sources
        uses: actions/checkout@v4

      # The pip cache is keyed on the root and the example's requirements files.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64
          cache: 'pip'
          cache-dependency-path: |
            requirements.txt
            topic/machine-learning/llama-index/requirements.txt
            topic/machine-learning/llama-index/requirements-dev.txt

      - name: Install utilities
        run: |
          pip install -r requirements.txt

      # Run the `ngr` test runner against the example directory.
      - name: Validate topic/machine-learning/llama-index
        run: |
          ngr test --accept-no-venv topic/machine-learning/llama-index
23 changes: 23 additions & 0 deletions topic/machine-learning/llama-index/init.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Table holding the example sensor readings.
CREATE TABLE IF NOT EXISTS time_series_data (
timestamp TIMESTAMP,
value DOUBLE,
location STRING,
sensor_id INT
);

-- Seed twelve hourly readings for 2023-09-14, alternating between
-- sensor 1 ('Sensor A') and sensor 2 ('Sensor B') in runs of three.
INSERT INTO time_series_data (timestamp, value, location, sensor_id)
VALUES
('2023-09-14T00:00:00', 10.5, 'Sensor A', 1),
('2023-09-14T01:00:00', 15.2, 'Sensor A', 1),
('2023-09-14T02:00:00', 18.9, 'Sensor A', 1),
('2023-09-14T03:00:00', 12.7, 'Sensor B', 2),
('2023-09-14T04:00:00', 17.3, 'Sensor B', 2),
('2023-09-14T05:00:00', 20.1, 'Sensor B', 2),
('2023-09-14T06:00:00', 22.5, 'Sensor A', 1),
('2023-09-14T07:00:00', 18.3, 'Sensor A', 1),
('2023-09-14T08:00:00', 16.8, 'Sensor A', 1),
('2023-09-14T09:00:00', 14.6, 'Sensor B', 2),
('2023-09-14T10:00:00', 13.2, 'Sensor B', 2),
('2023-09-14T11:00:00', 11.7, 'Sensor B', 2);

-- In CrateDB, refreshing the table makes the rows written above visible
-- to subsequent queries.
REFRESH TABLE time_series_data;
27 changes: 27 additions & 0 deletions topic/machine-learning/llama-index/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Pytest and coverage settings for the tests in this directory.
[tool.pytest.ini_options]
minversion = "2.0"
# Default command-line options: verbose, uncaptured output, strict marker
# checking, and coverage reports to the terminal and to an XML file.
addopts = """
-rfEX -p pytester --strict-markers --verbosity=3 --capture=no
--cov=. --cov-report=term-missing --cov-report=xml
"""

# Uncomment to enable debug logging while running the tests.
#log_level = "DEBUG"
#log_cli_level = "DEBUG"

# Collect tests from all Python files when no path is given on the command line.
testpaths = [
"*.py",
]
# A test marked `xfail` that unexpectedly passes is reported as a failure.
xfail_strict = true
# No custom markers are registered.
markers = [
]

[tool.coverage.run]
branch = false

[tool.coverage.report]
# A threshold of zero means the coverage percentage never fails the run.
fail_under = 0
show_missing = true
# Leave test support files out of the coverage report.
omit = [
"conftest.py",
"test*.py",
]
3 changes: 3 additions & 0 deletions topic/machine-learning/llama-index/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Additional packages for running the software tests in this directory.
cratedb-toolkit
pueblo[testing]==0.0.9
sqlparse
40 changes: 40 additions & 0 deletions topic/machine-learning/llama-index/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from pathlib import Path

import pytest

from cratedb_toolkit.io.sql import DatabaseAdapter
from dotenv import load_dotenv

HERE = Path(__file__).parent


@pytest.fixture()
def cratedb() -> DatabaseAdapter:
    """
    Provide a `DatabaseAdapter` configured for CrateDB on localhost, port 4200.
    """
    connection_url = "crate://crate@localhost:4200"
    adapter = DatabaseAdapter(dburi=connection_url)
    return adapter


@pytest.fixture(scope="function", autouse=True)
def init_database(cratedb):
    """
    Reset the database content before every test function.

    Drops the `time_series_data` table when it exists, then runs the
    statements from the `init.sql` file located next to this module.
    """
    cratedb.run_sql("DROP TABLE IF EXISTS time_series_data;")

    init_sql_file = HERE / "init.sql"
    provisioning_sql = init_sql_file.read_text()
    cratedb.run_sql(provisioning_sql)


def test_main(cratedb, capsys):
    """
    Execute `main.py` and verify outcome.

    The `cratedb` fixture is requested but not used directly; the database
    content is provisioned by the autouse `init_database` fixture.
    """

    # Load the standalone configuration also for software testing.
    # On CI, `OPENAI_API_KEY` will need to be supplied externally.
    # The file is resolved relative to this module rather than the current
    # working directory, so the test also works when pytest is started from
    # another directory. With a relative path, a missing file is silently
    # ignored.
    load_dotenv(HERE / "env.standalone")

    # Invoke the workload, in-process.
    from main import main
    main()

    # Verify the outcome.
    out = capsys.readouterr().out
    assert "Answer was: The average value for sensor 1 is approximately 17.03." in out

0 comments on commit 20d28bf

Please sign in to comment.