Add CI validation for the LlamaIndex example.

This patch
  * registers `/topic/machine-learning/llama-index` with Dependabot (pip, daily),
  * adds the `LlamaIndex` GitHub Actions workflow, which runs `ngr test` against
    a CrateDB nightly service container on Python 3.8 and 3.13,
  * adds `init.sql` (table `time_series_data` plus twelve sample rows),
  * adds the pytest/coverage configuration and the development requirements,
  * adds `test.py`, which re-creates the table, runs `main.main()` in-process
    and checks the captured standard output.

Review notes:
  * Line structure and indentation were restored; the text had been flattened
    onto a handful of lines and could not be applied by any patch tool.
    The indentation of the context lines in the Dependabot hunk is
    reconstructed -- if it does not match the target file, apply with
    `git apply --ignore-whitespace`.
  * Workflow path filters now use `requirements.txt` without a leading slash,
    because path filters are matched against repository-relative paths.
  * `test.py` now resolves `env.standalone` relative to the test module
    (`HERE`) rather than the current working directory, where a missing file
    would be skipped silently.
  * The `index` blob ids below were not recomputed for the two edited files
    (`ml-llamaindex.yml`, `test.py`); plain `git apply` does not check them.

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 4dd545a4..7c263ae7 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -114,6 +114,11 @@ updates:
     schedule:
       interval: "daily"
 
+  - directory: "/topic/machine-learning/llama-index"
+    package-ecosystem: "pip"
+    schedule:
+      interval: "daily"
+
   - directory: "/topic/machine-learning/mlops-mlflow"
     package-ecosystem: "pip"
     schedule:
diff --git a/.github/workflows/ml-llamaindex.yml b/.github/workflows/ml-llamaindex.yml
new file mode 100644
index 00000000..67fa34e9
--- /dev/null
+++ b/.github/workflows/ml-llamaindex.yml
@@ -0,0 +1,82 @@
+name: LlamaIndex
+
+on:
+  pull_request:
+    branches: ~
+    paths:
+    - '.github/workflows/ml-llamaindex.yml'
+    - 'topic/machine-learning/llama-index/**'
+    - 'requirements.txt'
+  push:
+    branches: [ main ]
+    paths:
+    - '.github/workflows/ml-llamaindex.yml'
+    - 'topic/machine-learning/llama-index/**'
+    - 'requirements.txt'
+
+  # Allow job to be triggered manually.
+  workflow_dispatch:
+
+  # Run job each night after CrateDB nightly has been published.
+  schedule:
+    - cron: '0 3 * * *'
+
+# Cancel in-progress jobs when pushing to the same branch.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  test:
+    name: "
+    Python: ${{ matrix.python-version }}
+    CrateDB: ${{ matrix.cratedb-version }}
+    on ${{ matrix.os }}"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [
+          'ubuntu-latest',
+        ]
+        python-version: [
+          '3.8',
+          '3.13',
+        ]
+        cratedb-version: [ 'nightly' ]
+
+    services:
+      cratedb:
+        image: crate/crate:${{ matrix.cratedb-version }}
+        ports:
+          - 4200:4200
+          - 5432:5432
+        env:
+          CRATE_HEAP_SIZE: 4g
+
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+    steps:
+
+      - name: Acquire sources
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+          cache: 'pip'
+          cache-dependency-path: |
+            requirements.txt
+            topic/machine-learning/llama-index/requirements.txt
+            topic/machine-learning/llama-index/requirements-dev.txt
+
+      - name: Install utilities
+        run: |
+          pip install -r requirements.txt
+
+      - name: Validate topic/machine-learning/llama-index
+        run: |
+          ngr test --accept-no-venv topic/machine-learning/llama-index
diff --git a/topic/machine-learning/llama-index/init.sql b/topic/machine-learning/llama-index/init.sql
new file mode 100644
index 00000000..e59ad493
--- /dev/null
+++ b/topic/machine-learning/llama-index/init.sql
@@ -0,0 +1,23 @@
+CREATE TABLE IF NOT EXISTS time_series_data (
+    timestamp TIMESTAMP,
+    value DOUBLE,
+    location STRING,
+    sensor_id INT
+);
+
+INSERT INTO time_series_data (timestamp, value, location, sensor_id)
+VALUES
+    ('2023-09-14T00:00:00', 10.5, 'Sensor A', 1),
+    ('2023-09-14T01:00:00', 15.2, 'Sensor A', 1),
+    ('2023-09-14T02:00:00', 18.9, 'Sensor A', 1),
+    ('2023-09-14T03:00:00', 12.7, 'Sensor B', 2),
+    ('2023-09-14T04:00:00', 17.3, 'Sensor B', 2),
+    ('2023-09-14T05:00:00', 20.1, 'Sensor B', 2),
+    ('2023-09-14T06:00:00', 22.5, 'Sensor A', 1),
+    ('2023-09-14T07:00:00', 18.3, 'Sensor A', 1),
+    ('2023-09-14T08:00:00', 16.8, 'Sensor A', 1),
+    ('2023-09-14T09:00:00', 14.6, 'Sensor B', 2),
+    ('2023-09-14T10:00:00', 13.2, 'Sensor B', 2),
+    ('2023-09-14T11:00:00', 11.7, 'Sensor B', 2);
+
+REFRESH TABLE time_series_data;
diff --git a/topic/machine-learning/llama-index/pyproject.toml b/topic/machine-learning/llama-index/pyproject.toml
new file mode 100644
index 00000000..d06dcec5
--- /dev/null
+++ b/topic/machine-learning/llama-index/pyproject.toml
@@ -0,0 +1,27 @@
+[tool.pytest.ini_options]
+minversion = "2.0"
+addopts = """
+  -rfEX -p pytester --strict-markers --verbosity=3 --capture=no
+  --cov=. --cov-report=term-missing --cov-report=xml
+  """
+
+#log_level = "DEBUG"
+#log_cli_level = "DEBUG"
+
+testpaths = [
+    "*.py",
+]
+xfail_strict = true
+markers = [
+]
+
+[tool.coverage.run]
+branch = false
+
+[tool.coverage.report]
+fail_under = 0
+show_missing = true
+omit = [
+    "conftest.py",
+    "test*.py",
+]
diff --git a/topic/machine-learning/llama-index/requirements-dev.txt b/topic/machine-learning/llama-index/requirements-dev.txt
new file mode 100644
index 00000000..39bc22df
--- /dev/null
+++ b/topic/machine-learning/llama-index/requirements-dev.txt
@@ -0,0 +1,3 @@
+cratedb-toolkit
+pueblo[testing]==0.0.9
+sqlparse
diff --git a/topic/machine-learning/llama-index/test.py b/topic/machine-learning/llama-index/test.py
new file mode 100644
index 00000000..3c81566a
--- /dev/null
+++ b/topic/machine-learning/llama-index/test.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+
+import pytest
+
+from cratedb_toolkit.io.sql import DatabaseAdapter
+from dotenv import load_dotenv
+
+HERE = Path(__file__).parent
+
+
+@pytest.fixture()
+def cratedb() -> DatabaseAdapter:
+    return DatabaseAdapter(dburi="crate://crate@localhost:4200")
+
+
+@pytest.fixture(scope="function", autouse=True)
+def init_database(cratedb):
+    """
+    Initialize database.
+    """
+    cratedb.run_sql("DROP TABLE IF EXISTS time_series_data;")
+    cratedb.run_sql((HERE / "init.sql").read_text())
+
+
+def test_main(cratedb, capsys):
+    """
+    Execute `main.py` and verify outcome.
+    """
+
+    # Load the standalone configuration also for software testing.
+    # On CI, `OPENAI_API_KEY` will need to be supplied externally.
+    load_dotenv(HERE / "env.standalone")
+
+    # Invoke the workload, in-process.
+    from main import main
+    main()
+
+    # Verify the outcome.
+    out = capsys.readouterr().out
+    assert "Answer was: The average value for sensor 1 is approximately 17.03." in out