diff --git a/.github/workflows/environment.yaml b/.github/workflows/environment.yaml index e62f5e2..63e1887 100644 --- a/.github/workflows/environment.yaml +++ b/.github/workflows/environment.yaml @@ -1,5 +1,5 @@ name: Run tests -on: [push, pull_request] +on: [pull_request] jobs: Run-Tests: diff --git a/.github/workflows/publish-zarf-package.yaml b/.github/workflows/publish-zarf-package.yaml new file mode 100644 index 0000000..2ceec90 --- /dev/null +++ b/.github/workflows/publish-zarf-package.yaml @@ -0,0 +1,40 @@ +name: Publish Artifacts + +on: + push: + tags: + - "*" + +jobs: + zarf: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v2 + + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Login to Registry1 + uses: docker/login-action@v3 + with: + registry: registry1.dso.mil + username: ${{ secrets.REGISTRY1_USERNAME }} + password: ${{ secrets.REGISTRY1_PASSWORD }} + + - name: Build API Image + run: make docker-build docker-push VERSION=$GITHUB_REF_NAME + + - name: Install Zarf + uses: defenseunicorns/setup-zarf@main + + - name: Build Zarf Package + run: zarf package create . 
--confirm + + - name: Publish Zarf Package + run: zarf package publish zarf-package-*.zst oci://ghcr.io/defenseunicorns/packages \ No newline at end of file diff --git a/.gitignore b/.gitignore index ac7abad..7696889 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ leapfrogai_api.egg-info __pycache__ .vscode -config.yaml \ No newline at end of file +config.yaml +*.tar.zst +.python-version diff --git a/Makefile b/Makefile index 6dc9786..988d3c7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION := $(shell git describe --abbrev=0 --tags) +VERSION ?= $(shell git describe --abbrev=0 --tags) create-venv: python -m venv .venv @@ -16,7 +16,7 @@ build-requirements: pip-compile -o requirements.txt pyproject.toml build-requirements-dev: - pip-compile --extra dev -o requirements-dev.txt pyproject.toml + pip-compile --extra dev -o requirements-dev.txt pyproject.toml --allow-unsafe test: pytest **/*.py @@ -24,8 +24,8 @@ test: dev: uvicorn main:app --port 3000 --reload -make docker-build: - docker build -t ghcr.io/defenseunicorns/leapfrogai-api:${VERSION} . +docker-build: + docker build -t ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${VERSION} . 
-make docker-push: - docker push ghcr.io/defenseunicorns/leapfrogai-api:${VERSION} \ No newline at end of file +docker-push: + docker push ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:${VERSION} \ No newline at end of file diff --git a/backends/openai/grpc_client.py b/backends/openai/grpc_client.py index c22e0ed..d371db5 100644 --- a/backends/openai/grpc_client.py +++ b/backends/openai/grpc_client.py @@ -34,7 +34,7 @@ async def stream_completion(model: Model, request: leapfrogai.CompletionRequest) async def completion(model: Model, request: leapfrogai.CompletionRequest): async with grpc.aio.insecure_channel(model.backend) as channel: stub = leapfrogai.CompletionServiceStub(channel) - response: leapfrogai.CompletionResponse = stub.Complete(request) + response: leapfrogai.CompletionResponse = await stub.Complete(request) return CompletionResponse( model=model.name, @@ -42,7 +42,8 @@ async def completion(model: Model, request: leapfrogai.CompletionRequest): CompletionChoice( index=0, text=response.choices[0].text, - finish_reason=response.choices[0].finish_reason, + finish_reason=str(response.choices[0].finish_reason), + logprobs=None, ) ], ) @@ -63,7 +64,7 @@ async def stream_chat_completion( async def chat_completion(model: Model, request: leapfrogai.ChatCompletionRequest): async with grpc.aio.insecure_channel(model.backend) as channel: stub = leapfrogai.CompletionServiceStub(channel) - response: leapfrogai.ChatCompletionResponse = stub.Complete(request) + response: leapfrogai.ChatCompletionResponse = await stub.Complete(request) return ChatCompletionResponse( model=model.name, diff --git a/backends/openai/types.py b/backends/openai/types.py index 66cd861..5da8384 100644 --- a/backends/openai/types.py +++ b/backends/openai/types.py @@ -27,8 +27,8 @@ class CompletionRequest(BaseModel): class CompletionChoice(BaseModel): index: int text: str - logprobs: object | None - finish_reason: str + logprobs: object = None + finish_reason: str = "" class 
CompletionResponse(BaseModel): @@ -37,7 +37,7 @@ class CompletionResponse(BaseModel): created: int = 0 model: str = "" choices: list[CompletionChoice] - usage: Usage + usage: Usage = None ########## diff --git a/chart/Chart.yaml b/chart/Chart.yaml new file mode 100644 index 0000000..7553961 --- /dev/null +++ b/chart/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: leapfrogai +description: A deployment of AI tools + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.4.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" \ No newline at end of file diff --git a/chart/templates/api/deployment.yaml b/chart/templates/api/deployment.yaml new file mode 100644 index 0000000..26013b9 --- /dev/null +++ b/chart/templates/api/deployment.yaml @@ -0,0 +1,68 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: api-deployment +spec: + replicas: {{ .Values.api.replcias }} + strategy: + type: Recreate + selector: + matchLabels: + app: api + template: + metadata: + labels: + app: api + spec: + serviceAccountName: read-configmaps + containers: + - name: sidecar + image: registry1.dso.mil/ironbank/kiwigrid/k8s-sidecar:{{ .Values.image.kiwigridTag }} + volumeMounts: + - name: api-model + mountPath: /config/ + env: + - name: LABEL + value: "leapfrogai" + - name: FOLDER + value: /config/ + - name: RESOURCE + value: both + - name: UNIQUE_FILENAMES + value: "true" + - name: NAMESPACE + value: leapfrogai + - name: api-container + image: ghcr.io/defenseunicorns/leapfrogai-api:{{ .Values.image.lfaiAPITag }} + imagePullPolicy: Always + env: + - name: LFAI_CONFIG_PATH + value: /config/ + - name: LFAI_CONFIG_FILENAME + value: "*.toml" + - name: PORT + value: "{{ .Values.api.port }}" + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 10 + volumeMounts: + - name: api-model + mountPath: /config + volumes: + - name: api-model + emptyDir: {} + + + + diff --git a/chart/templates/api/permissions.yaml b/chart/templates/api/permissions.yaml new file mode 100644 index 0000000..08a6236 --- /dev/null +++ b/chart/templates/api/permissions.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: read-configmaps +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: read-configmaps +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + 
- list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: read-configmaps +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: read-configmaps +subjects: +- kind: ServiceAccount + name: read-configmaps \ No newline at end of file diff --git a/chart/templates/api/service.yaml b/chart/templates/api/service.yaml new file mode 100644 index 0000000..eac0fc5 --- /dev/null +++ b/chart/templates/api/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: api + annotations: + zarf.dev/connect-description: "Load the OpenAPI spec for the LFAI API" + zarf.dev/connect-url: "/docs" + labels: + zarf.dev/connect-name: lfai-api +spec: + selector: + app: api + ports: + - name: http + protocol: TCP + port: 8080 + targetPort: 8080 + type: ClusterIP diff --git a/chart/templates/namespace.yaml b/chart/templates/namespace.yaml new file mode 100644 index 0000000..dd74f90 --- /dev/null +++ b/chart/templates/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + istio-injection: {{ .Values.istio.injection }} + name: leapfrogai diff --git a/chart/templates/vs.yaml b/chart/templates/vs.yaml new file mode 100644 index 0000000..1d927ff --- /dev/null +++ b/chart/templates/vs.yaml @@ -0,0 +1,18 @@ +{{- if .Values.istio.enabled }} +apiVersion: networking.istio.io/v1beta1 +kind: VirtualService +metadata: + name: leapfrogai +spec: + gateways: + - istio-system/{{ .Values.istio.gateway }} + hosts: + - leapfrogai.{{ .Values.domain }} + http: + - route: + - destination: + host: api + port: + number: 8080 +{{- end }} +--- \ No newline at end of file diff --git a/chart/values.yaml b/chart/values.yaml new file mode 100644 index 0000000..480ab78 --- /dev/null +++ b/chart/values.yaml @@ -0,0 +1,17 @@ +domain: bigbang.dev + +image: + lfaiAPITag: 0.1.2 + kiwigridTag: 1.23.3 + +istio: + enabled: false + gateway: tenant + injection: disabled + +api: + replicas: 1 + port: 8080 + +monitoring: + 
enabled: false \ No newline at end of file diff --git a/lfai-values.yaml b/lfai-values.yaml new file mode 100644 index 0000000..a082141 --- /dev/null +++ b/lfai-values.yaml @@ -0,0 +1,10 @@ +domain: ###ZARF_VAR_DOMAIN### + +image: + lfaiAPITag: ###ZARF_CONST_LEAPFROGAI_API_VERSION### + kiwigridTag: ###ZARF_CONST_KIWIGRID_VERSION### + +istio: + enabled: ###ZARF_VAR_ISTIO_ENABLED### + gateway: ###ZARF_VAR_ISTIO_GATEWAY### + injection: ###ZARF_VAR_ISTIO_INJECTION### \ No newline at end of file diff --git a/main.py b/main.py index b5a6fa8..1f201d7 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,22 @@ from backends.openai import router as openai_router from backends.openai.routes import * from utils import get_model_config +import asyncio app = FastAPI() + +# super simple healthz check +@app.get("/healthz") +async def healthz(): + return {"status": "ok"} + +@app.get("/models") +async def models(): + return get_model_config() + + +@app.on_event('startup') +async def watch_for_configs(): + asyncio.create_task(get_model_config().watch_and_load_configs()) + app.include_router(openai_router) -get_model_config().load() diff --git a/pyproject.toml b/pyproject.toml index bd8b9d4..1cc6227 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,11 +8,15 @@ dependencies = [ "pyyaml >= 6.0.1", "leapfrogai >= 0.3.3", "python-multipart >= 0.0.6", + "toml >= 0.10.2", ] -requires-python = ">=3.11.4" +requires-python = ">=3.11.6, <3.12.0" [project.optional-dependencies] dev = ["pip-tools", "pytest", "black", "isort"] [tool.pip-tools] generate-hashes = true + +[tool.setuptools.packages.find] +exclude = ["chart"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 96d82eb..594f719 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --extra=dev --generate-hashes --output-file=requirements-dev.txt pyproject.toml +# pip-compile --allow-unsafe --extra=dev 
--generate-hashes --output-file=requirements-dev.txt pyproject.toml # annotated-types==0.5.0 \ --hash=sha256:47cdc3490d9ac1506ce92c7aaa76c579dc3509ff11e098fc867e5130ab7be802 \ @@ -298,6 +298,10 @@ pytest==7.4.2 \ --hash=sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002 \ --hash=sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069 # via leapfrogai-api (pyproject.toml) +python-multipart==0.0.6 \ + --hash=sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132 \ + --hash=sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18 + # via leapfrogai-api (pyproject.toml) pyyaml==6.0.1 \ --hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \ --hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \ @@ -358,6 +362,10 @@ starlette==0.27.0 \ --hash=sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75 \ --hash=sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91 # via fastapi +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via leapfrogai-api (pyproject.toml) typing-extensions==4.7.1 \ --hash=sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36 \ --hash=sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2 @@ -374,8 +382,12 @@ wheel==0.41.2 \ --hash=sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8 # via pip-tools -# WARNING: The following packages were not pinned, but pip requires them to be -# pinned when the requirements file includes hashes and the requirement is not -# satisfied by a package already installed. Consider using the --allow-unsafe flag. 
-# pip -# setuptools +# The following packages are considered to be unsafe in a requirements file: +pip==23.3.1 \ + --hash=sha256:1fcaa041308d01f14575f6d0d2ea4b75a3e2871fe4f9c694976f908768e14174 \ + --hash=sha256:55eb67bb6171d37447e82213be585b75fe2b12b359e993773aca4de9247a052b + # via pip-tools +setuptools==68.2.2 \ + --hash=sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87 \ + --hash=sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a + # via pip-tools diff --git a/requirements.txt b/requirements.txt index e4318ba..7424381 100644 --- a/requirements.txt +++ b/requirements.txt @@ -288,6 +288,10 @@ starlette==0.27.0 \ --hash=sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75 \ --hash=sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91 # via fastapi +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via leapfrogai-api (pyproject.toml) typing-extensions==4.7.1 \ --hash=sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36 \ --hash=sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2 diff --git a/utils/config.py b/utils/config.py index d13d231..818379c 100644 --- a/utils/config.py +++ b/utils/config.py @@ -1,8 +1,12 @@ import logging import os from typing import List - +import toml import yaml +import glob +import fnmatch + +from watchfiles import awatch class Model: @@ -23,31 +27,82 @@ def __init__(self, models: dict[str, Model] = {}): def __str__(self): return f"Models: {self.models}" - def model_backend(self, model: str) -> str | None: - if model in self.models: - return self.models[model].backend - else: - return None + async def watch_and_load_configs(self, directory="", filename="config.yaml"): + # Get the config directory and filename from the environment variables if provided + env_directory = 
os.environ.get("LFAI_CONFIG_PATH", directory) + if env_directory != None and env_directory != "": + directory = env_directory + env_filename = os.environ.get("LFAI_CONFIG_FILENAME", filename) + if env_filename != None and env_filename != "": + filename = env_filename + + # Process all the configs that were already in the directory + self.load_all_configs(directory, filename) - def load(self, directory="", filename="config.yaml"): - directory = os.environ.get("LFAI_CONFIG_PATH", directory) - config_path = os.path.join(directory, filename) - logging.info(f"Loading config from {config_path}") + # Watch the directory for changes until the end of time + while True: + async for changes in awatch(directory, recursive=False, step=150): + # get a unique list of files that have been updated + # (awatch can return duplicates depending on the type of updates that happen) + unique_files = set() + for change in changes: + unique_files.add(os.path.basename(change[1])) - # ensure the config file exists - if not os.path.exists(config_path): - logging.warn(f"Config file not found at %s", config_path) - return "TODO: Return an error?" + # filter the files to those that match the filename or glob pattern + filtered_matches = fnmatch.filter(unique_files, filename) + # load all the updated config files + for match in filtered_matches: + self.load_config_file(os.path.join(directory, match)) + + + + + + def load_config_file(self, config_path: str): # load the config file into the config object with open(config_path) as c: - loaded_artifact = yaml.safe_load(c) + # Load the file into a python object + loaded_artifact = {} + if config_path.endswith(".toml"): + loaded_artifact = toml.load(c) + elif config_path.endswith(".yaml"): + loaded_artifact = yaml.safe_load(c) + else: + # TODO: Return an error ??? 
+ print(f"Unsupported file type: {config_path}") + return + # parse the object into our config self.parse_models(loaded_artifact) + return + + + def load_all_configs(self, directory="", filename="config.yaml"): + if not os.path.exists(directory): + return "THE CONFIG DIRECTORY DOES NOT EXIST" + + # Get all config files + config_files = glob.glob(os.path.join(directory, filename)) + + # load all the found config files into the config object + for config_path in config_files: + self.load_config_file(config_path) + + return + + + def get_model_backend(self, model: str) -> str | None: + if model in self.models: + return self.models[model].backend + else: + return None + + def parse_models(self, loaded_artifact): for m in loaded_artifact["models"]: - self.models[m["name"]] = Model( - name=m["name"], - backend=m["backend"], - ) + model_config = Model(name=m["name"], + backend=m["backend"]) + + self.models[m["name"]] = model_config \ No newline at end of file diff --git a/utils/test_config.py b/utils/test_config.py index 36731d0..1234c61 100644 --- a/utils/test_config.py +++ b/utils/test_config.py @@ -20,4 +20,4 @@ def test_existing_model_backend(self): ) } ) - assert c.model_backend("test") == "grpc://localhost:50051" + assert c.get_model_backend("test") == "grpc://localhost:50051" diff --git a/zarf-config.yaml b/zarf-config.yaml new file mode 100644 index 0000000..f7bbe73 --- /dev/null +++ b/zarf-config.yaml @@ -0,0 +1,4 @@ +package: + create: + set: + max_package_size: "1000000000" \ No newline at end of file diff --git a/zarf.yaml b/zarf.yaml new file mode 100644 index 0000000..b301a30 --- /dev/null +++ b/zarf.yaml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/defenseunicorns/zarf/main/zarf.schema.json + +kind: ZarfPackageConfig +metadata: + name: leapfrogai-api + description: "LeapfrogAI" + version: 0.4.0 + architecture: amd64 + +constants: + - name: LEAPFROGAI_API_VERSION + value: "0.4.0" + - name: KIWIGRID_VERSION + value: 
"1.23.3" + +variables: + - name: ISTIO_ENABLED + default: "false" + - name: ISTIO_GATEWAY + default: "tenant" + - name: ISTIO_INJECTION + default: "disabled" + - name: DOMAIN + default: "bigbang.dev" + +components: + - name: leapfrogai + required: true + charts: + - name: leapfrogai + namespace: leapfrogai + localPath: chart + version: 0.4.0 + valuesFiles: + - "lfai-values.yaml" + images: + - "ghcr.io/defenseunicorns/leapfrogai-api:0.4.0" + - "registry1.dso.mil/ironbank/kiwigrid/k8s-sidecar:1.23.3"