Skip to content

Commit

Permalink
Mlflow tests
Browse files Browse the repository at this point in the history
  • Loading branch information
DanRunfola committed Dec 10, 2024
1 parent da50b3b commit 8046dac
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 33 deletions.
7 changes: 2 additions & 5 deletions geoBoundaryBuilder/images/geoBoundariesBase.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,13 @@ RUN curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /

# Upgrade pip and install Python dependencies
RUN pip install --upgrade pip && \
pip install prefect==3.1.5 kubernetes==25.3.0
pip install mlflow==2.18.0

# Install geopandas and related dependencies
RUN pip install geopandas==0.13.2
RUN pip install geopandas==0.13.2 kubernetes==31.0.0

# Install additional Python packages
RUN pip install jsonschema==4.19.0 zipfile36==0.1.3

# Install Prefect Kubernetes components
RUN pip install prefect-kubernetes==0.5.3

# Set up git-lfs
RUN git lfs install
65 changes: 65 additions & 0 deletions geoBoundaryBuilder/k8s_manifests/A_mlflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# MLflow tracking server for the geoBoundaries pipeline.
# Single Pod (restartPolicy: Always) backed by an NFS volume; a ClusterIP
# Service exposes the tracking API inside the cluster on port 5000.
apiVersion: v1
kind: Pod
metadata:
  name: mlflow-server
  namespace: geoboundaries
  labels:
    app: mlflow-server
spec:
  restartPolicy: Always
  securityContext:
    # Run as the project user/group so files created on the NFS share
    # are owned correctly.
    runAsUser: 71032
    runAsGroup: 9915
  containers:
    - name: mlflow-server
      image: "python:3.9-slim"  # plain Python image; mlflow is pip-installed at startup
      command: ["/bin/sh", "-c"]
      args:
        - |
          echo "Setting up Python user base..." && \
          export PYTHONUSERBASE=/mlflow/python-user-base && \
          pip install --no-cache-dir --user mlflow && \
          echo "Starting MLflow server..." && \
          /mlflow/python-user-base/bin/mlflow server \
            --backend-store-uri sqlite:////mlflow/mlflow.db \
            --default-artifact-root /mlflow/artifacts \
            --host 0.0.0.0 \
            --port 5000
      # NOTE: sqlite:////mlflow/mlflow.db (four slashes) is the absolute path
      # /mlflow/mlflow.db on the NFS mount. The original three-slash form
      # (sqlite:///mlflow/mlflow.db) is a RELATIVE path in SQLAlchemy URL
      # syntax and would create the DB under the container working directory,
      # losing it on restart.
      ports:
        - containerPort: 5000  # MLflow default port
      env:
        # Kept in sync with the CLI flags above; the flags take precedence,
        # these are informational / for tooling that reads the env.
        - name: MLFLOW_BACKEND_STORE_URI
          value: "sqlite:////mlflow/mlflow.db"
        - name: MLFLOW_ARTIFACT_ROOT
          value: "/mlflow/artifacts"
      volumeMounts:
        - name: mlflow-data
          mountPath: "/mlflow"
      resources:
        requests:
          ephemeral-storage: "1Gi"
          memory: "4Gi"
          cpu: "2"
        limits:
          ephemeral-storage: "2Gi"
          memory: "8Gi"
          cpu: "4"
  volumes:
    - name: mlflow-data
      nfs:
        server: 128.239.59.144
        path: /sciclone/geograd/geoBoundaries/mlflow
---
# In-cluster access to the MLflow tracking API:
# http://mlflow-server-service.geoboundaries.svc.cluster.local:5000
apiVersion: v1
kind: Service
metadata:
  name: mlflow-server-service
  namespace: geoboundaries
spec:
  selector:
    app: mlflow-server
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 5000
      targetPort: 5000
2 changes: 1 addition & 1 deletion geoBoundaryBuilder/k8s_manifests/C_prefect_workpool.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
restartPolicy: Always
containers:
- name: prefect-container
image: "gb-workpool-operator:latest"
image: "ghcr.io/wmgeolab/gb-workpool-operator:latest"
env:
- name: PREFECT_API_URL
value: "http://prefect-server-service.geoboundaries.svc.cluster.local:4200/api"
Expand Down
53 changes: 26 additions & 27 deletions geoBoundaryBuilder/test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
import subprocess
from prefect import flow
from prefect_kubernetes import KubernetesJob # Use the correct import now that the package is installed

# Step 1: Configure Prefect home directory and API URL
os.environ["PREFECT_HOME"] = "/tmp/.prefect" # Ensure this path is writable
Expand All @@ -8,36 +10,33 @@
print(f"Using PREFECT_API_URL: {PREFECT_API_URL}") # Debug output
subprocess.run(["prefect", "config", "set", f"PREFECT_API_URL={PREFECT_API_URL}"], check=True)

from prefect import flow
from prefect.context import get_run_context

# Step 2: Define a simple Prefect flow
@flow
def my_flow():
# Your flow logic here
print("Running my flow on Kubernetes")
def simple_flow():
print("Hello, Prefect!")
return "Flow Completed"

# Step 3: Configure and deploy the flow to Kubernetes
if __name__ == "__main__":
# Get the current script path
script_path = __file__

# Define dynamic parameters
image = "ghcr.io/wmgeolab/gb-base:latest"

# Deploy the flow with dynamic configurations
deployment = my_flow.deploy(
name="dynamic-k8s-flow",
work_pool_name="k8s-gB",
image=image,
job_variables={
"env": {"EXTRA_PIP_PACKAGES": "your-required-packages"},
"image_pull_policy": "Always",
"command": [
"bash",
"-c",
f"pip install -r requirements.txt && python {script_path}"
]
}
# Define dynamic job variables (e.g., for Kubernetes deployment)
job_variables = {
"image_pull_policy": "Always",
"env": {"EXTRA_PIP_PACKAGES": "prefect kubernetes"}
}

# Create Kubernetes job for Prefect deployment
k8s_infrastructure = KubernetesJob(
image="python:3.11-slim", # Just a placeholder; no need for GitHub or image build
job_variables=job_variables,
image_pull_policy="Always",
)

# Deploy the flow using the created infrastructure
deployment = simple_flow.deploy(
name="simple-flow-k8s-deployment",
work_pool_name="k8s-gB", # Use the existing work pool from Prefect
infrastructure=k8s_infrastructure,
)

# Optionally, run the deployment immediately
deployment.run()
deployment.run()

0 comments on commit 8046dac

Please sign in to comment.