Skip to content

Commit

Permalink
Merge pull request #36 from aws-ia/triton
Browse files Browse the repository at this point in the history
feat: Added NVIDIA Triton server helm chart
  • Loading branch information
vara-bonthu authored May 6, 2024
2 parents cadbd10 + bf61eff commit 1034c43
Show file tree
Hide file tree
Showing 13 changed files with 504 additions and 0 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ module "eks_data_addons" {
| [helm_release.mlflow_tracking](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.nvidia_device_plugin](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.nvidia_gpu_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.nvidia_triton_server](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.pinot](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.spark_history_server](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.spark_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
Expand Down Expand Up @@ -142,6 +143,7 @@ module "eks_data_addons" {
| <a name="input_enable_mlflow_tracking"></a> [enable\_mlflow\_tracking](#input\_enable\_mlflow\_tracking) | Enable MLflow Tracking add-on | `bool` | `false` | no |
| <a name="input_enable_nvidia_device_plugin"></a> [enable\_nvidia\_device\_plugin](#input\_enable\_nvidia\_device\_plugin) | Enable NVIDIA Device Plugin add-on | `bool` | `false` | no |
| <a name="input_enable_nvidia_gpu_operator"></a> [enable\_nvidia\_gpu\_operator](#input\_enable\_nvidia\_gpu\_operator) | Enable NVIDIA GPU Operator add-on | `bool` | `false` | no |
| <a name="input_enable_nvidia_triton_server"></a> [enable\_nvidia\_triton\_server](#input\_enable\_nvidia\_triton\_server) | Enable NVIDIA Triton Server add-on | `bool` | `false` | no |
| <a name="input_enable_pinot"></a> [enable\_pinot](#input\_enable\_pinot) | Enable Apache Pinot Add-On | `bool` | `false` | no |
| <a name="input_enable_spark_history_server"></a> [enable\_spark\_history\_server](#input\_enable\_spark\_history\_server) | Enable Spark History Server add-on | `bool` | `false` | no |
| <a name="input_enable_spark_operator"></a> [enable\_spark\_operator](#input\_enable\_spark\_operator) | Enable Spark on K8s Operator add-on | `bool` | `false` | no |
Expand All @@ -158,6 +160,7 @@ module "eks_data_addons" {
| <a name="input_mlflow_tracking_helm_config"></a> [mlflow\_tracking\_helm\_config](#input\_mlflow\_tracking\_helm\_config) | MLflow Tracking add-on Helm Chart config | `any` | `{}` | no |
| <a name="input_nvidia_device_plugin_helm_config"></a> [nvidia\_device\_plugin\_helm\_config](#input\_nvidia\_device\_plugin\_helm\_config) | NVIDIA Device Plugin Helm Chart config | `any` | `{}` | no |
| <a name="input_nvidia_gpu_operator_helm_config"></a> [nvidia\_gpu\_operator\_helm\_config](#input\_nvidia\_gpu\_operator\_helm\_config) | Helm configuration for NVIDIA GPU Operator | `any` | `{}` | no |
| <a name="input_nvidia_triton_server_helm_config"></a> [nvidia\_triton\_server\_helm\_config](#input\_nvidia\_triton\_server\_helm\_config) | Helm configuration for NVIDIA Triton Server | `any` | `{}` | no |
| <a name="input_oidc_provider_arn"></a> [oidc\_provider\_arn](#input\_oidc\_provider\_arn) | The ARN of the cluster OIDC Provider | `string` | n/a | yes |
| <a name="input_pinot_helm_config"></a> [pinot\_helm\_config](#input\_pinot\_helm\_config) | Apache Pinot Helm Chart config | `any` | `{}` | no |
| <a name="input_spark_history_server_helm_config"></a> [spark\_history\_server\_helm\_config](#input\_spark\_history\_server\_helm\_config) | Helm configuration for Spark History Server | `any` | `{}` | no |
Expand Down
6 changes: 6 additions & 0 deletions helm-charts/nvidia-triton-server/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Helm chart metadata (Chart API v2) for the Triton Inference Server chart.
apiVersion: v2
type: application
name: triton-inference-server
description: A Helm chart for deploying NVIDIA Triton Inference Server on Kubernetes with AWS EKS
# Version of this chart.
version: '1.0.0'
# Triton Inference Server version
appVersion: '2.44.0'
62 changes: 62 additions & 0 deletions helm-charts/nvidia-triton-server/templates/_helpter.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{/* vim: set filetype=mustache: */}}
{{/*
triton.name: short name of the chart.
Uses .Values.nameOverride when it is set, otherwise .Chart.Name. The result is
cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "triton.name" -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- $base | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
triton.fullname: fully qualified application name.
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or .Values.nameOverride);
  3. "<release name>-<chart name>" otherwise.
The result is cut to 63 characters because some Kubernetes name fields are
limited to this (by the DNS naming spec); a trailing "-" is removed.
*/}}
{{- define "triton.fullname" -}}
{{- $release := .Release.Name -}}
{{- $result := "" -}}
{{- if .Values.fullnameOverride -}}
{{- $result = .Values.fullnameOverride -}}
{{- else -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- if contains $chartName $release -}}
{{- $result = $release -}}
{{- else -}}
{{- $result = printf "%s-%s" $release $chartName -}}
{{- end -}}
{{- end -}}
{{- $result | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
triton.labels: common metadata labels used by the chart's templates.
Emits the chart label, the name/instance pair, the application version, the
managing service and a fixed "deployed-by" marker.
The version is quoted so that an appVersion that looks like a number
(for example 2.44) still renders as a string, which label values must be;
the label is skipped entirely when the chart declares no appVersion.
*/}}
{{- define "triton.labels" -}}
helm.sh/chart: {{ include "triton.chart" . }}
app.kubernetes.io/name: {{ include "triton.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
deployed-by: "data-on-eks"
{{- end -}}

{{/*
triton.selectorLabels: the name/instance label pair.
Used as the Deployment selector and pod labels, and as the selector of the
Services and the ServiceMonitor.
*/}}
{{- define "triton.selectorLabels" -}}
{{- $app := include "triton.name" . -}}
{{- $instance := .Release.Name -}}
app.kubernetes.io/name: {{ $app }}
app.kubernetes.io/instance: {{ $instance }}
{{- end -}}

{{/*
triton.serviceAccountName: name of the service account the pods run as.
When the chart does not create the account, .Values.serviceAccount.name is
used, falling back to "default". When the chart creates it, the configured
name (or the chart fullname if none is given) is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "triton.serviceAccountName" -}}
{{- $sa := .Values.serviceAccount -}}
{{- if not $sa.create -}}
{{- $sa.name | default "default" -}}
{{- else -}}
{{- $sa.name | default (include "triton.fullname" .) | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}

{{/*
triton.chart: "<chart name>-<chart version>", used as the helm.sh/chart label.
The value is sanitised so it is always a legal label value: "+" (allowed in
SemVer build metadata but not in labels) is replaced with "_", the result is
cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "triton.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
99 changes: 99 additions & 0 deletions helm-charts/nvidia-triton-server/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{{- /*
Deployment running the Triton Inference Server container.
Image, resources, security contexts, environment, ports and scheduling
constraints come from .Values; the server arguments and probe timings are
fixed in this template.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "triton.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  strategy:
    type: RollingUpdate
  # When the HorizontalPodAutoscaler is enabled it owns the replica count;
  # rendering replicas here would reset it on every upgrade.
  {{- if not .Values.hpa.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "triton.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "triton.selectorLabels" . | nindent 8 }}
    spec:
      volumes:
        - name: cache
          emptyDir: {}
        # Memory-backed volume mounted at /dev/shm in the container.
        - name: dshm
          emptyDir:
            medium: Memory
      serviceAccountName: {{ include "triton.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ include "triton.fullname" . }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          volumeMounts:
            - mountPath: /home/triton-server/.cache
              name: cache
            - mountPath: /dev/shm
              name: dshm
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          # The model repository is polled every 120 seconds for changes.
          args:
            - "tritonserver"
            - "--model-store={{ .Values.modelRepositoryPath }}"
            - "--model-control-mode=poll"
            - "--allow-metrics=True"
            - "--log-verbose=1"
            - "--strict-model-config=False"
            - "--repository-poll-secs=120"
          env:
            # Plain name/value pairs.
            {{- range .Values.environment }}
            - name: {{ .name }}
              value: {{ .value | quote }}
            {{- end }}
            # Values read from Kubernetes Secrets.
            {{- range .Values.secretEnvironment }}
            - name: {{ .name }}
              valueFrom:
                secretKeyRef:
                  name: {{ .secretName }}
                  key: {{ .key }}
            {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.service.ports.http }}
            - name: grpc
              containerPort: {{ .Values.service.ports.grpc }}
            - name: metrics
              containerPort: {{ .Values.service.ports.metrics }}
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
            initialDelaySeconds: 180
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /v2/health/ready
              port: http
            initialDelaySeconds: 180
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
33 changes: 33 additions & 0 deletions helm-charts/nvidia-triton-server/templates/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{- /*
HorizontalPodAutoscaler targeting the chart's Deployment.
Rendered only when .Values.hpa.enabled is set. A CPU and/or memory utilization
metric is added for each target percentage that is configured.
*/ -}}
{{- $hpa := .Values.hpa -}}
{{- if $hpa.enabled }}
{{- $target := include "triton.fullname" . }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ $target }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ $target }}
  minReplicas: {{ $hpa.minReplicas }}
  maxReplicas: {{ $hpa.maxReplicas }}
  metrics:
    {{- with $hpa.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
    {{- with $hpa.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
{{- end }}
45 changes: 45 additions & 0 deletions helm-charts/nvidia-triton-server/templates/ingress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{{- /*
Ingress routing HTTP traffic to the chart's Service.
Rendered only when .Values.ingress.enabled is set. Every path is routed to the
Service named after the chart fullname, on the port number given per path.
*/ -}}
{{- if .Values.ingress.enabled }}
{{- $fullName := include "triton.fullname" . -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ $fullName }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if .Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            # pathType is mandatory in networking.k8s.io/v1; fall back to the
            # controller-defined matching when the values do not set one.
            pathType: {{ .pathType | default "ImplementationSpecific" }}
            backend:
              service:
                name: {{ $fullName }}
                port:
                  number: {{ .service.port.number }}
          {{- end }}
    {{- end }}
{{- end }}
16 changes: 16 additions & 0 deletions helm-charts/nvidia-triton-server/templates/metrics-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{- /*
Service exposing only the Triton metrics port, under the port name "metrics"
that the ServiceMonitor endpoint refers to.
The port is taken from .Values.service.metricsPort when set; otherwise it
falls back to .Values.service.ports.metrics, the key the Deployment and the
main Service use for the same port.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton.fullname" . }}-metrics
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
spec:
  ports:
    - name: metrics
      port: {{ .Values.service.metricsPort | default .Values.service.ports.metrics }}
      targetPort: metrics
  selector:
    {{- include "triton.selectorLabels" . | nindent 4 }}
14 changes: 14 additions & 0 deletions helm-charts/nvidia-triton-server/templates/service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{- /*
ServiceMonitor scraping the "metrics" port of the matching Services every 15s.
Rendered only when the cluster serves the monitoring.coreos.com/v1
ServiceMonitor kind, so the chart still installs on clusters where that CRD
is not present.
*/ -}}
{{- if .Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor" }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "triton.fullname" . }}-metrics-monitor
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      {{- include "triton.selectorLabels" . | nindent 6 }}
  endpoints:
    - port: metrics
      interval: 15s
{{- end }}
21 changes: 21 additions & 0 deletions helm-charts/nvidia-triton-server/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
Main Service for the Triton pods.
Publishes the http, grpc and metrics ports from .Values.service.ports; each
Service port is named "<protocol>-inference-server" and targets the container
port named after the protocol.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    {{- range $proto := list "http" "grpc" "metrics" }}
    - name: {{ $proto }}-inference-server
      port: {{ index $.Values.service.ports $proto }}
      targetPort: {{ $proto }}
    {{- end }}
  selector:
    {{- include "triton.selectorLabels" . | nindent 4 }}
11 changes: 11 additions & 0 deletions helm-charts/nvidia-triton-server/templates/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{{- /*
ServiceAccount for the Triton pods.
Rendered only when .Values.serviceAccount.create is set. The annotations key
is emitted only when .Values.serviceAccount.annotations is non-empty.
*/ -}}
{{- if .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "triton.serviceAccountName" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}
Loading

0 comments on commit 1034c43

Please sign in to comment.