generated from aws-ia/terraform-repo-template
-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from aws-ia/triton
feat: Added NVIDIA Triton server helm chart
- Loading branch information
Showing
13 changed files
with
504 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Helm chart metadata for the NVIDIA Triton Inference Server chart.
apiVersion: v2
type: application

name: triton-inference-server
description: >-
  A Helm chart for deploying NVIDIA Triton Inference Server
  on Kubernetes with AWS EKS

# Version of the chart itself.
version: "1.0.0"
# Triton Inference Server version.
appVersion: "2.44.0"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
{{/* vim: set filetype=mustache: */}} | ||
{{/*
Short name of the chart: .Values.nameOverride when set, otherwise the chart
name. The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "triton.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||
{{/*
Fully qualified application name.

Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.

Whatever is chosen is cut to 63 characters, because some Kubernetes name
fields are limited to that length by the DNS naming spec, and a trailing
"-" is removed.
*/}}
{{- define "triton.fullname" -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- $candidate := printf "%s-%s" .Release.Name $chartName -}}
{{- if .Values.fullnameOverride -}}
{{- $candidate = .Values.fullnameOverride -}}
{{- else if contains $chartName .Release.Name -}}
{{- $candidate = .Release.Name -}}
{{- end -}}
{{- $candidate | trunc 63 | trimSuffix "-" -}}
{{- end -}}
|
||
{{/*
Common labels attached to every resource rendered by this chart.

The app version is quoted so that a numeric-looking appVersion (for example
"2.44") is still rendered as a string, which Kubernetes requires for label
values. The label is skipped entirely when the chart declares no appVersion.
*/}}
{{- define "triton.labels" -}}
helm.sh/chart: {{ include "triton.chart" . }}
app.kubernetes.io/name: {{ include "triton.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
deployed-by: "data-on-eks"
{{- end -}}
|
||
{{/*
Labels that identify the pods of this release. They are used both as pod
labels and as the label selector of the workload and its Services.
*/}}
{{- define "triton.selectorLabels" -}}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/name: {{ include "triton.name" . }}
{{- end -}}
|
||
{{/*
Name of the ServiceAccount the pods run as.

- serviceAccount.create is false: use .Values.serviceAccount.name, or
  "default" when no name is given.
- serviceAccount.create is true: use .Values.serviceAccount.name, or the
  fully qualified app name when no name is given, cut to 63 characters with
  a trailing "-" removed.
*/}}
{{- define "triton.serviceAccountName" -}}
{{- if not .Values.serviceAccount.create -}}
{{- .Values.serviceAccount.name | default "default" -}}
{{- else -}}
{{- .Values.serviceAccount.name | default (include "triton.fullname" .) | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
|
||
{{/*
Chart name and version joined with "-", used as the helm.sh/chart label.

Label values may not contain "+" and are limited to 63 characters, so any
"+" (SemVer build metadata) is replaced by "_", the result is cut to 63
characters, and a trailing "-" is removed.
*/}}
{{- define "triton.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
99 changes: 99 additions & 0 deletions
99
helm-charts/nvidia-triton-server/templates/deployment.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Deployment running the Triton Inference Server container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "triton.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  strategy:
    type: RollingUpdate
  # When the HorizontalPodAutoscaler is enabled it owns the replica count;
  # rendering replicas here would reset it on every helm upgrade.
  {{- if not .Values.hpa.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "triton.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "triton.selectorLabels" . | nindent 8 }}
    spec:
      volumes:
        # Scratch space mounted at /home/triton-server/.cache.
        - name: cache
          emptyDir: {}
        # Memory-backed volume mounted at /dev/shm.
        - name: dshm
          emptyDir:
            medium: Memory
      serviceAccountName: {{ include "triton.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ include "triton.fullname" . }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          volumeMounts:
            - mountPath: /home/triton-server/.cache
              name: cache
            - mountPath: /dev/shm
              name: dshm
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          # The model repository is read from .Values.modelRepositoryPath and
          # polled every 120 seconds (--model-control-mode=poll).
          args:
            - "tritonserver"
            - "--model-store={{ .Values.modelRepositoryPath }}"
            - "--model-control-mode=poll"
            - "--allow-metrics=True"
            - "--log-verbose=1"
            - "--strict-model-config=False"
            - "--repository-poll-secs=120"
          env:
            # Plain name/value pairs from .Values.environment.
            {{- range .Values.environment }}
            - name: {{ .name }}
              value: {{ .value | quote }}
            {{- end }}
            # Variables sourced from Secrets, from .Values.secretEnvironment.
            {{- range .Values.secretEnvironment }}
            - name: {{ .name }}
              valueFrom:
                secretKeyRef:
                  name: {{ .secretName }}
                  key: {{ .key }}
            {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.service.ports.http }}
            - name: grpc
              containerPort: {{ .Values.service.ports.grpc }}
            - name: metrics
              containerPort: {{ .Values.service.ports.metrics }}
          livenessProbe:
            httpGet:
              path: /v2/health/live
              port: http
            initialDelaySeconds: 180
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /v2/health/ready
              port: http
            initialDelaySeconds: 180
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
{{- if .Values.hpa.enabled }}
{{- /*
HorizontalPodAutoscaler for the Triton Deployment, rendered only when
.Values.hpa.enabled is true. A CPU and/or memory utilization metric is added
for each target percentage that is set under .Values.hpa.
*/}}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "triton.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "triton.fullname" . }}
  minReplicas: {{ .Values.hpa.minReplicas }}
  maxReplicas: {{ .Values.hpa.maxReplicas }}
  metrics:
    {{- with .Values.hpa.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
    {{- with .Values.hpa.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ . }}
    {{- end }}
{{- end }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
{{- if .Values.ingress.enabled }}
{{- /*
Ingress in front of the Triton Service, rendered only when
.Values.ingress.enabled is true. Every path is routed to the Service named
after the fully qualified app name, on the port number given by the path
entry itself (.service.port.number).
*/}}
{{- $fullName := include "triton.fullname" . -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ $fullName }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if .Values.ingress.className }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            # pathType is mandatory in networking.k8s.io/v1, so fall back to
            # ImplementationSpecific when the values do not provide one.
            pathType: {{ .pathType | default "ImplementationSpecific" }}
            backend:
              service:
                name: {{ $fullName }}
                port:
                  number: {{ .service.port.number }}
          {{- end }}
    {{- end }}
{{- end }}
16 changes: 16 additions & 0 deletions
16
helm-charts/nvidia-triton-server/templates/metrics-service.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Dedicated Service exposing only the Triton metrics port. Its port is named
# "metrics" and targets the container port of the same name.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "triton.fullname" . }}-metrics
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
  annotations:
    alpha.monitoring.coreos.com/non-namespaced: "true"
spec:
  ports:
    - name: metrics
      # Honour service.metricsPort when it is set; otherwise use
      # service.ports.metrics, the value the other templates in this chart
      # read, so the port is never rendered empty.
      port: {{ .Values.service.metricsPort | default .Values.service.ports.metrics }}
      targetPort: metrics
  selector:
    {{- include "triton.selectorLabels" . | nindent 4 }}
14 changes: 14 additions & 0 deletions
14
helm-charts/nvidia-triton-server/templates/service-monitor.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# ServiceMonitor (monitoring.coreos.com/v1) that scrapes the Service port
# named "metrics" every 15 seconds on Services carrying this release's
# selector labels.
kind: ServiceMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
  name: {{ include "triton.fullname" . }}-metrics-monitor
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  endpoints:
    - interval: 15s
      port: metrics
  selector:
    matchLabels:
      {{- include "triton.selectorLabels" . | nindent 6 }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Main Service for Triton. It publishes the HTTP, gRPC and metrics ports
# configured under .Values.service.ports and forwards each one to the
# container port of the matching name (http, grpc, metrics).
kind: Service
apiVersion: v1
metadata:
  name: {{ include "triton.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector:
    {{- include "triton.selectorLabels" . | nindent 4 }}
  ports:
    - name: http-inference-server
      targetPort: http
      port: {{ .Values.service.ports.http }}
    - name: grpc-inference-server
      targetPort: grpc
      port: {{ .Values.service.ports.grpc }}
    - name: metrics-inference-server
      targetPort: metrics
      port: {{ .Values.service.ports.metrics }}
11 changes: 11 additions & 0 deletions
11
helm-charts/nvidia-triton-server/templates/serviceaccount.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{{- if .Values.serviceAccount.create }}
{{- /*
ServiceAccount for the Triton pods, rendered only when
.Values.serviceAccount.create is true. The annotations key is emitted only
when .Values.serviceAccount.annotations is non-empty, so the manifest never
carries an empty or null annotations field.
*/}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "triton.serviceAccountName" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "triton.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}
Oops, something went wrong.