From eccb0c2de85c6d16da09c8629f380628ddeff68a Mon Sep 17 00:00:00 2001 From: Lakindu Date: Thu, 31 Oct 2024 02:14:27 +0530 Subject: [PATCH] Remove tgi --- helm/h2ogpt-chart/README.md | 23 --- .../templates/h2ogpt-deployment.yaml | 17 +-- .../h2ogpt-chart/templates/tgi-configmap.yaml | 13 -- .../templates/tgi-deployment.yaml | 141 ------------------ helm/h2ogpt-chart/templates/tgi-pvc.yaml | 14 -- helm/h2ogpt-chart/templates/tgi-service.yaml | 15 -- .../{validations.yaml => validators.yaml} | 3 - helm/h2ogpt-chart/values.yaml | 44 +----- 8 files changed, 2 insertions(+), 268 deletions(-) delete mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-pvc.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml rename helm/h2ogpt-chart/templates/{validations.yaml => validators.yaml} (55%) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 8d37e1f1b..2f41f4291 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -135,29 +135,6 @@ A Helm chart for h2oGPT | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | -| tgi.containerArgs | string | `nil` | | -| tgi.enabled | bool | `false` | Enable tgi | -| tgi.env | object | `{}` | | -| tgi.hfSecret | string | `nil` | | -| tgi.image.pullPolicy | string | `"IfNotPresent"` | | -| tgi.image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | | -| tgi.image.tag | string | `"0.9.3"` | | -| tgi.nodeSelector | string | `nil` | | -| tgi.overrideConfig | string | `nil` | | -| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | -| tgi.podAnnotations | object | `{}` | | -| tgi.podLabels | object | `{}` | | -| tgi.podSecurityContext | string | `nil` | | -| tgi.replicaCount | int | `1` | | -| tgi.resources | string | `nil` | | -| tgi.securityContext | string | `nil` | | -| tgi.service.port | int | `8080` | | -| tgi.service.type | string | `"ClusterIP"` | | -| tgi.storage.class | string | `nil` | | -| tgi.storage.size | string | `"512Gi"` | | -| tgi.storage.useEphemeral | bool | `true` | | -| tgi.tolerations | string | `nil` | | -| tgi.updateStrategy.type | string | `"RollingUpdate"` | | | vllm.containerArgs[0] | string | `"--model"` | | | vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | | vllm.containerArgs[2] | string | `"--tokenizer"` | | diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 7556ca758..741390cd7 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -87,18 +87,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled )) }} + {{- if and .Values.h2ogpt.enabled (not .Values.vllm.enabled ) }} args: - > python3 /workspace/generate.py @@ -146,10 +135,6 @@ spec: name: {{ include "h2ogpt.fullname" . }}-external-llm-secret {{- end }} env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml deleted file mode 100644 index ec5c17866..000000000 --- a/helm/h2ogpt-chart/templates/tgi-configmap.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml deleted file mode 100644 index 721b2ed01..000000000 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ /dev/null @@ -1,141 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - replicas: {{ .Values.tgi.replicaCount }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-pvc.yaml b/helm/h2ogpt-chart/templates/tgi-pvc.yaml deleted file mode 100644 index 0a34be2fd..000000000 --- a/helm/h2ogpt-chart/templates/tgi-pvc.yaml +++ /dev/null @@ -1,14 +0,0 @@ -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml deleted file mode 100644 index de42ad89a..000000000 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validators.yaml similarity index 55% rename from helm/h2ogpt-chart/templates/validations.yaml rename to helm/h2ogpt-chart/templates/validators.yaml index ce4e264fd..b97d33e5c 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -1,6 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6bcfb292b..78b79d159 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -140,7 +140,7 @@ agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false autoscaling: - # Enable autoscaling for agents + # Enable autoscaling (HPA) for agents enabled: false minReplicas: 1 maxReplicas: 2 @@ -239,48 +239,6 @@ agents: podAnnotations: {} podLabels: {} -tgi: - # -- Enable tgi - enabled: false - replicaCount: 1 - - image: - repository: ghcr.io/huggingface/text-generation-inference - tag: 0.9.3 - pullPolicy: IfNotPresent - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. - podAffinity: - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - service: - type: ClusterIP - port: 8080 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - vllm: # -- Enable vllm enabled: false