From eccb0c2de85c6d16da09c8629f380628ddeff68a Mon Sep 17 00:00:00 2001
From: Lakindu <lakindu.gunasekara@h2o.ai>
Date: Thu, 31 Oct 2024 02:14:27 +0530
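Subject: [PATCH] Remove TGI (text-generation-inference) from the Helm chart

Delete the tgi-* templates (configmap, deployment, pvc, service), the
`tgi` section of values.yaml and its README entries, and the TGI
wait-loop and `h2ogpt_inference_server` wiring in the h2ogpt deployment.

Also rename templates/validations.yaml to validators.yaml, dropping the
"TGI and vLLM cannot both be enabled" check, and reword the agents
autoscaling comment in values.yaml.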

---
 helm/h2ogpt-chart/README.md                   |  23 ---
 .../templates/h2ogpt-deployment.yaml          |  17 +--
 .../h2ogpt-chart/templates/tgi-configmap.yaml |  13 --
 .../templates/tgi-deployment.yaml             | 141 ------------------
 helm/h2ogpt-chart/templates/tgi-pvc.yaml      |  14 --
 helm/h2ogpt-chart/templates/tgi-service.yaml  |  15 --
 .../{validations.yaml => validators.yaml}     |   3 -
 helm/h2ogpt-chart/values.yaml                 |  44 +-----
 8 files changed, 2 insertions(+), 268 deletions(-)
 delete mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yaml
 delete mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml
 delete mode 100644 helm/h2ogpt-chart/templates/tgi-pvc.yaml
 delete mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml
 rename helm/h2ogpt-chart/templates/{validations.yaml => validators.yaml} (55%)

diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md
index 8d37e1f1b..2f41f4291 100644
--- a/helm/h2ogpt-chart/README.md
+++ b/helm/h2ogpt-chart/README.md
@@ -135,29 +135,6 @@ A Helm chart for h2oGPT
 | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` |  |
 | nameOverride | string | `""` |  |
 | namespaceOverride | string | `""` |  |
-| tgi.containerArgs | string | `nil` |  |
-| tgi.enabled | bool | `false` | Enable tgi |
-| tgi.env | object | `{}` |  |
-| tgi.hfSecret | string | `nil` |  |
-| tgi.image.pullPolicy | string | `"IfNotPresent"` |  |
-| tgi.image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` |  |
-| tgi.image.tag | string | `"0.9.3"` |  |
-| tgi.nodeSelector | string | `nil` |  |
-| tgi.overrideConfig | string | `nil` |  |
-| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. |
-| tgi.podAnnotations | object | `{}` |  |
-| tgi.podLabels | object | `{}` |  |
-| tgi.podSecurityContext | string | `nil` |  |
-| tgi.replicaCount | int | `1` |  |
-| tgi.resources | string | `nil` |  |
-| tgi.securityContext | string | `nil` |  |
-| tgi.service.port | int | `8080` |  |
-| tgi.service.type | string | `"ClusterIP"` |  |
-| tgi.storage.class | string | `nil` |  |
-| tgi.storage.size | string | `"512Gi"` |  |
-| tgi.storage.useEphemeral | bool | `true` |  |
-| tgi.tolerations | string | `nil` |  |
-| tgi.updateStrategy.type | string | `"RollingUpdate"` |  |
 | vllm.containerArgs[0] | string | `"--model"` |  |
 | vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` |  |
 | vllm.containerArgs[2] | string | `"--tokenizer"` |  |
diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml
index 7556ca758..741390cd7 100644
--- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml
+++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml
@@ -87,18 +87,7 @@ spec:
 
               python3 /workspace/generate.py
           {{- end }}
-          {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }}
-          args:
-            - >
-              until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1;
-                do
-                  echo "Waiting for inference service to become ready...";
-                  sleep 5;
-                done
-
-              python3 /workspace/generate.py
-          {{- end }}
-          {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled )) }}
+          {{- if and .Values.h2ogpt.enabled (not .Values.vllm.enabled) }}
           args:
             - >
               python3 /workspace/generate.py
@@ -146,10 +135,6 @@ spec:
                 name: {{ include "h2ogpt.fullname" . }}-external-llm-secret
           {{- end }}
           env:
-          {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) }}
-            - name: h2ogpt_inference_server
-              value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}"
-          {{- end }}
           {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }}
             - name: h2ogpt_inference_server
               value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}"
diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml
deleted file mode 100644
index ec5c17866..000000000
--- a/helm/h2ogpt-chart/templates/tgi-configmap.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-{{- if .Values.tgi.enabled }}
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config
-  namespace: {{ include "h2ogpt.namespace" . | quote }}
-  labels:
-    {{- include "h2ogpt.labels" . | nindent 4 }}
-data:
-{{- range $key, $value := .Values.tgi.overrideConfig }}
-  {{ printf "%s" $key | upper }}: {{ $value | quote }}
-{{- end }}
-{{- end }}
diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml
deleted file mode 100644
index 721b2ed01..000000000
--- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml
+++ /dev/null
@@ -1,141 +0,0 @@
-{{- if .Values.tgi.enabled }}
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "h2ogpt.fullname" . }}-tgi-inference
-  namespace: {{ include "h2ogpt.namespace" . | quote }}
-  labels:
-    app: {{ include "h2ogpt.fullname" . }}-tgi-inference
-spec:
-  replicas: {{ .Values.tgi.replicaCount }}
-  selector:
-    matchLabels:
-      app: {{ include "h2ogpt.fullname" . }}-tgi-inference
-  {{- if .Values.tgi.updateStrategy }}
-  strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }}
-  {{- end }}
-  template:
-    metadata:
-      {{- with .Values.tgi.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        app: {{ include "h2ogpt.fullname" . }}-tgi-inference
-        {{- with .Values.tgi.podLabels }}
-        {{ toYaml . | nindent 6 }}
-        {{- end }}
-    spec:
-      {{- with .Values.tgi.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.tgi.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      securityContext:
-        {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }}
-      affinity:
-        {{- if .Values.tgi.podAffinity }}
-        podAntiAffinity:
-          {{- if .Values.tgi.podAffinity.hostname }}
-          requiredDuringSchedulingIgnoredDuringExecution:
-            - labelSelector:
-                matchExpressions:
-                  - key: app
-                    operator: In
-                    values:
-                      - {{ include "h2ogpt.fullname" . }}
-              topologyKey: kubernetes.io/hostname
-          {{- end }}
-          {{- if .Values.tgi.podAffinity.zone }}
-          preferredDuringSchedulingIgnoredDuringExecution:
-            - weight: 100
-              podAffinityTerm:
-                labelSelector:
-                  matchExpressions:
-                    - key: app
-                      operator: In
-                      values:
-                        - {{ include "h2ogpt.fullname" . }}
-                topologyKey: failure-domain.beta.kubernetes.io/zone
-          {{- end }}
-        {{- end }}
-      {{- with .Values.tgi.extraAffinity }}
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.tgi.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      containers:
-        - name: {{ include "h2ogpt.fullname" . }}-tgi-inference
-          securityContext:
-            {{- toYaml .Values.tgi.securityContext | nindent 12 }}
-          image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}"
-          imagePullPolicy: {{ .Values.tgi.image.pullPolicy }}
-          command: []
-          args:
-{{- range $arg := .Values.tgi.containerArgs }}
-            - "{{ $arg }}"
-{{- end }}
-          ports:
-            - name: http
-              containerPort: 80
-              protocol: TCP
-          {{- if .Values.tgi.livenessProbe }}
-          livenessProbe:
-            httpGet:
-              path:  /
-              scheme: HTTP
-              port: http
-            {{- toYaml .Values.tgi.livenessProbe | nindent 12 }}
-          {{- end }}
-          {{- if .Values.tgi.readinessProbe }}
-          readinessProbe:
-            httpGet:
-              path:  /
-              scheme: HTTP
-              port: http
-            {{- toYaml .Values.tgi.readinessProbe | nindent 12 }}
-          {{- end }}
-          resources:
-            {{- toYaml .Values.tgi.resources | nindent 12 }}
-          env:
-          {{- range $key, $value := .Values.tgi.env }}
-            - name: "{{ $key }}"
-              value: "{{ $value }}"
-          {{- end }}
-          envFrom:
-            - configMapRef:
-                name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config
-            - secretRef:
-                name: {{ .Values.tgi.hfSecret }}
-          volumeMounts:
-            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
-              mountPath: /app/cache
-              subPath: cache
-            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
-              mountPath: /data
-              subPath: data
-            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
-              mountPath: /dev/shm
-              subPath: shm
-      volumes:
-        - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
-        {{- if not .Values.tgi.storage.useEphemeral}}
-          persistentVolumeClaim:
-            claimName:  {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
-          {{- else}}
-          ephemeral:
-            volumeClaimTemplate:
-              spec:
-                accessModes:
-                  - ReadWriteOnce
-                resources:
-                  requests:
-                    storage: {{ .Values.tgi.storage.size | quote }}
-                storageClassName: {{ .Values.tgi.storage.class }}
-          {{- end }}
-{{- end }}
diff --git a/helm/h2ogpt-chart/templates/tgi-pvc.yaml b/helm/h2ogpt-chart/templates/tgi-pvc.yaml
deleted file mode 100644
index 0a34be2fd..000000000
--- a/helm/h2ogpt-chart/templates/tgi-pvc.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}}
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
-  namespace: {{ include "h2ogpt.namespace" . | quote }}
-spec:
-  accessModes:
-    - ReadWriteOnce
-  storageClassName: {{ .Values.tgi.storage.class }}
-  resources:
-    requests:
-      storage: {{ .Values.tgi.storage.size | quote }}
-{{- end }}
diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml
deleted file mode 100644
index de42ad89a..000000000
--- a/helm/h2ogpt-chart/templates/tgi-service.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-{{- if .Values.tgi.enabled }}
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "h2ogpt.fullname" . }}-tgi-inference
-  namespace: {{ include "h2ogpt.namespace" . | quote }}
-spec:
-  selector:
-    app: {{ include "h2ogpt.fullname" . }}-tgi-inference
-  ports:
-    - protocol: TCP
-      port: {{ .Values.tgi.service.port }}
-      targetPort: 80
-  type: {{ .Values.tgi.service.type }}
-{{- end }}
diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validators.yaml
similarity index 55%
rename from helm/h2ogpt-chart/templates/validations.yaml
rename to helm/h2ogpt-chart/templates/validators.yaml
index ce4e264fd..b97d33e5c 100644
--- a/helm/h2ogpt-chart/templates/validations.yaml
+++ b/helm/h2ogpt-chart/templates/validators.yaml
@@ -1,6 +1,3 @@
-{{- if and .Values.vllm.enabled .Values.tgi.enabled }}
-  {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }}
-{{- end }}
 {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }}
   {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }}
 {{- end }}
diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml
index 6bcfb292b..78b79d159 100644
--- a/helm/h2ogpt-chart/values.yaml
+++ b/helm/h2ogpt-chart/values.yaml
@@ -140,7 +140,7 @@ agents:
   # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true`
   enabled: false
   autoscaling:
-    # Enable autoscaling for agents
+    # Enable autoscaling (HPA) for agents
     enabled: false
     minReplicas: 1
     maxReplicas: 2
@@ -239,48 +239,6 @@ agents:
   podAnnotations: {}
   podLabels: {}
 
-tgi:
-  # -- Enable tgi
-  enabled: false
-  replicaCount: 1
-
-  image:
-    repository: ghcr.io/huggingface/text-generation-inference
-    tag: 0.9.3
-    pullPolicy: IfNotPresent
-  # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
-  podAffinity:
-    # hostname:
-    # zone:
-
-  storage:
-    size: 512Gi
-    class: 
-    useEphemeral: true
-  
-  overrideConfig:
-  hfSecret:
-  containerArgs:
-
-  service:
-    type: ClusterIP
-    port: 8080
-
-  updateStrategy:
-    type: RollingUpdate
-
-  podSecurityContext:
-  securityContext:
-
-  resources:
-  nodeSelector:
-  tolerations:
-
-  env: {}
-
-  podAnnotations: {}
-  podLabels: {}
-
 vllm:
   # -- Enable vllm
   enabled: false