From 1f1d47e0cf6193a09d74c1ae53bd01fd8da9c0a5 Mon Sep 17 00:00:00 2001
From: Alexandre Chapellon <alexandre.chapellon@hyland.com>
Date: Wed, 5 Jun 2024 11:03:31 +0200
Subject: [PATCH] prevent scaling to zero for both ATS and repo

---
 docs/helm/autoscaling.md                      | 38 +++++++++----------
 .../templates/_helpers-keda.tpl               |  4 +-
 .../keda-scaler-prometheus-repo.yaml          |  3 +-
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/docs/helm/autoscaling.md b/docs/helm/autoscaling.md
index 27dec9ece..bab085575 100644
--- a/docs/helm/autoscaling.md
+++ b/docs/helm/autoscaling.md
@@ -95,6 +95,12 @@ alfresco-repository:
       ...
 ```
 
+The Alfresco repository [KEDA
+scaledobject](https://keda.sh/docs/latest/concepts/scaling-deployments/#scaledobject-spec)
+forbids scaling the repository replicaset down to zero, mainly because it is a
+service which requires some time to start up (incompatible with on-demand
+spin-up) and it is the main service of the Alfresco platform.
+
 ##### Prometheus scaler
 
 The KEDA based auto scaler relies on the number of Tomcat thread used. By
@@ -113,6 +119,14 @@ In the same maner the parameters below can be set:
 * `minReplicas`: The default minimum number of replica count is 1.
 * `maxReplicas`: The default maximum number of replica count is 3.
 
+To enable repository scaling through KEDA, you need to set the following values:
+
+```yaml
+keda:
+  components:
+    - alfresco-repository
+```
+
 ### Alfresco Transform Service
 
 #### Basic (CPU based) scaling for ATS
@@ -127,6 +141,11 @@ implications.
 
 To start with, make sure your Kubernetes cluster has KEDA installed
 
+The Alfresco Transform Service [KEDA
+scaledobject](https://keda.sh/docs/latest/concepts/scaling-deployments/#scaledobject-spec)
+forbids scaling the tengines replicaset down to zero, as the T-router would
+eventually stop working if it loses all instances of a specific tengine.
+
 ##### Activemq scaler
 
 Regular ActiveMQ instances exposes a rest API which can be used to get the
@@ -152,11 +171,6 @@ T-engine workloads (`imagemagick`, `libreoffice`, `transformmisc`, `pdfrenderer`
 * `kedaPollingInterval`: Queues are checked every 15 seconds.
 * `kedaInitialCoolDownPeriod`: KEDA will wait for 5 minutes before activating
   the scaling object (before no scaling can happen).
-* `kedaCooldownPeriod`: After KEDA has found there is no activity in the
-  monitored queue, it will wait for 15 minutes before scaling down the pods to
-  0.
-* `kedaIdleReplicas`: The default idle replica count is 0 (tears down the
-  service).
 * `minReplicas`: The default minimum number of replica count is 1.
 * `maxReplicas`: The default maximum number of replica count is 3.
 
@@ -165,20 +179,6 @@ T-engine workloads (`imagemagick`, `libreoffice`, `transformmisc`, `pdfrenderer`
 > is one of the following: `imagemagick`, `libreoffice`, `transformmisc`,
 > `pdfrenderer` & `tika`.
 
-Scaling replicas down to zero is great when you have workload that is consistent
-enough with long period of inactivity (e.g. overnigh). But it can trigger a
-delay for the first requests when the workload starts again (e.g. the morning
-after). If you want to avoid scaling down you ATS deployments down to zero and
-always have at least one pod up to deal with "lonely" requests just apply the
-yaml below for the appropriate scaler object (here for pdf convertion):
-
-```yaml
-alfresco-transform-service:
-  pdfrenderer:
-    autoscaling:
-      kedaIdleReplicas: null
-```
-
 If you want to use an external ActiveMQ broker instead of the embeded one
 (recommended), you can set the following values:
 
diff --git a/helm/alfresco-content-services/templates/_helpers-keda.tpl b/helm/alfresco-content-services/templates/_helpers-keda.tpl
index 52289b781..228dc946f 100644
--- a/helm/alfresco-content-services/templates/_helpers-keda.tpl
+++ b/helm/alfresco-content-services/templates/_helpers-keda.tpl
@@ -35,8 +35,8 @@ Usage: include "alfresco-content-services.keda.scaler.options" $
 {{- define "alfresco-content-services.keda.scaler.options" -}}
 pollingInterval: {{ .autoscaling.kedaPollingInterval | default 15 }}
 initialCooldownPeriod: {{ .autoscaling.kedaInitialCooldownPeriod | default 300 }}
-cooldownPeriod: {{ .autoscaling.kedaCooldownPeriod | default 900 }}
-idleReplicaCount: {{ .autoscaling.kedaIdleReplicas | default 0 }}
+cooldownPeriod: {{ .autoscaling.kedaCooldownPeriod | default "null" }}
+idleReplicaCount: {{ .autoscaling.kedaIdleReplicas | default "null" }}
 minReplicaCount:  {{ .autoscaling.minReplicas }}
 maxReplicaCount:  {{ .autoscaling.maxReplicas }}
 advanced:
diff --git a/helm/alfresco-content-services/templates/keda-scaler-prometheus-repo.yaml b/helm/alfresco-content-services/templates/keda-scaler-prometheus-repo.yaml
index 21a82199f..8dd64a3bd 100644
--- a/helm/alfresco-content-services/templates/keda-scaler-prometheus-repo.yaml
+++ b/helm/alfresco-content-services/templates/keda-scaler-prometheus-repo.yaml
@@ -17,6 +17,5 @@ spec:
         serverAddress: {{ required $reqMsg .Values.prometheus.url }}
         threshold: {{ $repoCtx.Values.autoscaling.kedaTargetValue | default 170 | quote }}
         query: avg(jvm_threads_live_threads-jvm_threads_daemon_threads)
-  {{- $scaleOpts := omit (include "alfresco-content-services.keda.scaler.options" $repoCtx.Values | fromYaml) "idleReplicaCount" }}
-  {{- toYaml $scaleOpts | nindent 2 }}
+  {{- include "alfresco-content-services.keda.scaler.options" $repoCtx.Values | nindent 2 }}
 {{- end }}