Skip to content

Commit

Permalink
add cron job to clean duckdb index cache (#2016)
Browse files Browse the repository at this point in the history
* add cron job to clean duckdb index cache

* fix action

* give more time to expire cache folder
  • Loading branch information
AndreaFrancis authored Oct 26, 2023
1 parent 58bd71d commit f1ccb72
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 1 deletion.
2 changes: 1 addition & 1 deletion chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.20.0
version: 1.21.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
5 changes: 5 additions & 0 deletions chart/env/prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ backfill:
cpu: 2
memory: "8Gi"

# Production overrides for the cron job that cleans the DuckDB index cache.
cleanDuckdbIndexCache:
  nodeSelector:
    role-datasets-server: "true"
  # 3 days (3 * 24 * 60 * 60 seconds). Written without a `_` digit separator:
  # only YAML 1.1 parsers read `259_200` as an integer, YAML 1.2 core-schema
  # parsers would load it as a string.
  expiredTimeIntervalSeconds: 259200

cleanHfDatasetsCache:
nodeSelector:
role-datasets-server: "true"
Expand Down
5 changes: 5 additions & 0 deletions chart/templates/_common/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ app.kubernetes.io/component: "{{ include "name" . }}-cache-metrics-collector"
app.kubernetes.io/component: "{{ include "name" . }}-backfill"
{{- end -}}

{{/*
Labels for the clean-duckdb-index-cache cron job: everything emitted by
"hf.labels.commons", followed by a component label built from the "name"
helper with the "-clean-duckdb-cache" suffix.
*/}}
{{- define "labels.cleanDuckdbIndexCache" -}}
{{ include "hf.labels.commons" . }}
app.kubernetes.io/component: "{{ printf "%s-clean-duckdb-cache" (include "name" .) }}"
{{- end -}}

{{- define "labels.cleanDuckdbIndexDownloads" -}}
{{ include "hf.labels.commons" . }}
app.kubernetes.io/component: "{{ include "name" . }}-clean-duckdb-downloads"
Expand Down
27 changes: 27 additions & 0 deletions chart/templates/cron-jobs/clean-duckdb-index-cache/_container.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2023 The HuggingFace Authors.

{{/*
Container entry for the clean-duckdb-index-cache cron job.

Runs the cache-maintenance image with the DuckDB index volume mounted through
the "volumeMountDuckDBIndexRW" helper. On top of the shared cache/queue/common
environment helpers it sets:
  - CACHE_MAINTENANCE_ACTION and LOG_LEVEL from .Values.cleanDuckdbIndexCache
  - DIRECTORY_CLEANING_CACHE_DIRECTORY from .Values.duckDBIndex.cacheDirectory
  - DIRECTORY_CLEANING_SUBFOLDER_PATTERN and
    DIRECTORY_CLEANING_EXPIRED_TIME_INTERVAL_SECONDS from
    .Values.cleanDuckdbIndexCache
All values are quoted so they always render as strings.
*/}}
{{- define "containerCleanDuckdbIndexCache" -}}
{{- $job := .Values.cleanDuckdbIndexCache -}}
- name: "{{ include "name" . }}-clean-duckdb-cache"
  image: {{ include "jobs.cacheMaintenance.image" . }}
  imagePullPolicy: {{ .Values.images.pullPolicy }}
  volumeMounts:
  {{ include "volumeMountDuckDBIndexRW" . | nindent 2 }}
  securityContext:
    allowPrivilegeEscalation: false
  resources: {{ $job.resources | toYaml | nindent 4 }}
  env:
  {{ include "envCache" . | nindent 2 }}
  {{ include "envQueue" . | nindent 2 }}
  {{ include "envCommon" . | nindent 2 }}
  - name: CACHE_MAINTENANCE_ACTION
    value: {{ quote $job.action }}
  - name: LOG_LEVEL
    value: {{ quote $job.log.level }}
  - name: DIRECTORY_CLEANING_CACHE_DIRECTORY
    value: {{ quote .Values.duckDBIndex.cacheDirectory }}
  - name: DIRECTORY_CLEANING_SUBFOLDER_PATTERN
    value: {{ quote $job.subfolderPattern }}
  - name: DIRECTORY_CLEANING_EXPIRED_TIME_INTERVAL_SECONDS
    value: {{ quote $job.expiredTimeIntervalSeconds }}
{{- end -}}
27 changes: 27 additions & 0 deletions chart/templates/cron-jobs/clean-duckdb-index-cache/job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2023 The HuggingFace Authors.

{{- /*
CronJob that runs the cache-maintenance container on
.Values.cleanDuckdbIndexCache.schedule to clean the DuckDB index cache.
Rendered only when the cache-maintenance image is configured and
.Values.cleanDuckdbIndexCache.enabled is truthy.
*/}}
{{- if and .Values.images.jobs.cacheMaintenance .Values.cleanDuckdbIndexCache.enabled }}
apiVersion: batch/v1
kind: CronJob
metadata:
  labels: {{ include "labels.cleanDuckdbIndexCache" . | nindent 4 }}
  name: "{{ include "name" . }}-job-clean-duckdb-cache"
  namespace: {{ .Release.Namespace }}
spec:
  schedule: {{ .Values.cleanDuckdbIndexCache.schedule | quote }}
  jobTemplate:
    spec:
      {{- /* finished Jobs become eligible for automatic deletion after one hour */}}
      ttlSecondsAfterFinished: 3600
      template:
        spec:
          restartPolicy: OnFailure
          {{- include "dnsConfig" . | nindent 10 }}
          {{- /*
          imagePullSecrets is a pod-spec field, so it is indented like its
          siblings (10); with a shallower indent a non-empty helper output would
          land outside the pod spec and break the manifest.
          NOTE(review): assumes the helper emits an `imagePullSecrets:` key - confirm.
          */}}
          {{- include "image.imagePullSecrets" . | nindent 10 }}
          nodeSelector: {{ toYaml .Values.cleanDuckdbIndexCache.nodeSelector | nindent 12 }}
          tolerations: {{ toYaml .Values.cleanDuckdbIndexCache.tolerations | nindent 12 }}
          containers: {{ include "containerCleanDuckdbIndexCache" . | nindent 12 }}
          securityContext: {{ include "securityContext" . | nindent 12 }}
          initContainers: {{ include "initContainerDuckDBIndex" . | nindent 12 }}
          volumes: {{ include "volumeDuckDBIndex" . | nindent 12 }}
{{- end }}
19 changes: 19 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,25 @@ cleanHfDatasetsCache:
expiredTimeIntervalSeconds: 600
subfolderPattern: "*/datasets/*"

# Cron job that cleans expired folders from the DuckDB index cache directory.
cleanDuckdbIndexCache:
  enabled: true
  log:
    level: "info"
  action: "clean-directory"
  # NOTE(review): not referenced by this cron job's container or job template
  # as shown in this change - confirm whether it is still needed.
  error_codes_to_retry: ""
  # every 3 hours
  schedule: "0 */3 * * *"
  nodeSelector: {}
  resources:
    requests:
      cpu: 0
    limits:
      cpu: 0
  tolerations: []
  # the time interval, in seconds, after which cached DuckDB index folders are
  # considered expired and are deleted (600 = 10 minutes)
  expiredTimeIntervalSeconds: 600
  # pattern of the subfolders to clean; presumably resolved against the DuckDB
  # index cache directory - TODO confirm
  subfolderPattern: "cache/*"


cleanDuckdbIndexDownloads:
enabled: true
Expand Down

0 comments on commit f1ccb72

Please sign in to comment.