From 2ef1ecda47d213057c001be5d0bfac97a11f0d49 Mon Sep 17 00:00:00 2001 From: Andrew Moore <20435317+andrewmooreio@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:58:14 +0100 Subject: [PATCH] :sparkles: Update DB Refresh workflow to include Slack notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * :sparkles: Update DB Refresh workflow to include Slack notifications * 🐛 poc now uses opensearch * Update data-refresh.yaml * --ignore-not-found for helm uninstall * Happy Helming * Update data-refresh.yaml * 🐛 Correct service account names for refresh jobs * fix: Add curl package and fix Slack message * typo * :sparkles: create image and update job Update job.yaml Update job.yaml Update job.yaml Update job.yaml Update job.yaml Update job.yaml Update job.yaml Update job.yaml Update Dockerfile Update job.yaml Update Dockerfile Update Dockerfile Update job.yaml absolute dirs Update job.yaml Update job.yaml Update job.yaml Update job.yaml Update Dockerfile non root support Update job.yaml Update data-refresh.yaml up Update build-push-db-utils.yml rename Update build-push-db-utils.yml Update build-push-db-utils.yml no need to install jq no password update jobs for refresh * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * t * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * Update job.yaml * typo * Update job.yaml * Update job.yaml * Update data-refresh.yaml --------- Co-Authored-By: George Taylor --- .github/workflows/build-push-db-utils.yml | 42 +++++++++ .github/workflows/data-refresh.yaml | 101 +++++++++++----------- jobs/refresh-db/Chart.yaml | 2 +- jobs/refresh-db/templates/job.yaml | 43 +++++++-- jobs/refresh-s3/templates/job.yaml | 4 +- tools/db-utils/Dockerfile | 13 +++ 6 files changed, 145 insertions(+), 60 deletions(-) create mode 100644 .github/workflows/build-push-db-utils.yml create mode 
100644 tools/db-utils/Dockerfile diff --git a/.github/workflows/build-push-db-utils.yml b/.github/workflows/build-push-db-utils.yml new file mode 100644 index 0000000..f921712 --- /dev/null +++ b/.github/workflows/build-push-db-utils.yml @@ -0,0 +1,42 @@ +name: Build and push image + +on: + push: + workflow_dispatch: + +env: + IMAGE_NAME: hmpps-delius-alfresco-db-utils + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: checkout code + uses: actions/checkout@v4 + - name: Log in to the Container registry + uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 + with: + images: ${{ env.IMAGE_NAME }} + - name: Build and push Docker image + if: github.ref == 'refs/heads/main' + uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445 + with: + context: ./tools/db-utils/ + push: true + tags: ghcr.io/${{ github.repository_owner }}/${{ steps.meta.outputs.tags }}, ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:latest + labels: ${{ steps.meta.outputs.labels }} + - name: Build and push Docker image + if: github.ref != 'refs/heads/main' + uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445 + with: + context: ./tools/db-utils/ + push: true + tags: ghcr.io/${{ github.repository_owner }}/${{ steps.meta.outputs.tags }}, ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}-${{ github.run_id }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/data-refresh.yaml b/.github/workflows/data-refresh.yaml index 648ac32..5aa0f27 100644 --- a/.github/workflows/data-refresh.yaml +++ b/.github/workflows/data-refresh.yaml @@ -41,12 +41,12 @@ jobs: - name: Stop ${{ github.event.inputs.destination_env }} Environment run: | kubectl scale 
deployment alfresco-content-services-alfresco-cs-repository --replicas=0 - kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=0 - kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=0 + kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=0 + kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=0 refresh-db: name: Refresh DB - runs-on: [self-hosted, Linux, management-infrastructure] + runs-on: ubuntu-22.04 environment: name: ${{ github.event.inputs.source_env }} needs: stop-destination-environment @@ -59,11 +59,10 @@ jobs: version: 'v1.26.0' # default is latest stable id: kubectl_install - - name: Install Helm - uses: azure/setup-helm@v3.5 + - uses: azure/setup-helm@v4.2.0 with: - version: 'v3.9.0' - id: helm_install + version: 'v3.15.3' # default is latest (stable) + id: install - name: Configure kubectl run: | @@ -76,20 +75,20 @@ jobs: KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }} KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }} + - name: Uninstall DB Refresh chart + run: helm uninstall refresh-db --ignore-not-found + - name: DB Refresh working-directory: jobs/refresh-db run: | helm install refresh-db . 
\ --set sourceEnvironment=${{ github.event.inputs.source_env }} \ - --set destinationEnvironment=${{ github.event.inputs.destination_env }} - kubectl wait job refresh-db --for=condition=complete --timeout 10h - - - name: Uninstall DB Refresh chart - run: helm uninstall refresh-db + --set destinationEnvironment=${{ github.event.inputs.destination_env }} \ + --set slackWebhookUrl=${{ secrets.SLACK_WEBHOOK_URL }} refresh-s3: name: Refresh S3 - runs-on: [self-hosted, Linux, management-infrastructure] + runs-on: ubuntu-22.04 environment: name: ${{ github.event.inputs.source_env }} needs: stop-destination-environment @@ -102,11 +101,10 @@ jobs: version: 'v1.26.0' # default is latest stable id: kubectl_install - - name: Install Helm - uses: azure/setup-helm@v3.5 + - uses: azure/setup-helm@v4.2.0 with: - version: 'v3.9.0' - id: helm_install + version: 'v3.15.3' # default is latest (stable) + id: install - name: Configure kubectl run: | @@ -118,6 +116,9 @@ jobs: env: KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }} KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }} + + - name: Uninstall S3 Refresh chart + run: helm uninstall refresh-s3 --ignore-not-found - name: S3 Refresh working-directory: jobs/refresh-s3 @@ -139,35 +140,35 @@ jobs: kubectl wait jobs -l name-prefix=refresh-s3 --for=condition=complete --timeout 10h - name: Uninstall S3 Refresh chart - run: helm uninstall refresh-s3 - - wipe-solr-data: - name: Wipe Solr Data - runs-on: ubuntu-22.04 - environment: - name: ${{ github.event.inputs.destination_env }} - needs: stop-destination-environment - steps: - - uses: actions/checkout@v4.1.1 - - - name: Configure kubectl - run: | - echo "${{ secrets.KUBE_CERT }}" > ca.crt - kubectl config set-cluster ${KUBE_CLUSTER} --certificate-authority=./ca.crt --server=https://${KUBE_CLUSTER} - kubectl config set-credentials deploy-user --token=${{ secrets.KUBE_TOKEN }} - kubectl config set-context ${KUBE_CLUSTER} --cluster=${KUBE_CLUSTER} --user=deploy-user --namespace=${KUBE_NAMESPACE} - 
kubectl config use-context ${KUBE_CLUSTER} - env: - KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }} - KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }} - - - name: Start Solr Data Wipe Job - run: | - kubectl apply -f jobs/wipe-solr-data.yaml - kubectl wait --timeout 10m --for=condition=complete job/wipe-solr-data - - - name: Delete Refresh Job - run: kubectl delete job wipe-solr-data + run: helm uninstall refresh-s3 --ignore-not-found + + # wipe-solr-data: + # name: Wipe Solr Data + # runs-on: ubuntu-22.04 + # environment: + # name: ${{ github.event.inputs.destination_env }} + # needs: stop-destination-environment + # steps: + # - uses: actions/checkout@v4.1.1 + + # - name: Configure kubectl + # run: | + # echo "${{ secrets.KUBE_CERT }}" > ca.crt + # kubectl config set-cluster ${KUBE_CLUSTER} --certificate-authority=./ca.crt --server=https://${KUBE_CLUSTER} + # kubectl config set-credentials deploy-user --token=${{ secrets.KUBE_TOKEN }} + # kubectl config set-context ${KUBE_CLUSTER} --cluster=${KUBE_CLUSTER} --user=deploy-user --namespace=${KUBE_NAMESPACE} + # kubectl config use-context ${KUBE_CLUSTER} + # env: + # KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }} + # KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }} + + # - name: Start Solr Data Wipe Job + # run: | + # kubectl apply -f jobs/wipe-solr-data.yaml + # kubectl wait --timeout 10m --for=condition=complete job/wipe-solr-data + + # - name: Delete Refresh Job + # run: kubectl delete job wipe-solr-data start-destination-environment: name: Start ${{ github.event.inputs.destination_env }} Environment @@ -177,7 +178,7 @@ jobs: needs: - refresh-db - refresh-s3 - - wipe-solr-data + # - wipe-solr-data steps: - name: Configure kubectl run: | @@ -190,12 +191,10 @@ jobs: KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }} KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }} - - name: Stop ${{ github.event.inputs.destination_env }} Environment + - name: Start ${{ github.event.inputs.destination_env }} Environment run: | - apt update && apt install -y jq - 
HELM_VALUES=$(helm get values alfresco-content-services -o json) kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=$(echo $HELM_VALUES | jq '.repository.replicaCount') kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=$(echo $HELM_VALUES | jq '.share.replicaCount') - kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=1 + kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=1 diff --git a/jobs/refresh-db/Chart.yaml b/jobs/refresh-db/Chart.yaml index c09d657..f5fba86 100644 --- a/jobs/refresh-db/Chart.yaml +++ b/jobs/refresh-db/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 appVersion: 0.1 -version: 0.0.1 +version: 0.0.2 description: Job to refresh DB data name: refresh-db diff --git a/jobs/refresh-db/templates/job.yaml b/jobs/refresh-db/templates/job.yaml index 617b077..97a45b9 100644 --- a/jobs/refresh-db/templates/job.yaml +++ b/jobs/refresh-db/templates/job.yaml @@ -7,14 +7,37 @@ data: entrypoint.sh: |- #!/bin/bash set -e + trap 'send_slack_notification $?' 
EXIT + + function send_slack_notification() { + STATUS=$1 + if [ "$STATUS" -eq 0 ]; then + JSON_PAYLOAD=$(jq -n --arg text "Refresh DB (${SRC_ENV} to ${DST_ENV}) job succeeded" '{text: $text}') + else + ERROR_MSG=$(tail -n 10 ~/error.log) || ERROR_MSG="Unknown error" + JSON_PAYLOAD=$(jq -n --arg text "Refresh DB (${SRC_ENV} to ${DST_ENV}) job failed with error: $ERROR_MSG" '{text: $text}') + fi + curl -X POST -H 'Content-type: application/json' --data "$JSON_PAYLOAD" "$SLACK_WEBHOOK_URL" + } + echo "${SRC_DB_HOST}:5432:${SRC_DB_NAME}:${SRC_DB_USER}:${SRC_DB_PASS}" > ~/.pgpass echo "${DST_DB_HOST}:5432:${DST_DB_NAME}:${DST_DB_USER}:${DST_DB_PASS}" >> ~/.pgpass + # ~/.pgpass holds DB passwords - never print it to the job log chmod 0600 ~/.pgpass + chown job:job ~/.pgpass set -x - pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/tmp/db-dump - pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --clean --if-exists --no-owner --no-privileges --verbose /tmp/db-dump - rm -rv /tmp/db-dump ~/.pgpass + # Dump the source database + pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/home/job/db-dump 2> >(tee ~/error.log >&2) + + psql --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" -c "drop schema if exists public cascade;" 2> >(tee ~/error.log >&2) + + psql --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" -c "create schema public;" 2> >(tee ~/error.log >&2) + + # Restore the source database dump to the destination database + pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --no-owner --no-privileges --verbose /home/job/db-dump 2> >(tee ~/error.log >&2) + rm -rv /home/job/db-dump ~/.pgpass + --- apiVersion: batch/v1 kind: Job @@ -25,7 +48,7 @@ spec: spec: containers: - name: refresh-db - image: 
postgres:14 + image: ghcr.io/ministryofjustice/hmpps-delius-alfresco-db-utils:latest imagePullPolicy: IfNotPresent resources: limits: @@ -34,6 +57,8 @@ spec: command: - /bin/entrypoint.sh env: + - name: HOME + value: "/home/job" - name: SRC_DB_NAME valueFrom: secretKeyRef: @@ -74,6 +99,12 @@ spec: secretKeyRef: name: rds-instance-output-{{ .Values.destinationEnvironment }} key: RDS_INSTANCE_ADDRESS + - name: SLACK_WEBHOOK_URL + value: "{{ .Values.slackWebhookUrl }}" + - name: SRC_ENV + value: "{{ .Values.sourceEnvironment }}" + - name: DST_ENV + value: "{{ .Values.destinationEnvironment }}" volumeMounts: - name: refresh-db-script mountPath: /bin/entrypoint.sh @@ -90,8 +121,8 @@ spec: - ALL seccompProfile: type: RuntimeDefault - serviceAccount: hmpps-migration-development - serviceAccountName: hmpps-migration-development + serviceAccount: hmpps-migration-dev + serviceAccountName: hmpps-migration-dev restartPolicy: Never volumes: - name: refresh-db-script diff --git a/jobs/refresh-s3/templates/job.yaml b/jobs/refresh-s3/templates/job.yaml index 29552e9..763758a 100644 --- a/jobs/refresh-s3/templates/job.yaml +++ b/jobs/refresh-s3/templates/job.yaml @@ -66,8 +66,8 @@ spec: - ALL seccompProfile: type: RuntimeDefault - serviceAccount: hmpps-migration-development - serviceAccountName: hmpps-migration-development + serviceAccount: hmpps-migration-dev + serviceAccountName: hmpps-migration-dev restartPolicy: OnFailure volumes: - name: refresh-s3-script diff --git a/tools/db-utils/Dockerfile b/tools/db-utils/Dockerfile new file mode 100644 index 0000000..5ac167a --- /dev/null +++ b/tools/db-utils/Dockerfile @@ -0,0 +1,13 @@ +FROM debian:bookworm-slim + +# Install postgresql-client, curl and jq in a single layer so each install +# sees a fresh apt index (separate RUN layers can reuse a stale cache); +# --no-install-recommends and list cleanup keep the image small. +RUN apt-get update \ +    && apt-get install -y --no-install-recommends postgresql-client curl jq \ +    && rm -rf /var/lib/apt/lists/* + +# Create a non-root user and set the home directory +RUN useradd -u 999 -ms /bin/bash 
job +USER job +WORKDIR /home/job