diff --git a/.github/workflows/build-push-db-utils.yml b/.github/workflows/build-push-db-utils.yml
new file mode 100644
index 0000000..f921712
--- /dev/null
+++ b/.github/workflows/build-push-db-utils.yml
@@ -0,0 +1,42 @@
+name: Build and push image
+
+on:
+  push:
+  workflow_dispatch:
+
+env:
+  IMAGE_NAME: hmpps-delius-alfresco-db-utils
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout code
+        uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81
+        with:
+          images: ${{ env.IMAGE_NAME }}
+      - name: Build and push Docker image
+        if: github.ref == 'refs/heads/main'
+        uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445
+        with:
+          context: ./tools/db-utils/
+          push: true
+          tags: ghcr.io/${{ github.repository_owner }}/${{ steps.meta.outputs.tags }}, ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:latest
+          labels: ${{ steps.meta.outputs.labels }}
+      - name: Build and push Docker image
+        if: github.ref != 'refs/heads/main'
+        uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445
+        with:
+          context: ./tools/db-utils/
+          push: true
+          tags: ghcr.io/${{ github.repository_owner }}/${{ steps.meta.outputs.tags }}, ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}-${{ github.run_id }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.github/workflows/data-refresh.yaml b/.github/workflows/data-refresh.yaml
index 648ac32..5aa0f27 100644
--- a/.github/workflows/data-refresh.yaml
+++ b/.github/workflows/data-refresh.yaml
@@ -41,12 +41,12 @@ jobs:
       - name: Stop ${{ github.event.inputs.destination_env }} Environment
         run: |
           kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=0
-          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=0
-          kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=0
+          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=0
+          kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=0

   refresh-db:
     name: Refresh DB
-    runs-on: [self-hosted, Linux, management-infrastructure]
+    runs-on: ubuntu-22.04
     environment:
       name: ${{ github.event.inputs.source_env }}
     needs: stop-destination-environment
@@ -59,11 +59,10 @@ jobs:
           version: 'v1.26.0' # default is latest stable
         id: kubectl_install

-      - name: Install Helm
-        uses: azure/setup-helm@v3.5
+      - uses: azure/setup-helm@v4.2.0
         with:
-          version: 'v3.9.0'
-        id: helm_install
+          version: 'v3.15.3' # default is latest (stable)
+        id: install

       - name: Configure kubectl
         run: |
@@ -76,20 +75,20 @@ jobs:
           KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
           KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

+      - name: Uninstall DB Refresh chart
+        run: helm uninstall refresh-db --ignore-not-found
+
       - name: DB Refresh
         working-directory: jobs/refresh-db
         run: |
           helm install refresh-db . \
             --set sourceEnvironment=${{ github.event.inputs.source_env }} \
-            --set destinationEnvironment=${{ github.event.inputs.destination_env }}
-          kubectl wait job refresh-db --for=condition=complete --timeout 10h
-
-      - name: Uninstall DB Refresh chart
-        run: helm uninstall refresh-db
+            --set destinationEnvironment=${{ github.event.inputs.destination_env }} \
+            --set slackWebhookUrl=${{ secrets.SLACK_WEBHOOK_URL }}

   refresh-s3:
     name: Refresh S3
-    runs-on: [self-hosted, Linux, management-infrastructure]
+    runs-on: ubuntu-22.04
     environment:
       name: ${{ github.event.inputs.source_env }}
     needs: stop-destination-environment
@@ -102,11 +101,10 @@ jobs:
           version: 'v1.26.0' # default is latest stable
         id: kubectl_install

-      - name: Install Helm
-        uses: azure/setup-helm@v3.5
+      - uses: azure/setup-helm@v4.2.0
         with:
-          version: 'v3.9.0'
-        id: helm_install
+          version: 'v3.15.3' # default is latest (stable)
+        id: install

       - name: Configure kubectl
         run: |
@@ -118,6 +116,9 @@ jobs:
         env:
           KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
           KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}
+
+      - name: Uninstall S3 Refresh chart
+        run: helm uninstall refresh-s3 --ignore-not-found

       - name: S3 Refresh
         working-directory: jobs/refresh-s3
@@ -139,35 +140,35 @@ jobs:
           kubectl wait jobs -l name-prefix=refresh-s3 --for=condition=complete --timeout 10h

       - name: Uninstall S3 Refresh chart
-        run: helm uninstall refresh-s3
-
-  wipe-solr-data:
-    name: Wipe Solr Data
-    runs-on: ubuntu-22.04
-    environment:
-      name: ${{ github.event.inputs.destination_env }}
-    needs: stop-destination-environment
-    steps:
-      - uses: actions/checkout@v4.1.1
-
-      - name: Configure kubectl
-        run: |
-          echo "${{ secrets.KUBE_CERT }}" > ca.crt
-          kubectl config set-cluster ${KUBE_CLUSTER} --certificate-authority=./ca.crt --server=https://${KUBE_CLUSTER}
-          kubectl config set-credentials deploy-user --token=${{ secrets.KUBE_TOKEN }}
-          kubectl config set-context ${KUBE_CLUSTER} --cluster=${KUBE_CLUSTER} --user=deploy-user --namespace=${KUBE_NAMESPACE}
-          kubectl config use-context ${KUBE_CLUSTER}
-        env:
-          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
-          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}
-
-      - name: Start Solr Data Wipe Job
-        run: |
-          kubectl apply -f jobs/wipe-solr-data.yaml
-          kubectl wait --timeout 10m --for=condition=complete job/wipe-solr-data
-
-      - name: Delete Refresh Job
-        run: kubectl delete job wipe-solr-data
+        run: helm uninstall refresh-s3 --ignore-not-found
+
+  # wipe-solr-data:
+  #   name: Wipe Solr Data
+  #   runs-on: ubuntu-22.04
+  #   environment:
+  #     name: ${{ github.event.inputs.destination_env }}
+  #   needs: stop-destination-environment
+  #   steps:
+  #     - uses: actions/checkout@v4.1.1
+
+  #     - name: Configure kubectl
+  #       run: |
+  #         echo "${{ secrets.KUBE_CERT }}" > ca.crt
+  #         kubectl config set-cluster ${KUBE_CLUSTER} --certificate-authority=./ca.crt --server=https://${KUBE_CLUSTER}
+  #         kubectl config set-credentials deploy-user --token=${{ secrets.KUBE_TOKEN }}
+  #         kubectl config set-context ${KUBE_CLUSTER} --cluster=${KUBE_CLUSTER} --user=deploy-user --namespace=${KUBE_NAMESPACE}
+  #         kubectl config use-context ${KUBE_CLUSTER}
+  #       env:
+  #         KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
+  #         KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}
+
+  #     - name: Start Solr Data Wipe Job
+  #       run: |
+  #         kubectl apply -f jobs/wipe-solr-data.yaml
+  #         kubectl wait --timeout 10m --for=condition=complete job/wipe-solr-data
+
+  #     - name: Delete Refresh Job
+  #       run: kubectl delete job wipe-solr-data

   start-destination-environment:
     name: Start ${{ github.event.inputs.destination_env }} Environment
@@ -177,7 +178,7 @@ jobs:
     needs:
       - refresh-db
       - refresh-s3
-      - wipe-solr-data
+      # - wipe-solr-data
     steps:
       - name: Configure kubectl
         run: |
@@ -190,12 +191,10 @@ jobs:
           KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
           KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

-      - name: Stop ${{ github.event.inputs.destination_env }} Environment
+      - name: Start ${{ github.event.inputs.destination_env }} Environment
         run: |
-          apt update && apt install -y jq
-
           HELM_VALUES=$(helm get values alfresco-content-services -o json)
           kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=$(echo $HELM_VALUES | jq '.repository.replicaCount')
           kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=$(echo $HELM_VALUES | jq '.share.replicaCount')
-          kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=1
+          kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=1

diff --git a/jobs/refresh-db/Chart.yaml b/jobs/refresh-db/Chart.yaml
index c09d657..f5fba86 100644
--- a/jobs/refresh-db/Chart.yaml
+++ b/jobs/refresh-db/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
 appVersion: 0.1
-version: 0.0.1
+version: 0.0.2
 description: Job to refresh DB data
 name: refresh-db
diff --git a/jobs/refresh-db/templates/job.yaml b/jobs/refresh-db/templates/job.yaml
index 617b077..97a45b9 100644
--- a/jobs/refresh-db/templates/job.yaml
+++ b/jobs/refresh-db/templates/job.yaml
@@ -7,14 +7,37 @@ data:
   entrypoint.sh: |-
     #!/bin/bash
     set -e
+    trap 'send_slack_notification $?' EXIT
+
+    function send_slack_notification() {
+      STATUS=$1
+      if [ "$STATUS" -eq 0 ]; then
+        JSON_PAYLOAD=$(jq -n --arg text "Refresh DB (${SRC_ENV} to ${DST_ENV}) job succeeded" '{text: $text}')
+      else
+        ERROR_MSG=$(tail -n 10 ~/error.log) || ERROR_MSG="Unknown error"
+        JSON_PAYLOAD=$(jq -n --arg text "Refresh DB (${SRC_ENV} to ${DST_ENV}) job failed with error: $ERROR_MSG" '{text: $text}')
+      fi
+      curl -X POST -H 'Content-type: application/json' --data "$JSON_PAYLOAD" $SLACK_WEBHOOK_URL
+    }
+
     echo "${SRC_DB_HOST}:5432:${SRC_DB_NAME}:${SRC_DB_USER}:${SRC_DB_PASS}" > ~/.pgpass
     echo "${DST_DB_HOST}:5432:${DST_DB_NAME}:${DST_DB_USER}:${DST_DB_PASS}" >> ~/.pgpass
+    cat ~/.pgpass
     chmod 0600 ~/.pgpass
+    chown job:job ~/.pgpass
     set -x
-    pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/tmp/db-dump
-    pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --clean --if-exists --no-owner --no-privileges --verbose /tmp/db-dump
-    rm -rv /tmp/db-dump ~/.pgpass
+    # Dump the source database
+    pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/home/job/db-dump 2> >(tee ~/error.log >&2)
+
+    psql --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" -c "drop schema if exists public cascade;" 2> >(tee ~/error.log >&2)
+
+    psql --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" -c "create schema public;" 2> >(tee ~/error.log >&2)
+
+    # Restore the source database dump to the destination database
+    pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --no-owner --no-privileges --verbose /home/job/db-dump 2> >(tee ~/error.log >&2)
+    rm -rv /home/job/db-dump ~/.pgpass
+
 ---
 apiVersion: batch/v1
 kind: Job
@@ -25,7 +48,7 @@ spec:
     spec:
       containers:
         - name: refresh-db
-          image: postgres:14
+          image: ghcr.io/ministryofjustice/hmpps-delius-alfresco-db-utils:NIT-1403-alfresco-move-away-from-long-running-github-workflows-10114657186
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -34,6 +57,8 @@ spec:
           command:
             - /bin/entrypoint.sh
           env:
+            - name: HOME
+              value: "/home/job"
             - name: SRC_DB_NAME
               valueFrom:
                 secretKeyRef:
@@ -74,6 +99,12 @@ spec:
                 secretKeyRef:
                   name: rds-instance-output-{{ .Values.destinationEnvironment }}
                   key: RDS_INSTANCE_ADDRESS
+            - name: SLACK_WEBHOOK_URL
+              value: "{{ .Values.slackWebhookUrl }}"
+            - name: SRC_ENV
+              value: "{{ .Values.sourceEnvironment }}"
+            - name: DST_ENV
+              value: "{{ .Values.destinationEnvironment }}"
           volumeMounts:
             - name: refresh-db-script
               mountPath: /bin/entrypoint.sh
@@ -90,8 +121,8 @@ spec:
                 - ALL
             seccompProfile:
               type: RuntimeDefault
-      serviceAccount: hmpps-migration-development
-      serviceAccountName: hmpps-migration-development
+      serviceAccount: hmpps-migration-dev
+      serviceAccountName: hmpps-migration-dev
       restartPolicy: Never
       volumes:
         - name: refresh-db-script
diff --git a/jobs/refresh-s3/templates/job.yaml b/jobs/refresh-s3/templates/job.yaml
index 29552e9..763758a 100644
--- a/jobs/refresh-s3/templates/job.yaml
+++ b/jobs/refresh-s3/templates/job.yaml
@@ -66,8 +66,8 @@ spec:
                 - ALL
             seccompProfile:
               type: RuntimeDefault
-      serviceAccount: hmpps-migration-development
-      serviceAccountName: hmpps-migration-development
+      serviceAccount: hmpps-migration-dev
+      serviceAccountName: hmpps-migration-dev
       restartPolicy: OnFailure
       volumes:
         - name: refresh-s3-script
diff --git a/tools/db-utils/Dockerfile b/tools/db-utils/Dockerfile
new file mode 100644
index 0000000..5ac167a
--- /dev/null
+++ b/tools/db-utils/Dockerfile
@@ -0,0 +1,13 @@
+FROM debian:bookworm-slim
+
+# Install psql client
+RUN apt-get update && apt-get install -y postgresql-client
+# Install curl
+RUN apt-get install -y curl
+# Install jq
+RUN apt-get install -y jq
+
+# Create a non-root user and set the home directory
+RUN useradd -u 999 -ms /bin/bash job
+USER job
+WORKDIR /home/job
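
Note: a quick local smoke test of the new db-utils image before a workflow run pins it in the refresh-db chart (a sketch; the ":local" tag is illustrative — in the chart, use whichever tag the build-push workflow produced for your branch):

    # Build from the same context the workflow uses and confirm the tools the entrypoint relies on are present
    docker build -t ghcr.io/ministryofjustice/hmpps-delius-alfresco-db-utils:local ./tools/db-utils/
    docker run --rm ghcr.io/ministryofjustice/hmpps-delius-alfresco-db-utils:local \
      bash -c "pg_dump --version && pg_restore --version && psql --version && jq --version && curl --version"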