Skip to content

Commit

Permalink
Merge pull request #31 from kbase/develop
Browse files Browse the repository at this point in the history
DEVOPS-1770 - db_zip2cloud - RC 1.0
  • Loading branch information
jsfillman authored May 16, 2024
2 parents 9d58986 + c6338bb commit 2a4dfdf
Show file tree
Hide file tree
Showing 8 changed files with 306 additions and 61 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/manual-build.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
---
name: Manual Build & Push
on:
workflow_dispatch:
workflow_dispatch:
inputs:
platforms:
description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.'
required: false
default: 'linux/amd64,linux/arm64/v8'
jobs:
build-push:
uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
with:
name: '${{ github.event.repository.name }}-develop'
tags: br-${{ github.ref_name }}
platforms: ${{ github.event.inputs.platforms }}
secrets: inherit
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,32 @@ jobs:
build-develop-open:
if: github.base_ref == 'develop' && github.event.pull_request.merged == false
uses: kbase/.github/.github/workflows/reusable_build.yml@main
with:
platforms: 'linux/amd64,linux/arm64/v8'
secrets: inherit
build-develop-merge:
if: github.base_ref == 'develop' && github.event.pull_request.merged == true
uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
with:
name: '${{ github.event.repository.name }}-develop'
tags: pr-${{ github.event.number }},latest
platforms: 'linux/amd64,linux/arm64/v8'
secrets: inherit
build-main-open:
if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false
uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
with:
name: '${{ github.event.repository.name }}'
tags: pr-${{ github.event.number }}
platforms: 'linux/amd64,linux/arm64/v8'
secrets: inherit
build-main-merge:
if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == true
uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
with:
name: '${{ github.event.repository.name }}'
tags: pr-${{ github.event.number }},latest-rc
platforms: 'linux/amd64,linux/arm64/v8'
secrets: inherit
trivy-scans:
if: (github.base_ref == 'develop' || github.base_ref == 'main' || github.base_ref == 'master' ) && github.event.pull_request.merged == false
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/release-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ jobs:
with:
name: '${{ github.event.repository.name }}'
tags: '${{ github.event.release.tag_name }},latest'
platforms: 'linux/amd64,linux/arm64/v8'
secrets: inherit
39 changes: 17 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,27 @@
FROM arangodb:3.5.3
# Builder stage
FROM alpine:latest as builder

# Build arguments passed into the docker command for image metadata
ARG BUILD_DATE
ARG COMMIT
ARG BRANCH

# RUN pip install requests docker python-json-logger structlog && \
RUN apk update && \
apk add p7zip && \
cd /tmp && \
wget https://downloads.rclone.org/rclone-current-linux-amd64.zip && \
unzip rclone-current-linux-amd64.zip && \
mv rclone-v*-linux-amd64/rclone /bin/rclone && \
mkdir -p /root/.config/rclone/
apk add --no-cache curl p7zip rclone

# Create config directory
RUN mkdir -p /root/.config/rclone/

# Copy necessary files
COPY rclone.conf /root/.config/rclone/rclone.conf
COPY app/ /app/

LABEL org.label-schema.build-date=$BUILD_DATE \
org.label-schema.vcs-url="https://github.com/kbase/db_zip2cloud.git" \
org.label-schema.vcs-ref=$COMMIT \
org.label-schema.schema-version="1.0.0-rc1" \
us.kbase.vcs-branch=$BRANCH \
maintainer="Steve Chan [email protected]" \
org.opencontainers.image.source="https://github.com/kbase/db_zip2cloud"
# Final stage
FROM alpine:latest

WORKDIR /app
RUN apk update && \
apk add --no-cache curl p7zip

ENTRYPOINT /app/zip2cloud
# Copy necessary binaries and files from builder stage
COPY --from=builder /usr/bin/rclone /usr/bin/rclone
COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf
COPY --from=builder /app/ /app/

WORKDIR /app

ENTRYPOINT ["/app/zip2cloud"]
55 changes: 51 additions & 4 deletions Readme.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,59 @@


# Backup Service: `zip2cloud`

The `zip2cloud` application is a shell script that manages backup dumps, compresses them into zip files, compares them with existing backups in remote storage, and uploads any new or updated backups to the remote storage.

## Environment Variables

The script uses the following environment variables:

| Variable | Description | Default |
|--------------------|-----------------------------------------------------------------------|----------------------------------|
| `COMPRESSION_LEVEL`| Compression level for 7z files | 0 |
| `DELETE_DUMP` | Optionally deletes exports under `$DUMP_BASE` when done compressing | _Unused_ |
| `DUMP_BASE` | Base directory for dumps | `/dump/full_backup` |
| `DUMP_RETENTION` | Retention policy for dumps | 3 |
| `ENABLE_UPLOAD`    | Whether to upload the zip files to remote storage                     | `true`                           |
| `REMOTE` | Remote storage details | `remote:${BUCKET}/${BUCKETPATH}` |
| `SECRET` | Encryption key for 7z files | |
| `SLACK_CHANNEL` | Slack channel for notifications | _Unused_ |
| `SLACK_WEBHOOK` | Slack webhook for notifications | _Unused_ |
| `ZIP_BASE` | Base name for zip files | `backup_full` |
| `ZIP_DIR` | Directory for zip files | `/zip` |
| `ZIP_RETENTION` | Retention policy for zip files | 4 |

## Workflow

The script performs the following steps:

1. **Cleanup**: Removes old zip files and backup dumps based on the retention policies set in the environment variables.
2. **Zip**: Creates `.7z` archives of dump dirs (formatted as `YYYY-MM-DD`) in the `$DUMP_BASE`.
3. **Checksum**: Retrieves a list of remote backups and downloads the MD5 checksums for each remote backup into a temporary directory. It then compares the checksums of local zip files against the remote MD5 checksums, adding any files that don't match to an upload list.
4. **Create Upload List**: Verifies and updates the list of files to upload. For each file in the upload list, it compares the local and remote MD5 checksums. If there's a mismatch, it increments the filename and adds it to the final upload list. This incrementing process continues until it finds a filename that doesn't conflict with existing files in the remote storage.
5. **Upload**: Uploads the files in the final upload list to the remote storage using the `rclone` command.

## Dockerfile

The Dockerfile for this application is based on the `alpine:latest` image and includes the necessary binaries and files for the `zip2cloud` script. The Dockerfile uses a multi-stage build process to keep the final image size small.

## GitHub Actions

The application uses GitHub Actions for continuous integration. The workflows are defined in the `.github/workflows/` directory and include steps for building, tagging, and pushing Docker images, as well as scanning for vulnerabilities with Trivy.

---

## Previous Version

### db_zip2cloud

This is a container for backing up databases such as ArangoDB, compressing the backups and then synchronizing a remote S3 bucket against a local archive of the compressed backups
This is a simple cron container for backing up databases such as ArangoDB, compressing the backups and then synchronizing a remote S3 bucket against a local archive of the compressed backups

## Operation
#### Operation

1. [OPTIONAL] Perform a database dump based on environment variables provided, and place it in /dump/
2. Use 7zip to compress and encrypt the contents of the /dump/ directory and put it in into /zip/
* The resulting zip will have "dump/" as the relative root directory
* The resulting zip will have "dump/" as the relative root directory
3. Prune any files in /zip/ that are older than 30 days
4. Use rclone with an AWS S3 compatible provider to synchronize /zip/ against a remote S3 bucket and directory. Currently configured for Google Cloud Storage in file rclone.conf

Expand All @@ -20,4 +67,4 @@ The following environment variables need to be passed into the runtime environme

The following volumes need to be mounted into the running container:
* /dump/ - Directory either containing existing DB dumps or which will be the destination for a DB dump.
* /zip/ - Directory for writing the compressed/encrypted DB dumps before copying to the S3 remote
* /zip/ - Directory for writing the compressed/encrypted DB dumps before copying to the S3 remote
43 changes: 43 additions & 0 deletions app/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

# zip2cloud Application

The `zip2cloud` application is a shell script that manages backup dumps, compresses them into zip files, compares them with existing backups in remote storage, and uploads any new or updated backups to the remote storage.

## Environment Variables

The script uses the following environment variables:

| Variable | Description | Default |
|--------------------|-----------------------------------------------------------------------|----------------------------------|
| `COMPRESSION_LEVEL`| Compression level for 7z files | 0 |
| `DELETE_DUMP` | Optionally deletes exports under `$DUMP_BASE` when done compressing | _Unused_ |
| `DUMP_BASE` | Base directory for dumps | `/dump/full_backup` |
| `DUMP_RETENTION` | Retention policy for dumps | 3 |
| `ENABLE_UPLOAD`    | Whether to upload the zip files to remote storage                     | `true`                           |
| `REMOTE` | Remote storage details | `remote:${BUCKET}/${BUCKETPATH}` |
| `SECRET` | Encryption key for 7z files | |
| `SLACK_CHANNEL` | Slack channel for notifications | _Unused_ |
| `SLACK_WEBHOOK` | Slack webhook for notifications | _Unused_ |
| `ZIP_BASE` | Base name for zip files | `backup_full` |
| `ZIP_DIR` | Directory for zip files | `/zip` |
| `ZIP_RETENTION` | Retention policy for zip files | 4 |


## Workflow

The script performs the following steps:

1. **Cleanup**: Removes old zip files and backup dumps based on the retention policies set in the environment variables.
2. **Zip**: Creates `.7z` archives of dump dirs (formatted as `YYYY-MM-DD`) in the `$DUMP_BASE`.
3. **Checksum**: Retrieves a list of remote backups and downloads the MD5 checksums for each remote backup into a temporary directory. It then compares the checksums of local zip files against the remote MD5 checksums, adding any files that don't match to an upload list.
4. **Create Upload List**: Verifies and updates the list of files to upload. For each file in the upload list, it compares the local and remote MD5 checksums. If there's a mismatch, it increments the filename and adds it to the final upload list. This incrementing process continues until it finds a filename that doesn't conflict with existing files in the remote storage.
5. **Upload**: Uploads the files in the final upload list to the remote storage using the `rclone` command.


## Dockerfile

The Dockerfile for this application is based on the `alpine:latest` image and includes the necessary binaries and files for the `zip2cloud` script. The Dockerfile uses a multi-stage build process to keep the final image size small.

## GitHub Actions

The application uses GitHub Actions for continuous integration. The workflows are defined in the `.github/workflows/` directory and include steps for building, tagging, and pushing Docker images, as well as scanning for vulnerabilities with Trivy.
28 changes: 28 additions & 0 deletions app/create-test-dumps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/sh
#
# create-test-dumps.sh — generate fake backup-dump fixtures for testing
# zip2cloud. Creates five dated directories under $DUMP_BASE (one per day,
# going back five days), each holding five small files of random content.
#
# Environment:
#   DUMP_BASE - base directory for dumps (default: /dump/full_backup,
#               matching the default used by zip2cloud)

DUMP_BASE=${DUMP_BASE:-/dump/full_backup}

# Emit one line of 10 random alphanumeric characters (plus trailing newline).
# Reads /dev/urandom directly instead of piping it through cat (UUOC).
generate_random_string() {
  tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 10 | head -n 1
}

# Create one directory per day for the past 5 days, each with 5 files.
for i in $(seq 1 5); do
  # Directory name is the date i days ago, formatted YYYY-MM-DD.
  # NOTE(review): 'date -d "N days ago"' is GNU coreutils syntax; BusyBox
  # date (e.g. in the project's Alpine image) rejects it — this helper is
  # only usable on a GNU system. Confirm where it is intended to run.
  dir_date=$(date -d "$i days ago" +%Y-%m-%d)
  dir_path="${DUMP_BASE}/${dir_date}"
  # Create the directory only if missing, so re-runs are idempotent.
  if [ ! -d "$dir_path" ]; then
    mkdir -p "$dir_path"
  fi
  # Populate with random-content files, skipping any that already exist
  # so repeated runs leave prior fixtures untouched.
  for j in $(seq 1 5); do
    file_path="${dir_path}/file_${j}.txt"
    if [ ! -f "$file_path" ]; then
      # Redirect the function's output directly; the original wrapped it
      # in echo "$(...)" which only stripped and re-added the newline.
      generate_random_string > "$file_path"
    fi
  done
done
Loading

0 comments on commit 2a4dfdf

Please sign in to comment.