diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5926f8d --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +readme.md \ No newline at end of file diff --git a/.github/actions/build-push-image/action.yaml b/.github/actions/build-push-image/action.yaml new file mode 100644 index 0000000..65bfed9 --- /dev/null +++ b/.github/actions/build-push-image/action.yaml @@ -0,0 +1,72 @@ +name: Build and push image +description: 'Builds and pushes a Docker image to Amazon ECR' + +inputs: + aws-access-key-id: + description: 'AWS Access Key ID' + required: true + aws-secret-access-key: + description: 'AWS Secret Access Key' + required: true + aws-region: + description: 'AWS Region' + required: true + ecr-repository: + description: 'ECR Repository' + required: true + dockerfile: + description: 'Dockerfile' + required: true + default: './Dockerfile' + stage: + description: 'Deployment stage' + required: true + buildable: + description: 'Whether the image should be built and pushed' + required: true + default: 'true' + +outputs: + image: + description: 'Docker image URI' + value: ${{ steps.image-uri.outputs.tag }}:${{ inputs.stage }}-${{ github.sha }} + +runs: + using: 'composite' + steps: + - name: Set up Docker Buildx + if: ${{ inputs.buildable == 'true' }} + uses: docker/setup-buildx-action@v3 + with: + version: latest + + - name: Configure AWS Credentials + if: ${{ inputs.buildable == 'true' }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Login to Amazon ECR + if: ${{ inputs.buildable == 'true' }} + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Tag Image + if: ${{ inputs.buildable == 'true' }} + id: image-uri + shell: bash + run: | + tag=${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr-repository }} + echo "tag=$tag" >> $GITHUB_OUTPUT + + - name: Build 
and Push Image + if: ${{ inputs.buildable == 'true' }} + uses: docker/build-push-action@v5 + with: + push: true + file: ${{ inputs.dockerfile }} + tags: ${{ steps.image-uri.outputs.tag }}:latest, ${{ steps.image-uri.outputs.tag }}:${{ github.sha }}, ${{ steps.image-uri.outputs.tag }}:${{ inputs.stage }}-${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/actions/terraform-apply/action.yaml b/.github/actions/terraform-apply/action.yaml new file mode 100644 index 0000000..08a054f --- /dev/null +++ b/.github/actions/terraform-apply/action.yaml @@ -0,0 +1,81 @@ +name: Terraform Apply +description: 'Applies Terraform configuration' + +inputs: + aws-access-key-id: + description: 'AWS Access Key ID' + required: true + aws-secret-access-key: + description: 'AWS Secret Access Key' + required: true + aws-region: + description: 'AWS Region' + required: true + tfstate-bucket: + description: 'Terraform state bucket' + required: true + tfstate-key: + description: 'Terraform state key' + required: true + stage: + description: 'Deployment stage' + required: true + GRAFANA_ADMIN_PASSWORD: + description: 'Grafana admin password' + required: true + CLOUDFLARE_ZONE_ID: + description: 'Cloudflare Zone ID' + required: true + CLOUDFLARE_TOKEN: + description: 'Cloudflare Token' + required: true + CLOUDFLARE_EMAIL: + description: 'Cloudflare Email' + required: true + CLOUDFLARE_API_TOKEN: + description: 'Cloudflare API Token' + required: true + SLACK_WEBHOOK: + description: 'Slack Webhook' + required: true + TAILSCALE_CLIENT_ID: + description: 'Tailscale Client ID' + required: true + TAILSCALE_CLIENT_SECRET: + description: 'Tailscale Client Secret' + required: true + CROWDSEC_ENROLL_KEY: + description: 'CrowdSec Enroll Key' + required: true + +runs: + using: 'composite' + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ 
inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Terraform Apply + env: + TF_VAR_GRAFANA_ADMIN_PASSWORD: ${{ inputs.GRAFANA_ADMIN_PASSWORD }} + TF_VAR_CLOUDFLARE_ZONE_ID: ${{ inputs.CLOUDFLARE_ZONE_ID }} + TF_VAR_CLOUDFLARE_TOKEN: ${{ inputs.CLOUDFLARE_TOKEN }} + TF_VAR_CLOUDFLARE_EMAIL: ${{ inputs.CLOUDFLARE_EMAIL }} + TF_VAR_CLOUDFLARE_API_TOKEN: ${{ inputs.CLOUDFLARE_API_TOKEN }} + TF_VAR_SLACK_WEBHOOK: ${{ inputs.SLACK_WEBHOOK }} + TF_VAR_TAILSCALE_CLIENT_ID: ${{ inputs.TAILSCALE_CLIENT_ID }} + TF_VAR_TAILSCALE_CLIENT_SECRET: ${{ inputs.TAILSCALE_CLIENT_SECRET }} + TF_VAR_CROWDSEC_ENROLL_KEY: ${{ inputs.CROWDSEC_ENROLL_KEY }} + uses: dflook/terraform-apply@v1 + with: + path: ./terraform + var_file: ./terraform/stages/${{ inputs.stage }}.tfvars + backend_config: > + bucket=${{ inputs.tfstate-bucket }}, + key=${{ inputs.tfstate-key }}, + region=${{ inputs.aws-region }}, + encrypt=true + auto_approve: true diff --git a/.github/actions/terraform-destroy/action.yaml b/.github/actions/terraform-destroy/action.yaml new file mode 100644 index 0000000..f89fecd --- /dev/null +++ b/.github/actions/terraform-destroy/action.yaml @@ -0,0 +1,80 @@ +name: Terraform Destroy +description: 'Destroys the infrastructure managed by the Terraform configuration' + +inputs: + aws-access-key-id: + description: 'AWS Access Key ID' + required: true + aws-secret-access-key: + description: 'AWS Secret Access Key' + required: true + aws-region: + description: 'AWS Region' + required: true + tfstate-bucket: + description: 'Terraform state bucket' + required: true + tfstate-key: + description: 'Terraform state key' + required: true + stage: + description: 'Deployment stage' + required: true + GRAFANA_ADMIN_PASSWORD: + description: 'Grafana admin password' + required: true + CLOUDFLARE_ZONE_ID: + description: 'Cloudflare Zone ID' + required: true + CLOUDFLARE_TOKEN: + description: 'Cloudflare Token' + required: true + CLOUDFLARE_EMAIL: + description: 'Cloudflare Email' + required: true + 
CLOUDFLARE_API_TOKEN: + description: 'Cloudflare API Token' + required: true + SLACK_WEBHOOK: + description: 'Slack Webhook' + required: true + TAILSCALE_CLIENT_ID: + description: 'Tailscale Client ID' + required: true + TAILSCALE_CLIENT_SECRET: + description: 'Tailscale Client Secret' + required: true + CROWDSEC_ENROLL_KEY: + description: 'CrowdSec Enroll Key' + required: true + +runs: + using: 'composite' + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Terraform Destroy + env: + TF_VAR_GRAFANA_ADMIN_PASSWORD: ${{ inputs.GRAFANA_ADMIN_PASSWORD }} + TF_VAR_CLOUDFLARE_ZONE_ID: ${{ inputs.CLOUDFLARE_ZONE_ID }} + TF_VAR_CLOUDFLARE_TOKEN: ${{ inputs.CLOUDFLARE_TOKEN }} + TF_VAR_CLOUDFLARE_EMAIL: ${{ inputs.CLOUDFLARE_EMAIL }} + TF_VAR_CLOUDFLARE_API_TOKEN: ${{ inputs.CLOUDFLARE_API_TOKEN }} + TF_VAR_SLACK_WEBHOOK: ${{ inputs.SLACK_WEBHOOK }} + TF_VAR_TAILSCALE_CLIENT_ID: ${{ inputs.TAILSCALE_CLIENT_ID }} + TF_VAR_TAILSCALE_CLIENT_SECRET: ${{ inputs.TAILSCALE_CLIENT_SECRET }} + TF_VAR_CROWDSEC_ENROLL_KEY: ${{ inputs.CROWDSEC_ENROLL_KEY }} + uses: dflook/terraform-destroy@v1 + with: + path: ./terraform + var_file: ./terraform/stages/${{ inputs.stage }}.tfvars + backend_config: > + bucket=${{ inputs.tfstate-bucket }}, + key=${{ inputs.tfstate-key }}, + region=${{ inputs.aws-region }}, + encrypt=true diff --git a/.github/actions/terraform-plan/action.yaml b/.github/actions/terraform-plan/action.yaml new file mode 100644 index 0000000..a4eb607 --- /dev/null +++ b/.github/actions/terraform-plan/action.yaml @@ -0,0 +1,87 @@ +name: Terraform Plan +description: 'Plans changes to the Terraform configuration without applying them' + +inputs: + aws-access-key-id: + description: 'AWS Access Key ID' + required: true + aws-secret-access-key: + description: 'AWS Secret Access Key' + required: true + 
aws-region: + description: 'AWS Region' + required: true + tfstate-bucket: + description: 'Terraform state bucket' + required: true + tfstate-key: + description: 'Terraform state key' + required: true + stage: + description: 'Deployment stage' + required: true + github-token: + description: 'GitHub token' + required: true + GRAFANA_ADMIN_PASSWORD: + description: 'Grafana admin password' + required: true + CLOUDFLARE_ZONE_ID: + description: 'Cloudflare Zone ID' + required: true + CLOUDFLARE_TOKEN: + description: 'Cloudflare Token' + required: true + CLOUDFLARE_EMAIL: + description: 'Cloudflare Email' + required: true + CLOUDFLARE_API_TOKEN: + description: 'Cloudflare API Token' + required: true + SLACK_WEBHOOK: + description: 'Slack Webhook' + required: true + TAILSCALE_CLIENT_ID: + description: 'Tailscale Client ID' + required: true + TAILSCALE_CLIENT_SECRET: + description: 'Tailscale Client Secret' + required: true + CROWDSEC_ENROLL_KEY: + description: 'CrowdSec Enroll Key' + required: true + +runs: + using: 'composite' + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Export GitHub Token + shell: bash + run: echo "GITHUB_TOKEN=${{ inputs.github-token }}" >> $GITHUB_ENV + + - name: Terraform Plan + env: + TF_VAR_GRAFANA_ADMIN_PASSWORD: ${{ inputs.GRAFANA_ADMIN_PASSWORD }} + TF_VAR_CLOUDFLARE_ZONE_ID: ${{ inputs.CLOUDFLARE_ZONE_ID }} + TF_VAR_CLOUDFLARE_TOKEN: ${{ inputs.CLOUDFLARE_TOKEN }} + TF_VAR_CLOUDFLARE_EMAIL: ${{ inputs.CLOUDFLARE_EMAIL }} + TF_VAR_CLOUDFLARE_API_TOKEN: ${{ inputs.CLOUDFLARE_API_TOKEN }} + TF_VAR_SLACK_WEBHOOK: ${{ inputs.SLACK_WEBHOOK }} + TF_VAR_TAILSCALE_CLIENT_ID: ${{ inputs.TAILSCALE_CLIENT_ID }} + TF_VAR_TAILSCALE_CLIENT_SECRET: ${{ inputs.TAILSCALE_CLIENT_SECRET }} + TF_VAR_CROWDSEC_ENROLL_KEY: ${{ 
inputs.CROWDSEC_ENROLL_KEY }} + uses: dflook/terraform-plan@v1 + with: + path: ./terraform + var_file: ./terraform/stages/${{ inputs.stage }}.tfvars + backend_config: > + bucket=${{ inputs.tfstate-bucket }}, + key=${{ inputs.tfstate-key }}, + region=${{ inputs.aws-region }}, + encrypt=true diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000..33ba3b6 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,12 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + day: "wednesday" + time: "23:30" + open-pull-requests-limit: 5 + ignore: + - dependency-name: "fastapi" + versions: ["<0.91.0"] diff --git a/.github/workflows/destroy.yaml b/.github/workflows/destroy.yaml new file mode 100644 index 0000000..86ecdf0 --- /dev/null +++ b/.github/workflows/destroy.yaml @@ -0,0 +1,60 @@ +name: Terraform Destroy + +on: + workflow_dispatch: + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_BASE_REGION }} + TFSTATE_BUCKET: ${{ secrets.TFSTATE_BUCKET }} + TFSTATE_KEY: ${{ secrets.TFSTATE_KEY }} + GRAFANA_ADMIN_PASSWORD: ${{ secrets.GRAFANA_ADMIN_PASSWORD }} + CLOUDFLARE_ZONE_ID: ${{ secrets.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ secrets.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ secrets.TAILSCALE_CLIENT_SECRET }} + CROWDSEC_ENROLL_KEY: ${{ secrets.CROWDSEC_ENROLL_KEY }} + +jobs: + terraform-destroy: + runs-on: ubuntu-latest + name: Terraform Destroy + defaults: + run: + working-directory: ./terraform + steps: + - name: Checkout + uses: actions/checkout@v4 + + # - name: Determine Deployment Stage + # id: determine_stage + # run: | + # if [[ "${{ github.ref_name 
}}" == "main" ]]; then + # echo "stage=prod" >> $GITHUB_ENV + # else + # echo "stage=dev" >> $GITHUB_ENV + # fi + + - name: Terraform Destroy + id: terraform-destroy + uses: ./.github/actions/terraform-destroy + with: + aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + tfstate-bucket: ${{ env.TFSTATE_BUCKET }} + tfstate-key: ${{ env.TFSTATE_KEY }} + stage: "prod" + GRAFANA_ADMIN_PASSWORD: ${{ env.GRAFANA_ADMIN_PASSWORD }} + CLOUDFLARE_ZONE_ID: ${{ env.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ env.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ env.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ env.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ env.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ env.TAILSCALE_CLIENT_SECRET }} + CROWDSEC_ENROLL_KEY: ${{ env.CROWDSEC_ENROLL_KEY }} \ No newline at end of file diff --git a/.github/workflows/dev-deploy.yaml b/.github/workflows/dev-deploy.yaml new file mode 100644 index 0000000..0b95e78 --- /dev/null +++ b/.github/workflows/dev-deploy.yaml @@ -0,0 +1,113 @@ +name: Deploy Development Resources to EKS + +on: + push: + branches: + - dev + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_BASE_REGION }} + ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }} + EKS_CLUSTER_NAME: ${{ secrets.EKS_CLUSTER_NAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + +jobs: + deploy: + name: Deploy API + runs-on: ubuntu-latest + outputs: + message: ${{ steps.verify-deployment.outputs.status }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check for Changes + id: find_changes + run: | + if [ -z "$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} -- app)" ]; then + echo "deploy=false" >> $GITHUB_ENV + else + echo "deploy=true" >> 
$GITHUB_ENV + fi + + - name: Build, tag, and push image to Amazon ECR + id: build-push-image + uses: ./.github/actions/build-push-image + with: + aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + ecr-repository: ${{ env.ECR_REPOSITORY }} + dockerfile: ./Dockerfile + stage: "dev" + buildable: ${{ env.deploy }} + + - name: Update Kubeconfig + run: | + if [ "${{ env.deploy }}" != "false" ]; then + aws eks update-kubeconfig --name ${{ env.EKS_CLUSTER_NAME }} --region ${{ env.AWS_REGION }} + fi + + - name: Deploy to EKS + working-directory: ./k8s + env: + IMAGE: ${{ steps.build-push-image.outputs.image }} + STAGE: "dev" + run: | + if [ "${{ env.deploy }}" != "false" ]; then + sed -i "s|{{IMAGE}}|${IMAGE}|g" ${STAGE}/deployment.yaml + kubectl apply -f ${STAGE} + fi + + - name: Verify Deployment + id: verify-deployment + env: + STAGE: "dev" + shell: bash + run: | + if [ "${{ env.deploy }}" != "false" ]; then + # The runner's bash uses -e: test the command directly, a separate "$?" check would never run on failure + if kubectl rollout status deployment/app -n ${STAGE} --timeout=5m; then + echo "status=DEV Deployment successful!" >> $GITHUB_OUTPUT + else + echo "status=DEV Deployment failed!" >> $GITHUB_OUTPUT + exit 1 + fi + else + echo "status=No changes to deploy on DEV" >> $GITHUB_OUTPUT + fi + + - name: ZAP Baseline Scan + uses: zaproxy/action-baseline@v0.13.0 + with: + token: ${{ secrets.GIT_TOKEN }} + target: "https://staging.tailcf97d7.ts.net/docs/" + + - name: Rollback on Penetration Test Failure + if: failure() && env.deploy == 'true' # only undo a rollout that this run actually performed + env: + STAGE: "dev" + shell: bash + run: | + kubectl rollout undo deployment/app -n ${STAGE} + echo "status=Deployment failed because of Penetration Test Failure! 
Rolling back to previous deployment" >> $GITHUB_OUTPUT + + + notify-slack: + needs: + - deploy + if: always() + runs-on: ubuntu-latest + steps: + - uses: rtCamp/action-slack-notify@v2 + env: + SLACK_CHANNEL: "#deployments" + SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK }} + SLACK_USERNAME: "Deployer on Cloudflare" + SLACK_MESSAGE: ${{ needs.deploy.outputs.message }} + SLACK_COLOR: ${{ contains(needs.deploy.outputs.message, 'successful') && 'good' || 'danger' }} + SLACK_ICON: "https://avatars.githubusercontent.com/u/44036562?s=200&v=4" \ No newline at end of file diff --git a/.github/workflows/plan.yaml b/.github/workflows/plan.yaml new file mode 100644 index 0000000..f6cd6be --- /dev/null +++ b/.github/workflows/plan.yaml @@ -0,0 +1,67 @@ +name: Terraform Plan + +on: + pull_request: + paths: + - '**/*.tf' + - '**/*.tfvars' + +permissions: + contents: read + pull-requests: write + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_BASE_REGION }} + TFSTATE_BUCKET: ${{ secrets.TFSTATE_BUCKET }} + TFSTATE_KEY: ${{ secrets.TFSTATE_KEY }} + GIT_TOKEN: ${{ secrets.GIT_TOKEN }} + GRAFANA_ADMIN_PASSWORD: ${{ secrets.GRAFANA_ADMIN_PASSWORD }} + CLOUDFLARE_ZONE_ID: ${{ secrets.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ secrets.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ secrets.TAILSCALE_CLIENT_SECRET }} + CROWDSEC_ENROLL_KEY: ${{ secrets.CROWDSEC_ENROLL_KEY }} + +jobs: + terraform-plan: + runs-on: ubuntu-latest + name: Terraform Plan + defaults: + run: + working-directory: ./terraform + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Checkov Scan + id: checkov-scan + uses: bridgecrewio/checkov-action@v12 + with: + directory: . 
+ soft_fail: true + + - name: Terraform Plan + id: terraform-plan + uses: ./.github/actions/terraform-plan + with: + aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + tfstate-bucket: ${{ env.TFSTATE_BUCKET }} + tfstate-key: ${{ env.TFSTATE_KEY }} + stage: "prod" + github-token: ${{ env.GIT_TOKEN }} + GRAFANA_ADMIN_PASSWORD: ${{ env.GRAFANA_ADMIN_PASSWORD }} + CLOUDFLARE_ZONE_ID: ${{ env.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ env.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ env.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ env.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ env.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ env.TAILSCALE_CLIENT_SECRET }} + CROWDSEC_ENROLL_KEY: ${{ env.CROWDSEC_ENROLL_KEY }} \ No newline at end of file diff --git a/.github/workflows/sync-and-deploy.yaml b/.github/workflows/sync-and-deploy.yaml new file mode 100644 index 0000000..fc2777b --- /dev/null +++ b/.github/workflows/sync-and-deploy.yaml @@ -0,0 +1,174 @@ +name: Sync Infra and Deploy to EKS + +on: + push: + branches: + - main + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_BASE_REGION }} + TFSTATE_BUCKET: ${{ secrets.TFSTATE_BUCKET }} + TFSTATE_KEY: ${{ secrets.TFSTATE_KEY }} + GRAFANA_ADMIN_PASSWORD: ${{ secrets.GRAFANA_ADMIN_PASSWORD }} + ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }} + EKS_CLUSTER_NAME: ${{ secrets.EKS_CLUSTER_NAME }} + CLOUDFLARE_ZONE_ID: ${{ secrets.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ secrets.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ secrets.TAILSCALE_CLIENT_SECRET }} + 
CROWDSEC_ENROLL_KEY: ${{ secrets.CROWDSEC_ENROLL_KEY }} + +jobs: + terraform-apply: + name: Sync Terraform + runs-on: ubuntu-latest + outputs: + message: ${{ steps.prepare-slack.outputs.status }} + defaults: + run: + working-directory: ./terraform + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Checkov Scan + id: checkov-scan + uses: bridgecrewio/checkov-action@v12 + with: + directory: . + soft_fail: true + + - name: Terraform Apply + id: terraform-apply + uses: ./.github/actions/terraform-apply + with: + aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + tfstate-bucket: ${{ env.TFSTATE_BUCKET }} + tfstate-key: ${{ env.TFSTATE_KEY }} + stage: "prod" # the idea is we can have another terraform env for dev, but due to my AWS financial constraints, I just relied on different Kubernetes namespaces. + GRAFANA_ADMIN_PASSWORD: ${{ env.GRAFANA_ADMIN_PASSWORD }} + CLOUDFLARE_ZONE_ID: ${{ env.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ env.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ env.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ env.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ env.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ env.TAILSCALE_CLIENT_SECRET }} + CROWDSEC_ENROLL_KEY: ${{ env.CROWDSEC_ENROLL_KEY }} + + - name: Prepare Slack Notification + if: always() + id: prepare-slack + run: | + if [ "${{ steps.terraform-apply.outcome }}" == "success" ]; then + echo "status=Deployment successful!" >> $GITHUB_OUTPUT + else + echo "status=Deployment failed!" 
>> $GITHUB_OUTPUT + exit 1 + fi + + deploy: + needs: terraform-apply + name: Deploy API + runs-on: ubuntu-latest + outputs: + message: ${{ steps.verify-deployment.outputs.status }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check for Changes + id: find_changes + run: | + if [ -z "$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} -- app)" ]; then + echo "deploy=false" >> $GITHUB_ENV + else + echo "deploy=true" >> $GITHUB_ENV + fi + + - name: Build, tag, and push image to Amazon ECR + id: build-push-image + uses: ./.github/actions/build-push-image + with: + aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + ecr-repository: ${{ env.ECR_REPOSITORY }} + dockerfile: ./Dockerfile + stage: "prod" + buildable: ${{ env.deploy }} + + - name: Update Kubeconfig + run: | + if [ "${{ env.deploy }}" != "false" ]; then + aws eks update-kubeconfig --name ${{ env.EKS_CLUSTER_NAME }} --region ${{ env.AWS_REGION }} + fi + + - name: Deploy to EKS + working-directory: ./k8s + env: + IMAGE: ${{ steps.build-push-image.outputs.image }} + STAGE: "prod" + run: | + if [ "${{ env.deploy }}" != "false" ]; then + sed -i "s|{{IMAGE}}|${IMAGE}|g" ${STAGE}/deployment.yaml + kubectl apply -f ${STAGE} + fi + + - name: Verify Deployment + id: verify-deployment + env: + STAGE: "prod" + shell: bash + run: | + if [ "${{ env.deploy }}" != "false" ]; then + # The runner's bash uses -e: test the command directly, a separate "$?" check would never run on failure + if kubectl rollout status deployment/app -n ${STAGE} --timeout=5m; then + echo "status=Deployment successful!" >> $GITHUB_OUTPUT + else + echo "status=Deployment failed!" 
>> $GITHUB_OUTPUT + exit 1 + fi + else + echo "status=No changes to deploy" >> $GITHUB_OUTPUT + fi + + - name: ZAP Baseline Scan + uses: zaproxy/action-baseline@v0.13.0 + with: + token: ${{ secrets.GIT_TOKEN }} + target: "https://boilerplate.example.com/docs/" + + - name: Rollback on Penetration Test Failure + if: failure() && env.deploy == 'true' # only undo a rollout that this run actually performed + env: + STAGE: "prod" + shell: bash + run: | + kubectl rollout undo deployment/app -n ${STAGE} + echo "status=Deployment failed because of Penetration Test Failure! Rolling back to previous deployment" >> $GITHUB_OUTPUT + + notify-slack: + needs: + - deploy + - terraform-apply + if: always() + runs-on: ubuntu-latest + steps: + - uses: rtCamp/action-slack-notify@v2 + env: + SLACK_CHANNEL: "#deployments" + SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK }} + SLACK_USERNAME: "Deployer on Cloudflare" + SLACK_MESSAGE: ${{ needs.deploy.outputs.message && needs.deploy.outputs.message || needs.terraform-apply.outputs.message }} + SLACK_COLOR: ${{ contains(needs.deploy.outputs.message, 'successful') && 'good' || 'danger' }} + SLACK_ICON: "https://avatars.githubusercontent.com/u/44036562?s=200&v=4" \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..9cb3892 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,94 @@ +# The pipeline can have failed steps marked as successful by using continue-on-error: true. +# If needed, I just went with the default setting for the sake of simplicity and used the always() condition to ensure that any step runs regardless of the outcome of the previous steps. 
+ +name: Test Pipeline + +on: + pull_request: + branches: + - main + - dev + +jobs: + test-app: + runs-on: ubuntu-latest + name: Test Application + outputs: + message: "Bandit Security Linting: ${{ steps.bandit.outcome }}\nTrufflehog Leaked Secret Scanning: ${{ steps.trufflehog.outcome }}\nGrype Container Vulnerability Scanning: ${{ steps.grype.outcome }}\nApplication Tests: ${{ steps.tests.outcome }}\nFlake8 Linting: ${{ steps.lint.outcome }}" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.head_ref }} + + - name: Bandit Security Linting + id: bandit + if: always() + uses: tj-actions/bandit@v5.1 + with: + options: "-l" # Include only high severity issues + + - name: Trufflehog Leaked Secret Scanning + id: trufflehog + if: always() + uses: edplato/trufflehog-actions-scan@master + + - name: Set up Docker Buildx + if: always() + uses: docker/setup-buildx-action@v3 + with: + version: latest + + - name: Build Local Image + if: always() + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile + push: false + tags: localhost/app:latest + load: true + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Grype Container Vulnerability Scanning + id: grype + if: always() + uses: anchore/scan-action@v3 + with: + image: "localhost/app:latest" + fail-build: true + severity-cutoff: "critical" + + - name: Compose-Up + if: always() + run: make run + + - name: Flake8 Linting # I had linting running inside the container as the application uses uvicorn, a server that won't exit if any errors are found and thus the container would not stop running + id: lint + if: always() + run: make lint + + - name: Run-Tests + id: tests + if: always() + run: make test + + - name: Compose-Down + if: always() + run: make clean + + notify-slack: + needs: test-app + if: always() + runs-on: ubuntu-latest + steps: + - uses: rtCamp/action-slack-notify@v2 + env: + SLACK_CHANNEL: "#qa" + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_USERNAME: "Test Pipeline Results" + SLACK_MESSAGE: ${{ needs.test-app.outputs.message }} + SLACK_COLOR: ${{ contains(needs.test-app.outputs.message, 'failure') && 'danger' || 'good' }} # the message is built from step outcomes, which read "failure" (never "failed") + SLACK_ICON: "https://avatars.githubusercontent.com/u/44036562?s=200&v=4" \ No newline at end of file diff --git a/.github/workflows/tf-drift-detection.yaml b/.github/workflows/tf-drift-detection.yaml new file mode 100644 index 0000000..c7a4090 --- /dev/null +++ b/.github/workflows/tf-drift-detection.yaml @@ -0,0 +1,193 @@ +name: Terraform Drift Detection + +on: + workflow_dispatch: + schedule: + - cron: '0 3 * * *' + +permissions: + contents: read + issues: write + +env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_BASE_REGION }} + TFSTATE_BUCKET: ${{ secrets.TFSTATE_BUCKET }} + TFSTATE_KEY: ${{ secrets.TFSTATE_KEY }} + GRAFANA_ADMIN_PASSWORD: ${{ secrets.GRAFANA_ADMIN_PASSWORD }} + ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY }} + 
EKS_CLUSTER_NAME: ${{ secrets.EKS_CLUSTER_NAME }} + CLOUDFLARE_ZONE_ID: ${{ secrets.CLOUDFLARE_ZONE_ID }} + CLOUDFLARE_TOKEN: ${{ secrets.CLOUDFLARE_TOKEN }} + CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }} + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + TAILSCALE_CLIENT_ID: ${{ secrets.TAILSCALE_CLIENT_ID }} + TAILSCALE_CLIENT_SECRET: ${{ secrets.TAILSCALE_CLIENT_SECRET }} + CROWDSEC_ENROLL_KEY: ${{ secrets.CROWDSEC_ENROLL_KEY }} + +jobs: + terraform-plan: + name: Terraform Plan + runs-on: ubuntu-latest + outputs: + TFPLAN_EXIT_CODE: ${{ steps.tfplan.outputs.exitcode }} + defaults: + run: + working-directory: ./terraform + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_wrapper: false + + - name: Terraform Init + run: | + terraform init \ + -backend-config="bucket=${TFSTATE_BUCKET}" \ + -backend-config="key=${TFSTATE_KEY}" \ + -backend-config="region=${AWS_REGION}" \ + -backend-config="encrypt=true" + + - name: Terraform Plan + id: tfplan + env: + TF_VAR_GRAFANA_ADMIN_PASSWORD: ${{ env.GRAFANA_ADMIN_PASSWORD }} + TF_VAR_CLOUDFLARE_ZONE_ID: ${{ env.CLOUDFLARE_ZONE_ID }} + TF_VAR_CLOUDFLARE_TOKEN: ${{ env.CLOUDFLARE_TOKEN }} + TF_VAR_CLOUDFLARE_EMAIL: ${{ env.CLOUDFLARE_EMAIL }} + TF_VAR_CLOUDFLARE_API_TOKEN: ${{ env.CLOUDFLARE_API_TOKEN }} + TF_VAR_SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK }} + TF_VAR_TAILSCALE_CLIENT_ID: ${{ env.TAILSCALE_CLIENT_ID }} + TF_VAR_TAILSCALE_CLIENT_SECRET: ${{ env.TAILSCALE_CLIENT_SECRET }} + TF_VAR_CROWDSEC_ENROLL_KEY: ${{ env.CROWDSEC_ENROLL_KEY }} + run: | + export exitcode=0 + terraform plan -var-file ./stages/prod.tfvars -detailed-exitcode 
-no-color -out tfplan || exitcode=$? + echo "exitcode=$exitcode" >> $GITHUB_OUTPUT # read by the job output and the conditions below as steps.tfplan.outputs.exitcode + if [ $exitcode -eq 1 ]; then + echo Terraform Plan Failed! + exit 1 + else + exit 0 + fi + + - name: Upload Terraform Plan + uses: actions/upload-artifact@v4 + with: + name: tfplan + path: terraform/tfplan # "uses" steps ignore defaults.run.working-directory, so the path is relative to the workspace root + + - name: Create String Output + id: tf-plan-string + run: | + TERRAFORM_PLAN=$(terraform show -no-color tfplan) + + delimiter="$(openssl rand -hex 8)" + echo "summary<<${delimiter}" >> $GITHUB_OUTPUT + echo "## Terraform Plan Output" >> $GITHUB_OUTPUT + echo "<details><summary>Click to expand</summary>" >> $GITHUB_OUTPUT + echo "" >> $GITHUB_OUTPUT + echo '```terraform' >> $GITHUB_OUTPUT + echo "$TERRAFORM_PLAN" >> $GITHUB_OUTPUT + echo '```' >> $GITHUB_OUTPUT + echo "</details>" >> $GITHUB_OUTPUT + echo "${delimiter}" >> $GITHUB_OUTPUT + + - name: Publish Terraform Plan to Task Summary + env: + SUMMARY: ${{ steps.tf-plan-string.outputs.summary }} + run: | + echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY + + - name: Publish Drift Report + if: steps.tfplan.outputs.exitcode == 2 + uses: actions/github-script@v7 + env: + SUMMARY: "${{ steps.tf-plan-string.outputs.summary }}" + with: + github-token: ${{ github.token }} + script: | + const body = `${process.env.SUMMARY}`; + const title = 'Terraform Configuration Drift Detected'; + const creator = 'github-actions[bot]' + + // Look to see if there is an existing drift issue + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + creator: creator, + title: title + }) + + if( issues.data.length > 0 ) { + // We assume there shouldn't be more than 1 open issue, since we update any issue we find + const issue = issues.data[0] + + if ( issue.body == body ) { + console.log('Drift Detected: Found matching issue with duplicate content') + } else { + console.log('Drift Detected: Found matching issue, updating body') + await github.rest.issues.update({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + body: body + }) + } + } else { + console.log('Drift Detected: Creating new issue') + + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: title, + body: body + }) + } + + - name: Close Resolved Drift Issue + if: steps.tfplan.outputs.exitcode == 0 + uses: actions/github-script@v7 + with: + github-token: ${{ github.token }} + script: | + const title = 'Terraform Configuration Drift Detected'; + const creator = 'github-actions[bot]' + + // Look to see if there is an existing drift issue + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + creator: creator, + title: title + }) + + if( 
issues.data.length > 0 ) { + const issue = issues.data[0] + + await github.rest.issues.update({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + state: 'closed' + }) + } + + - name: Error on Failure + if: steps.tfplan.outputs.exitcode == 2 + run: exit 1 diff --git a/.github/workflows/zap-fullscan.yaml b/.github/workflows/zap-fullscan.yaml new file mode 100644 index 0000000..014123f --- /dev/null +++ b/.github/workflows/zap-fullscan.yaml @@ -0,0 +1,21 @@ +name: Nightly DAST Scans + +on: + workflow_dispatch: + schedule: + - cron: '0 3 * * *' + +jobs: + owasp-zap-full-scan: + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@v4 + + - name: Run OWASP Zap Full Scan + uses: zaproxy/action-full-scan@v0.11.0 + with: + token: ${{ secrets.GIT_TOKEN }} + target: "https://staging.tailcf97d7.ts.net/docs/" + issue_title: "Nightly DAST Full Scan by OWASP ZAP - ${{ github.run_number }}" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0b0c79 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.terraform +*.hcl +*.tfstate +*.tfstate.backup +*.info \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..070e60e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11-slim AS base + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +WORKDIR /app + +FROM base AS builder +COPY requirements.txt . +RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt + +FROM base AS final +COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin +COPY app . 
+ +EXPOSE 8000 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f58a644 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +DOCKER_COMPOSE = docker compose + +.PHONY: run build test clean lint + +build: + @$(DOCKER_COMPOSE) build + +run: + @$(DOCKER_COMPOSE) up -d + +test: + @$(DOCKER_COMPOSE) run --rm app pytest --cov=app --cov-report=term-missing + +lint: + @echo "Running flake8 for syntax errors and undefined names..." + @$(DOCKER_COMPOSE) run --rm app flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + @echo "Running flake8 with relaxed rules (warnings only)..." + @$(DOCKER_COMPOSE) run --rm app flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + +clean: + @$(DOCKER_COMPOSE) down --volumes --remove-orphans diff --git a/_DOCUMENTATION/2024-11-19-01-09-44.png b/_DOCUMENTATION/2024-11-19-01-09-44.png new file mode 100644 index 0000000..9c8b14b Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-09-44.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-10-13.png b/_DOCUMENTATION/2024-11-19-01-10-13.png new file mode 100644 index 0000000..b481683 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-10-13.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-10-51.png b/_DOCUMENTATION/2024-11-19-01-10-51.png new file mode 100644 index 0000000..ce86038 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-10-51.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-14-11.png b/_DOCUMENTATION/2024-11-19-01-14-11.png new file mode 100644 index 0000000..f9fa59a Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-14-11.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-15-07.png b/_DOCUMENTATION/2024-11-19-01-15-07.png new file mode 100644 index 0000000..639d059 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-15-07.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-15-30.png 
b/_DOCUMENTATION/2024-11-19-01-15-30.png new file mode 100644 index 0000000..9bf7cfa Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-15-30.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-17-11.png b/_DOCUMENTATION/2024-11-19-01-17-11.png new file mode 100644 index 0000000..b71322c Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-17-11.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-22-01.png b/_DOCUMENTATION/2024-11-19-01-22-01.png new file mode 100644 index 0000000..8f4f791 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-22-01.png differ diff --git a/_DOCUMENTATION/2024-11-19-01-48-50.png b/_DOCUMENTATION/2024-11-19-01-48-50.png new file mode 100644 index 0000000..4509acb Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-01-48-50.png differ diff --git a/_DOCUMENTATION/2024-11-19-12-02-22.png b/_DOCUMENTATION/2024-11-19-12-02-22.png new file mode 100644 index 0000000..072beea Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-12-02-22.png differ diff --git a/_DOCUMENTATION/2024-11-19-17-47-31.png b/_DOCUMENTATION/2024-11-19-17-47-31.png new file mode 100644 index 0000000..436ced8 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-17-47-31.png differ diff --git a/_DOCUMENTATION/2024-11-19-20-45-57.png b/_DOCUMENTATION/2024-11-19-20-45-57.png new file mode 100644 index 0000000..34ac568 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-20-45-57.png differ diff --git a/_DOCUMENTATION/2024-11-19-20-46-52.png b/_DOCUMENTATION/2024-11-19-20-46-52.png new file mode 100644 index 0000000..2abfde1 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-20-46-52.png differ diff --git a/_DOCUMENTATION/2024-11-19-20-47-11.png b/_DOCUMENTATION/2024-11-19-20-47-11.png new file mode 100644 index 0000000..19296c2 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-20-47-11.png differ diff --git a/_DOCUMENTATION/2024-11-19-20-47-54.png b/_DOCUMENTATION/2024-11-19-20-47-54.png new file mode 100644 index 0000000..80d6939 Binary 
files /dev/null and b/_DOCUMENTATION/2024-11-19-20-47-54.png differ diff --git a/_DOCUMENTATION/2024-11-19-23-02-06.png b/_DOCUMENTATION/2024-11-19-23-02-06.png new file mode 100644 index 0000000..cd5eda1 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-23-02-06.png differ diff --git a/_DOCUMENTATION/2024-11-19-23-16-06.png b/_DOCUMENTATION/2024-11-19-23-16-06.png new file mode 100644 index 0000000..fea82d5 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-23-16-06.png differ diff --git a/_DOCUMENTATION/2024-11-19-23-16-28.png b/_DOCUMENTATION/2024-11-19-23-16-28.png new file mode 100644 index 0000000..963e1d6 Binary files /dev/null and b/_DOCUMENTATION/2024-11-19-23-16-28.png differ diff --git a/_DOCUMENTATION/2024-11-20-01-25-23.png b/_DOCUMENTATION/2024-11-20-01-25-23.png new file mode 100644 index 0000000..e5ebd65 Binary files /dev/null and b/_DOCUMENTATION/2024-11-20-01-25-23.png differ diff --git a/_DOCUMENTATION/2024-11-20-02-33-26.png b/_DOCUMENTATION/2024-11-20-02-33-26.png new file mode 100644 index 0000000..093902c Binary files /dev/null and b/_DOCUMENTATION/2024-11-20-02-33-26.png differ diff --git a/_DOCUMENTATION/README.MD b/_DOCUMENTATION/README.MD new file mode 100644 index 0000000..df104fc --- /dev/null +++ b/_DOCUMENTATION/README.MD @@ -0,0 +1,548 @@ +# LANGUAGES AND TOOLS + +I was given a FAST API Python application to deploy therefore I used Python to write tests for the application. + +I used HCL for writing the Terraform code for the infrastructure. According to GitHub stats, that's the dominant language in the repository. + +I used a Dockerfile to build the application's Docker Image and a Makefile to control the build and tests processes with the help of Docker Compose. + +GitHub Actions was the CI/CD tool of choice for this project and I used it not only to automate the build, test, and deployment processes but also to manage the infrastructure with Terraform. 
+ +AWS was the cloud provider of choice and I used EKS to deploy the Kubernetes cluster. + +# ENVIRONMENT + +### MONOREPO STRUCTURE + +The repository is structured as a monorepo with the following directories: +_DOCUMENTATION: This directory contains the documentation for the project. +.github/actions: This directory contains the GitHub Actions workflow modules used in the CI/CD pipeline. +.github/workflows: This directory contains the GitHub Actions workflows for the CI/CD pipeline. +app: This directory contains the FastAPI application. +app/tests: This directory contains the tests for the FastAPI application. +k8s/dev: This directory contains the Kubernetes resources (deployment and service) for the development environment. +k8s/prod: This directory contains the Kubernetes resources (deployment and service) for the production environment. +terraform: This directory contains the Terraform modules for the infrastructure components which will be explained further as we go along. + +### BRANCHES AND PROTECTED MAIN BRANCH + +The main branch is protected and requires a pull request to merge changes into it and deployments from the main branch are done to the prod namespace in the Kubernetes cluster. +The dev branch is used for development and deployments from the dev branch are done to the dev namespace in the Kubernetes cluster. +Each pull request triggers the test and code scanning jobs in the CI/CD pipeline; the Terraform plan job, which checks the infrastructure changes before merging, runs only if any `*.tf` files are changed. +Dependabot is enabled to check for dependency updates and create pull requests for them. + +### SECRET MANAGEMENT + +The secrets are managed in the GitHub repository secrets and are used in the CI/CD pipeline for the deployment and infrastructure management. + +Whenever Terraform needs a certain secret to be used, it is passed as a variable to the Terraform module from the GitHub Actions workflow.
+ +![](2024-11-19-01-10-51.png) + +## AWS + +This section focuses on the AWS environment and the infrastructure components that were created. + +### INFRASTRUCTURE COMPONENTS + +#### THE S3 TFSTATE BUCKET + +An S3 bucket was created to store the Terraform state file. + +#### THE VPC AND SUBNETS + +A VPC using a custom CIDR which is divided into private and public subnets across specified availability zones will be created in the Terraform module to deploy the EKS cluster. + +#### THE SECURITY GROUPS + +A Security group for the cluster in the previous VPC will allow inbound traffic on ports 80 and 443 from the internet and outbound traffic to the internet. + +#### THE ECR REPOSITORY + +An ECR repository will be created supporting mutable image tags and a lifecycle rule to retain only the last 30 untagged images, expiring the rest. The repository is protected from accidental deletion so the Terraform destroy job will not delete it and therefore it must be deleted manually. + +### IAM ROLES + +It is a best practice not to use the root email account for the AWS account and to create an IAM user with the necessary permissions to manage the infrastructure as the client key and secret are used in Terraform. + +The IAM roles created are: + +- EKS Cluster Role: Allows EKS to manage AWS resources on your behalf. The `AmazonEKSClusterPolicy` provides EKS the necessary permissions to create and manage cluster resources. +- Worker Node Role: Allows worker nodes in the EKS Node Group to communicate with the EKS control plane. The `AmazonEKSWorkerNodePolicy` provides the necessary permissions to register with the EKS cluster, the `AmazonEC2ContainerRegistryReadOnly` policy allows the worker nodes to pull images from the ECR repository, and the `AmazonEKS_CNI_Policy` grants permissions for networking tasks within the cluster.
+ +#### THE WORKER NODE LAUNCH TEMPLATE + +One difference I encountered between AWS and GCP is that in AWS, the worker nodes have a limited number of Kubernetes pods they can run and this is determined by the instance type. The launch template is an advanced feature that allows you to control the instance and customise it to your needs. + +When I started designing the infrastructure, I wanted to stay within the free tier, so I planned to limit the cluster to 2 t3.micro instances, which are among the smallest instances available in the free tier, and thus pay only for the Kubernetes costs. However, those were limited to 4 pods per instance, which, given the number of initial Kubernetes cluster resources in the kube-system namespace, left me with only a few pod-allocatable resources for the application pods. + +I even tried with the launch template to increase the number of pods per instance which worked but still I faced a different issue in relation to the number of allocated IPs per the CNI plugin. + +Therefore, I ended up relying on the larger t3.medium instances which have more pod-allocatable resources thanks to the launch template and more IPs allocated per instance, with the possibility to scale the cluster to 4 instances when needed. + +#### THE EKS CLUSTER AND NODE GROUP + +The EKS cluster is created with the latest version of Kubernetes and the node group has the desired capacity of three instances and the maximum capacity of 4 instances. + +# INFRASTRUCTURE AS CODE + +## TERRAFORM AND USED PROVIDERS + +AWS Provider: Manages AWS resources, configured within the set region variable. + +Kubernetes Provider: Connects to EKS, using the cluster endpoint, token, and CA certificate. + +Helm Provider: Interacts with Helm in EKS, using the Kubernetes endpoint and token.
The `helm_release` resource is used to install the required Helm charts for the Nginx Ingress Controller, Cert-Manager, CrowdSec, Prometheus Operator, Grafana, Loki, Tempo, and Falco. + +Cloudflare Provider: Authenticates with Cloudflare using the user token. The `cloudflare_record` resource is used to create the DNS records for the domain. + +Kubectl: The `kubectl` command is used to apply the Kubernetes resources to the cluster. While the Kubernetes provider is used to manage the Kubernetes resources, the kubectl was used in cases when a certain resources would require CRDs to be installed from a Helm chart which was not possible with the Kubernetes provider at the time of applying the resources since the CRDs were not installed yet, kubectl applies resources to the cluster differently in such case and would succeed. + +Consult the following file: [provider.tf](../terraform/provider.tf) + +## TERRAFORM MODULES BREAKDOWN + +[provider.tf](../terraform/provider.tf): Contains the provider configurations. +[variables.tf](../terraform/variables.tf): Contains the variables used in the Terraform modules. +[backend.tf](../terraform/backend.tf): Contains the backend configuration for the Terraform state file. +[vpc.tf](../terraform/vpc.tf): Creates the VPC and subnets. +[sg.tf](../terraform/sg.tf): Creates the security group. +[ecr.tf](../terraform/ecr.tf): Creates the ECR repository. +[eks.tf](../terraform/eks.tf): Creates the EKS cluster and node group. +[monitoring.tf](../terraform/monitoring.tf): Creates the monitoring resources (Prometheus, Grafana, Loki, Tempo). +[security.tf](../terraform/security.tf): Creates the security resources (Falco, CrowdSec). +[kyverno.tf](../terraform/kyverno.tf): Creates the Kyverno policies. +[ingress.tf](../terraform/ingress.tf): Contains Nginx Ingress Controller and Cert-Manager deployments along with the dev, prod, and monitoring ingress resources. +[dns.tf](../terraform/dns.tf): Creates the DNS records for the domain. 
+[tls.tf](../terraform/tls.tf): Contains the TLS certificate resources with the Cert-Manager. +[tailscale.tf](../terraform/tailscale.tf): Creates the Tailscale operator to restrict access to the Tailscale VPN tailnet. +[output.tf](../terraform/output.tf): Contains the outputs of the Terraform modules. + +[stages/dev.tfvars](../terraform/stages/dev.tfvars): Contains the variables for the dev stage. +[stages/prod.tfvars](../terraform/stages/prod.tfvars): Contains the variables for the prod stage. + +PS. I intended initially to create different Terraform environments that could be managed via Terraform workspaces and Terragrunt and have different variables. However, I ended up using just Cluster namespacing in the Kubernetes cluster to separate the dev and prod environments. + +# CI/CD PIPELINE + +## MAKEFILE, DOCKERFILE, AND DOCKER COMPOSE + +Before I started with the CI/CD pipeline, I created a Makefile to control the build and test processes of the application and a Dockerfile to build the application's Docker image. + +The Makefile uses Docker Compose to build the application and run the functional tests with Pytest and PyCov for code coverage, and Flake8 for linting. It also can start the compose services and stop them. + +The [Makefile](../Makefile) commands: +- `make build`: Builds the application. +- `make test`: Runs the functional tests. +- `make run`: Starts the compose services. +- `make clean`: Stops the compose services and removes their volumes and orphan containers. + +The [Dockerfile](../Dockerfile) was written in the most optimised way: +- Layering: The Dockerfile uses multi-stage builds (base, builder, final), which helps to keep the final image small and free of unnecessary dependencies. +- Cache Efficiency: The use of separate layers for installing dependencies and copying application code ensures that dependencies are only reinstalled when `requirements.txt` changes, reducing build times.
+- Minimal Final Image: The final image only includes the runtime dependencies and the application code, keeping the image size small, which is around 50MB thanks to the Python3.11-slim base image too. + +The [Docker Compose](../docker-compose.yaml) file was used to run the application and the tests whether locally or in the CI/CD pipeline. It exposes the application on port 8000. + +## GITHUB ACTIONS WORKFLOW + +As mentioned before, the GitHub Actions workflow is modular and its modules are stored in the `.github/actions` directory: + +- build-push-image: Builds the Docker image and pushes it to the ECR repository. +- terraform-apply: Applies the Terraform modules to create or keep the infrastructure in sync. +- terraform-destroy: Destroys the infrastructure. +- terraform-plan: Plans the infrastructure changes (Used in Pull requests when there is a change affecting `*.tf` files). + +For the actual pipelines, the `.github/workflows` directory contains the following workflows: + +- sync-and-deploy.yaml: Syncs the infrastructure and deploys the application to the Kubernetes cluster in the prod namespace. +- dev-deploy.yaml: Contains the job to deploy the application to the Kubernetes cluster in the dev namespace. +- plan.yaml: Contains the job to plan the infrastructure changes when there is a change affecting `*.tf` files and is triggered on pull requests. +- test.yaml: Contains the jobs to run the tests and code scanning tools (Flake8, Bandit, Trufflehog, Grype, Functional Tests) during a pull request. +- destroy.yaml: A manual job to destroy the infrastructure. +- dependabot.yaml: Contains the job to check for dependency updates and create pull requests for them. + +## PLAN INFRASTRUCTURE JOB + +The plan infrastructure job uses `dflook` images to run the Terraform plan command and check for changes in the infrastructure. It is triggered on pull requests when there is a change affecting `*.tf` files. 
The output can be seen in the workflow logs as well as the pull request comments by the GitHub Actions bot. + +## TEST AND CODE SCANNING JOBS + +REFER TO THIS [JOB LOGS](https://github.com/adamlahbib/boilerplate/actions/runs/11922712020/job/33229562109?pr=11) + +### FLAKE8, PYTEST, AND PYCOV + +Flake8 is a Python linting tool that checks the code for PEP8 compliance and other coding standards. The job is triggered on pull requests and runs the Flake8 tool on the container directly to check the code. + +While the standard way is to setup Python during the pipeline runtime and install PIP then Flake8 and then build the application and run the different tests and code scanning tools. However this is just an extra overhead since we can just have the docker compose build the application then start the services (in our case `uvicorn` would not stop even if the code is broken) so if the build step passes, the Flake8 can possibly run within the container and check the code (being really lightweight) and would help gain a huge amount of time during the pipeline execution. That's the same reason why I prefer to have the practice of also running the functional tests within the container and generate the coverage report as well. + +Flake8 runs [two commands](../Makefile), first focuses on critical errors e.g. syntax errors and undefined names, and the second looks for general issues such as code complexity and line length, to not interrupt the pipeline for ongoing development as long as the code is not broken or in critical shape, such issues are treated as warnings but still are reported in the logs and sent to Slack. + +![](2024-11-19-20-46-52.png) + +Pytest is a testing framework that allows for easy test creation and execution. I have written functional tests for the FastAPI application and they are run in the pipeline to ensure the application is working as expected simply by checking the health endpoint and its response. [test_health.py](../app/tests/test_health.py). 
+ +PyCov is a code coverage tool that measures the amount of code covered by the tests. The coverage report is generated in the pipeline. + +![](2024-11-19-20-47-11.png) + +### BANDIT + +Bandit is a tool designed to find common security issues in Python code. To do this, Bandit processes each file, builds an AST from it, and runs appropriate plugins against the AST nodes. Once Bandit has finished scanning all the files, it generates a report. + +![](2024-11-19-20-45-57.png) + +### CHECKOV + +Checkov is a static code analysis tool for infrastructure-as-code. It scans Terraform, CloudFormation, Kubernetes, and other IaC files for security and compliance issues. Checkov is used to ensure that the Terraform code is secure and compliant with best practices. + +### TERRAFORM DRIFT DETECTION + +I first wanted to use Terramate for this but their process involved using a client locally on my laptop and more, so I went with this one https://github.com/azure-samples/terraform-github-actions/blob/main/.github/workflows/tf-drift.yml that was a great inspiration to understand the process from and implement it myself. + +REFER TO THIS [JOB LOG](https://github.com/adamlahbib/boilerplate/actions/runs/11924994674) + +![](2024-11-20-02-33-26.png) + +### TRUFFLEHOG + +TruffleHog is the most powerful secrets Discovery, Classification, Validation, and Analysis tool. In this context secret refers to a credential a machine uses to authenticate itself to another machine. This includes API keys, database passwords, private encryption keys, and more... + +TruffleHog can look for secrets in many places including Git, chats, wikis, logs, API testing platforms, object stores, filesystems and more... + +By using TruffleHog, we can ensure that no secrets are leaked outside the repository and the CI/CD pipeline and we can take action to remove them. 
+ +### GRYPE + +Grype is a vulnerability scanner for container images and filesystems developed and maintained by Anchore and written in the Go programming language. Grype can scan from Docker, OCI, Singularity, podman, image archives, and local directory. Grype is compatible with SBOMs generated by Syft, and Grype’s vulnerability database draws from a wide variety of sources, including Wolfi SecDB. + +![](2024-11-19-20-47-54.png) + +![](2024-11-20-01-25-23.png) + +### DEPENDABOT FOR DEPENDENCY CHECKS AND UPDATES JOB + +Dependabot is enabled to check for dependency updates and create pull requests for them. The job is triggered on a schedule and checks for updates to the dependencies in the `requirements.txt` file. + +![](2024-11-19-01-17-11.png) + +### OWASP ZAP FOR DYNAMIC SECURITY SCANNING + +The Zed Attack Proxy (ZAP) by Checkmarx is the world’s most widely used web app scanner. Free and open source. A community based GitHub Top 1000 project that anyone can contribute to. + +It can help you automatically find security vulnerabilities in your web applications while you are developing and testing your applications. + +#### OWASP ZAP BASELINE SCAN + +This job penetrates the application with the quicker and less intrusive scan to identify the most common vulnerabilities. The scan is triggered after a Kubernetes deployment in both the dev and prod environments. + +#### OWASP ZAP FULL SCAN + +This job runs over night featuring the full thorough scan to identify all the vulnerabilities in the application. The scan checks the main endpoint for the application. + +After each scan, Zap would raise findings in the repo issues and would manage them thereafter: If you close an unfixed issue, Zap will reopen it after the next run, and similarly, will automatically close an issue if it is fixed before the next run. This is a great way to keep track of the security posture of the application in an automated way. 
+ +#### ROLLBACK JOB IN CASE OF PENETRATION FINDINGS + +The Kubernetes rollback job is triggered if the OWASP ZAP scan finds any critical vulnerabilities in the application. The job will rollback the deployment to the previous good state and notify the team in the Slack channel. + +## BUILD AND PUSH DOCKER IMAGE JOB + +### DOCKER BUILDX + +Buildx is a Docker CLI plugin that extends the docker build command with the full support of the features provided by Moby BuildKit builder toolkit. It provides the same user experience as docker build with many new features like creating scoped builder instances and building against multiple nodes concurrently. But the most important feature for me in this context is its caching mechanism which is really efficient and can save a lot of time during the build process. + +The pushed image to the ECR is tagged with the latest commit SHA along with the stage (dev/prod) and the latest tag. + +## SYNC AND BUILD INFRASTRUCTURE JOBS + +### SYNC INFRASTRUCTURE JOB + +The Sync Terraform job: `terraform-apply` in the [sync-and-deploy.yaml file](../.github/workflows/sync-and-deploy.yaml), automates the deployment of infrastructure with Terraform in a streamlined and secure manner. It includes three main steps: Checkout, which fetches the code repository for execution; Terraform Apply, which uses a custom GitHub Action to apply the Terraform configuration with environment-specific variables like AWS credentials, Cloudflare API tokens, and Kubernetes settings; and Prepare Slack Notification, which ensures a clear status update is sent to a Slack channel, regardless of deployment success or failure. 
+ +![](2024-11-19-23-02-06.png) + +REFER TO THIS [JOB LOGS](https://github.com/adamlahbib/boilerplate/actions/runs/11922627881/job/33229299694) + +### APPLICATION DEPLOYMENT JOB + +This Deploy API job: `deploy` in the [sync-and-deploy.yaml file](../.github/workflows/sync-and-deploy.yaml), automates the deployment of an API application after applying Terraform infrastructure changes. It proceeds through these key steps: + +- Checkout: Fetches the full repository history to ensure accurate change detection. +- Check for Changes: Verifies if changes were made to the app directory, determining whether deployment is necessary. +- Build, Tag, and Push Image: Uses a custom action to build a Docker image, tag it, and push it to Amazon ECR, but only if relevant changes are detected. +- Update Kubeconfig: Configures kubectl to interact with the correct EKS cluster, conditioned on deployment necessity. +- Deploy to EKS: Applies Kubernetes manifests, dynamically replacing the image placeholder in the deployment.yaml file for the current stage. +- Verify Deployment: Ensures the deployment succeeds by monitoring its rollout status and reporting success or failure. + +PROD: REFER TO THIS [JOB LOGS](https://github.com/adamlahbib/boilerplate/actions/runs/11922627881/job/33229354469) + +#### APPLICATION KUBERNETES MANIFESTS + +The k8s folder contains the Kubernetes manifests for both environments: + +For both environments, the deployment.yaml file is designed for efficiency and reliability through several thoughtful configurations: + +- Namespace isolation: The application is scoped to the dev or prod namespace, ensuring a clear separation of environments. +- Dynamic Image Reference: The {{image}} placeholder is replaced with the latest image tag during deployment, ensuring the most recent version is used and ensuring the image is not tampered with or changed. 
+- Efficient Resource Allocation: Requests and limits are defined for CPU (100m - 500m) and memory (256Mi - 512Mi), ensuring efficient utilisation of cluster resources while preventing over-provisioning. +- Rolling Updates: Configured with `maxSurge: 1` and `maxUnavailable: 0`, enabling smooth rolling updates with zero downtime by maintaining availability during updates. +- Readiness Probe: An HTTP-based readiness probe check `/health` on container port 8000 with a delay of 15 seconds and a 10-second interval ensuring the container is ready to serve traffic. +- Scalable and Minimal Design: Starts with a single replica `(replicas: 1)` for development efficiency and two replicas for production, ensuring high availability and scalability. + +The service.yaml file is namespace-scoped, label and selector-matched, and port-mapped to 80 for production and 8080 for development. It is also configured as a `ClusterIP` service, which is ideal for internal communication within the cluster and the Ingress, reducing exposure to external networks for enhanced security. + +## DESTROY INFRASTRUCTURE JOB + +The destroy infrastructure job uses the `dflook` images to run the Terraform destroy command. It is triggered manually, the tfstate file will be updated with the latest state of the infrastructure and the resources will be destroyed. The S3 bucket and the ECR repository are not destroyed and must be deleted manually in order to completely clean up the resources. + +Of course, our infrastructure is 'portable' so it can be recreated by just having the pipeline run the Terraform apply job again in order to figure out the missing resources and recreate them. + +# KUBERNETES NAMESPACES + +We already mentioned that the Kubernetes cluster is namespaced into dev and prod namespaces. The dev namespace is used for the development environment and the prod namespace is used for the production environment. 
+ +As part of the automated Kubernetes resource deployment thanks to the Helm provider, the following namespaces are created: + +- monitoring: Contains the monitoring resources (Prometheus, Grafana, Loki, Tempo). +- falco: Contains the Falco resources (Falco, Falco Sidekick). +- crowdsec: Contains the CrowdSec resources (CrowdSec, CrowdSec Sidekick). +- ingress-nginx: Contains the Nginx Ingress Controller resources. +- cert-manager: Contains the Cert-Manager resources. +- tailscale: Contains the Tailscale resources (Tailscale Operator). + +# CERTIFICATE MANAGEMENT + +## CERT-MANAGER + +The Helm release uses the Jetstack Cert-Manager chart to ensure a secure and automated setup for TLS certificate management by deploying the Cert-Manager and its CRDs in the cert-manager namespace. + +A Kubernetes secret stores the Cloudflare API token incoming from the GitHub Actions secrets and is used by the Cert-Manager to create the DNS-01 challenge for the domain through a ClusterIssuer resource for Let's Encrypt. + +This integration streamlines certificate issuance and renewal with secure credentials and dependable automation. + +![](2024-11-19-01-48-50.png) + +# CLOUDFLARE DNS MANAGEMENT + +The Cloudflare record resource dynamically configures a DNS record for the application within Cloudflare. It creates a CNAME record, using the hostname from the Kubernetes service's load balancer, or defaults to 'PENDING_LB_HOTNAME' if the load balancer hostname is unavailable. However, I ensured that this step only runs after the Nginx Ingress Controller is deployed and ready. The `proxied` option enables Cloudflare's proxying for added security and performance benefits through the Cloudflare network, WAF, and powerful DDoS protection. + +# SLACK NOTIFICATIONS + +Slack notifications are sent across different channels: + +- deployments channel: For deployment status updates. +- qa channel: For test results and code scanning reports.
+- falco-alerts channel: For Falco syscall alerts happening in the cluster (these can be limited to only critical alerts and all the notices would remain available on the Falco Sidekick dashboard). +- app-alerts channel: For application alerts and notifications incoming from Prometheus rules and Alertmanager. + +By using a Slack webhook, the GitHub Actions workflow and the Prometheus operator can send messages to the Slack channels, providing real-time updates on the deployment status, test results, and security alerts. + +Besides, we can define certain alerts based on the Grafana dashboard monitoring the cluster and have them sent to different channels like Slack or Email. + +![](2024-11-19-01-15-07.png) + +![](2024-11-19-01-15-30.png) + +# MONITORING, LOGGING, AND DASHBOARDING + +I would say it is somewhat challenging to deploy the whole Prometheus stack at once as you have to keep rinsing and repeating the process until you get every variable right, especially when you are configuring it through Terraform using the Helm provider. + +So I gathered the necessary configuration after each trial and error and put them together in the chart values files as variables in the Terraform module. + +Grafana itself had its own configuration to have the datasources and dashboards provisioned automatically. + +Alertmanager was configured to send alerts to the Slack channel out-of-the-box. + +And Prometheus had extra rules configured to send alerts to the Alertmanager, and extra scrapers to scrape the metrics from CrowdSec. + +I will get into the details of each component in the following sections so don't worry 😉 + +## PROMETHEUS + +Using the `kube-prometheus-stack` chart, the Prometheus Operator is deployed in the monitoring namespace. The Prometheus Operator manages the Prometheus instances and the different components of the Prometheus stack: Prometheus, Alertmanager, and Grafana. 
+ +Custom Configuration: + +- Grafana: Configured with an HTTPS-enforced connection, and secure cookie configuration. +- Alertmanager: Enables integration with Slack for real-time alerts. +- Prometheus: Has rules configured to watch for critical CPU usage and HTTP request rates, which can be found here [alertmanager-config.yaml](../terraform/assets/alertmanager-config.yaml). Also, has an extra scraper to scrape the metrics from Crowdsec /metrics endpoint which in turn would be used to showcase Crowdsec findings and attacks in real-time in the Grafana dashboards. + +### ALERT RULES + +The configuration defines custom Prometheus alert rules for resource monitoring including: + +- `HighRequestRate` Alert: Triggers if the HTTP request rate exceeds 100 requests/minute over a 5-minute window, signalling potential server overload. +- `HighCPULoad` Alert: Monitors CPU load, warning if the `node_load1` exceeds 80% for 5 minutes, indicating high system resource usage. + +Each alert has a defined severity level and detailed annotation for quick identification and actionable responses. + +## GRAFANA AND PRE-INSTALLED DASHBOARDS + +Grafana is deployed as part of the Prometheus stack, providing a powerful dashboarding and visualisation tool for monitoring and observability. The Grafana dashboard is pre-configured with the necessary datasources and dashboards for the Prometheus, Alertmanager, Tempo and Loki data sources and two CrowdSec dashboards: + +- Data Sources: Configure connections to Prometheus, Alertmanager, Tempo, and Loki with proxy access. Each data source is defined by its type, cluster URL, and access settings, with Prometheus being the primary data source for monitoring metrics. +- Dashboards: Add pre-configured JSON files as Grafana dashboards, such as [21419.json](../terraform/assets/21419.json) and [crowdsec_v5.json](../terraform/assets/crowdsec_v5.json) both used for CrowdSec monitoring and visualisation. 
+ +### GRAFANA CONFIGURATION + +Both the data source and dashboard configurations are managed through a Kubernetes config map and once provisioned, the Grafana operator automatically picks up the changes and applies them to the Grafana instance. + +### CROWDSEC DASHBOARDS + +The 21419.json dashboard was downloaded from https://grafana.com/grafana/dashboards/21419-crowdsec/ +and the crowdsec_v5.json dashboard is from the official CrowdSec Grafana dashboards repository on GitHub https://github.com/crowdsecurity/grafana-dashboards + +## LOKI + +Loki is a horizontally-scalable, highly-available, multi-tenant log aggregation system inspired by Prometheus. Loki differs from Prometheus by focusing on logs instead of metrics, and collecting logs via push, instead of pull. + +## ALERTMANAGER + +The Alertmanager handles alerts sent by client applications such as the Prometheus server. It takes care of deduplicating, grouping, and routing them to the correct receiver integration such as email, PagerDuty, or OpsGenie. It also takes care of silencing and inhibition of alerts. + +# TRACING + +## TEMPO + +Tempo is an easy-to-operate, high-scale, and cost-effective distributed tracing system. Tempo can be used to oversee the performance of microservices and distributed systems, providing insights into latency, errors, and performance bottlenecks. + +# KYVERNO + +Kyverno is a policy engine designed for Kubernetes. It allows cluster administrators to enforce policies on resources in a Kubernetes cluster. Kyverno policies are written in YAML and can be used to enforce security, compliance, and operational best practices. + +The following policies are enforced: + +- Disallow privileged containers: Ensures that no privileged containers are deployed in the cluster. +- Disallow NodePort services: Ensures that no NodePort services are deployed in the cluster. +- Disallow Latest Tag: Ensures that no images are deployed with the latest tag. 
+- Disallow Default Namespace: Ensures that no resources are deployed in the default namespace. +- Allowed Replicas: Ensures that the number of replicas for a deployment is within a specified range. +- Allowed Container Registry: Ensures that the container registry used in the deployment is allowed. +- Require Resource Requests and Limits: Ensures that resource requests and limits are set for containers in a deployment. +- Require Pod Probe: Ensures that liveness and readiness probes are set for containers in a deployment. + +# VPN RESTRICTED ACCESS + +In my experience, I have seen that this is a must, having administrative endpoints and dashboards exposed to the internet is always a risk. Therefore, I used Tailscale to restrict access to the monitoring resources and security reports to the Tailscale tailnet users only whether they are administrators or a small team of operators. + +This is achieved by installing the Tailscale operator in the cluster which will create a classname `tailscale` for Ingress resources and then the Ingress resources will be assigned a hostname and a TLS certificate for the specific tailnet domain. In the meantime Tailscale is working on making it possible to use custom domains or even CNAMES for the tailnet domain. + +## TAILSCALE + +[tailscale.yaml](../terraform/tailscale.yaml) contains the Tailscale operator configuration using the Client ID and secret for my tailnet. + +Once both the Tailscale operator and its related Ingress resources are created, the monitoring resources are only accessible via the Tailscale tailnet domain for users who are part of and connected to the tailnet. + +e.g. https://monitoring.tailcf97d7.ts.net to access Grafana which uses the `admin:boilerplate-operator` credentials (just for the task, I wouldn't write it in clear text otherwise). The password was pre-set in the GitHub Actions secrets and the username is the default Grafana admin username. 
+ +![](2024-11-19-17-47-31.png) + +# WAF, SECURITY PRACTICES, LOW-LEVEL MONITORING + +## CLOUDFLARE + +Cloudflare is a web infrastructure and website security company that provides content delivery network services, DDoS mitigation, Internet security, and distributed domain name server services. It is a reverse proxy, a web application firewall, and a distributed domain name server. + +## FALCO AND EBPF + +Falco is a runtime security tool that allows you to monitor the system calls and the kernel events happening in the cluster and detect any abnormal behaviour. It can be used to detect and alert on any abnormal behaviour happening in the cluster through its Falco sidekick which can send the alerts to Slack or any other alerting tool. + +Falco uses eBPF to monitor the system calls before they reach the kernel. This is useful to prevent any privilege escalation or any other malicious activity happening in the cluster whether it is from an insider or an outsider (like a messing around developer, a third-party tool, or possibly an attacker). + +### FALCO SYSCALLS SLACK ALERTS + +We can limit the alerts to only critical alerts and all the notices would remain available on the Falco Sidekick dashboard. As that specific channel is currently receiving every action happening in the cluster. + +![](2024-11-19-01-14-11.png) + +### FALCO SIDEKICK + +Falco Sidekick is a tool that listens to Falco events and sends them to different outputs like Slack, Email, or any other alerting tool. It also has its own dashboard where you can see the events happening in the cluster in real-time, and as it contains sensitive information, it is only accessible via the Tailscale tailnet at https://falco.tailcf97d7.ts.net. `admin:admin` for access. + +![](2024-11-19-01-22-01.png) + +## CROWDSEC + +I have used Open-Appsec before, which is an ML-based Firewall and I have to say that CrowdSec is a great alternative to it. 
It is a modern behaviour-based security tool that can detect and block attacks in real-time. + +CrowdSec is a free, modern & collaborative behavior detection engine, coupled with a global IP reputation network. It stacks on fail2ban's philosophy but is IPV6 compatible and 60x faster (Go vs Python), it uses Grok patterns to parse logs and YAML scenarios to identify behaviors. CrowdSec is engineered for modern Cloud / Containers / VM-based infrastructures (by decoupling detection and remediation). Once detected you can remedy threats with various bouncers (firewall block, nginx http 403, Captchas, etc.) while the aggressive IP can be sent to CrowdSec for curation before being shared among all users to further improve everyone's security. Quoted from their GitHub repository. + +### IMPLEMENTATION + +A dedicated namespace for CrowdSec is created in the Kubernetes cluster and the CrowdSec Helm chart is deployed within it along with randomly generated bouncer key as a secret, which is used later to help block malicious IPs reaching out the Ingress resources and the CrowdSec enroll key to connect to the CrowdSec SaaS dashboard. + +[The CrowdSec values file](../terraform/assets/crowdsec-values.yaml) contains the configuration for the CrowdSec Helm chart, including the above mentioned keys. It configures the container_runtime, the local API LAPI, the CrowdSec Agent, and Prometheus metrics. + +- The container runtime is set to containerd. +- The LAPI which is the component that allows CrowdSec machines (agents, API, etc.) to push alerts and decisions to a database, allow bouncers to consume said alerts and decision from the database, and allow cscli to view, add, or remove decisions. The LAPI is configured with optimised resource limits and metrics exposure. +- CrowdSec's agent is lightweight open source software that detects peers with aggressive behavior to prevent them accessing your systems. 
+- CrowdSec can expose a prometheus endpoint for collection (on http://127.0.0.1:6060/metrics by default). You can edit the listen_addr to 0.0.0.0 in config.yaml to allow an external Prometheus to scrape the metrics. On the Prometheus side we set up a scrape job to scrape the metrics from the CrowdSec agent. + +The NGINX ingress controller is extended with the CrowdSec Lua bouncer plugin, dynamically initialised using extraInitContainers and configured to load CrowdSec's security policies directly. Volumes and volume mounts are carefully orchestrated to support the plugin’s functionality. This configuration can be found in the [crowdsec-ingress-nginx.yaml](../terraform/assets/crowdsec-ingress-nginx.yaml) file, and its application within the Ingress operator [here](../terraform/ingress.tf). + +### CROWDSEC SAAS DASHBOARD + +![](2024-11-19-01-10-13.png) + +![](2024-11-19-01-09-44.png) + +### CROWDSEC GRAFANA DASHBOARDS + +![](2024-11-19-23-16-06.png) + +![](2024-11-19-23-16-28.png) + +PS. This is a fresh CrowdSec installation, and no attacks were detected at the time of writing this documentation :/ + +# WRAPPING UP + +This task was a great opportunity to showcase some of the best practices I followed in my career and to always try different tools, as in the case of CrowdSec and Tailscale. + +## IN TERMS OF SUPPLY CHAIN ASSURANCE + +The principles of SLSA were followed in the pipeline: + +- Basic SLSA: The pipeline uses a CI/CD tool to automate the builds, code resides within a private version-controlled repository where logs are stored and monitored as part of the pipeline. +- Artifacts are signed with the commit SHA and the stage (dev/prod) and the latest tag. Dependencies are checked for updates and vulnerabilities using Grype and Dependabot. Sandboxed environments are used for build and testing to prevent external tampering. 
+- Main branch is protected except for the administrator for the sake of the task, MFA is enforced for the GitHub account and any third-party tools. Builds are deterministic by controlling inputs, tools, and environments. GitHub Actions provide an overview of the pipeline status, previous deployments, and logs. So we can revert back to a previous commit, thus deployment if needed. +- Builds are replicated using the same inputs, and if no changes are detected to the app directory, or `*.tf` files, GitHub Actions will not do any changes. Nitrokey is used for access and commits. Secrets are managed by GitHub Actions as this is a PoC, in a real-world scenario, a secret management tool like HashiCorp Vault would be used. + +## IN TERMS OF APPLICATION SECURITY + +DevSecOps implementation is CI/CD-centric and implies the integration of AST (Application Security Testing) tools into the development pipeline. The pipeline includes the following security tools and the choice of one tool over the other in each category was based on my previous experience with them for Flask and Django Python backends: + +- Static (Semgrep, Bandit, Horusec, GoSec) +- Compositional (Dependency Check, Trivy, OpenSCA) +- Scanning Docker Images (Trivy, Grype) +- Secret Scanners (GitLeaks, TruffleHog) +- IaC Scanners (Checkov, Terrascan) + +### COMPLIANCE WITH OWASP TOP 10 + +1. Broken Access Control (A01:2021): Dynamic testing with OWASP ZAP can identify broken access control issues in the application. +2. Cryptographic Failures (A02:2021): Bandit is effective at identifying weak cryptographic implementations or configurations in the codebase. +3. Injection (A03:2021): Bandit can detect vulnerabilities like SQLi or command injection in code. +4. Insecure Design (A04:2021): Checkov mitigates insecure design risks by ensuring that the cloud infrastructure is securely configured. +5. 
Security Misconfiguration (A05:2021): Dependency Check, Buildx, Checkov and Grype can identify configuration issues in dependencies, Docker images, and infrastructure. +6. Vulnerable and Outdated Components (A06:2021): Dependency Check and Grype are well suited for ensuring dependencies are up-to-date and free of vulnerabilities. +7. Identification and Authentication Failures (A07:2021): Static tools can identify hardcoded credentials, which is supported by TruffleHog. +8. Software and Data Integrity Failures (A08:2021): Bandit can identify code integrity issues, while Grype can identify vulnerabilities in the built Docker images. Grype goes with Syft, but for this task I just relied on Grype directly which should integrate the Syft scanning as well. But to generate an SBOM, Syft should be used. +9. Security Logging and Monitoring Failures (A09:2021): GitHub Actions and the pipeline are configured to send alerts to Slack. Besides, GitHub keeps logs of the pipeline runs. +10. Server-Side Request Forgery (A10:2021): Bandit can identify SSRF vulnerabilities in the codebase. DAST tools like OWASP ZAP can identify SSRF vulnerabilities in the application. + +#### FURTHER SECURITY MEASURES + +- Broad coverage of security tools that address static, compositional, and infrastructure-related vulnerabilities effectively. +- Secrets Management with GitHub Actions secrets and TruffleHog. I'd like to again say that in a real-world scenario, a secret management tool like HashiCorp Vault would be used and checking for secrets can take place as part of the pre-commit hooks! +- Shift-Left Security with the integration of security tools in the CI/CD pipeline. +- Container Security with Grype. +- Runtime Security with Falco and CrowdSec. +- Incident Response with Falco Sidekick, Loki, and CrowdSec. +- Compliance Monitoring +- Dynamic Testing (DAST) with OWASP ZAP +- Kubernetes rollback in case of penetration findings. 
+ +#### COMPLIANCES + +In my work, I ensure compliance with key security and privacy standards, including ISO27001, SOC2, GDPR, NIST 800-53, and 800-218, by integrating robust tools and workflows into CI/CD pipelines. I adopt best practices from OpenSSF and SLSA to enhance supply chain security, while aligning with OWASP Top 10 recommendations. Remediation of vulnerabilities identified during penetration tests is prioritised, ensuring both proactive and reactive measures uphold organisational compliance and mitigate risks effectively. diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..6e41fdc --- /dev/null +++ b/app/main.py @@ -0,0 +1,33 @@ +from fastapi import FastAPI, status +from pydantic import BaseModel +import uvicorn + +app = FastAPI() + + +class HealthCheck(BaseModel): + """Response model to validate and return when performing a health check.""" + + status: str = "OK" + + +@app.get( + "/health", + tags=["healthcheck"], + summary="Perform a Health Check", + response_description="Return HTTP Status Code 200 (OK)", + status_code=status.HTTP_200_OK, + response_model=HealthCheck, +) +def get_health() -> HealthCheck: + """ + ## Perform a Health Check + Endpoint to perform a healthcheck on. This endpoint can primarily be used by Docker + to ensure a robust container orchestration and management is in place. Other + services which rely on proper functioning of the API service will not deploy if this + endpoint returns any other HTTP status code except 200 (OK). + Returns: + HealthCheck: Returns a JSON response with the health status + """ + return HealthCheck(status="OK") + + diff --git a/app/readme.md b/app/readme.md new file mode 100644 index 0000000..5ec2956 --- /dev/null +++ b/app/readme.md @@ -0,0 +1,7 @@ +POC Application in Python! 
+=========================== +Stack: FastAPI, Uvicorn, Pytest, PyCov, Flake8 + +Python 3.11 + +=============== \ No newline at end of file diff --git a/app/tests/__init__.py b/app/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/tests/test_health.py b/app/tests/test_health.py new file mode 100644 index 0000000..0052d9c --- /dev/null +++ b/app/tests/test_health.py @@ -0,0 +1,10 @@ +from fastapi.testclient import TestClient +from app.main import app + +client = TestClient(app) + + +def test_health_check(): + response = client.get("/health") + assert response.status_code == 200 + assert response.json() == {"status": "OK"} diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..d466d0a --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,11 @@ +version: '3.8' +services: + app: + build: + context: . + dockerfile: Dockerfile + image: app:latest + ports: + - "8000:8000" + environment: + - PYTHONUNBUFFERED=1 diff --git a/k8s/dev/deployment.yaml b/k8s/dev/deployment.yaml new file mode 100644 index 0000000..c6a79ad --- /dev/null +++ b/k8s/dev/deployment.yaml @@ -0,0 +1,37 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app + namespace: dev + labels: + app: my-app +spec: + replicas: 1 + selector: + matchLabels: + app: my-app + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: my-app + spec: + containers: + - name: app + image: {{IMAGE}} + resources: + requests: + cpu: "100m" + memory: "256Mi" + limits: + cpu: "500m" + memory: "512Mi" + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 \ No newline at end of file diff --git a/k8s/dev/service.yaml b/k8s/dev/service.yaml new file mode 100644 index 0000000..55507c9 --- /dev/null +++ b/k8s/dev/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: app-service + namespace: dev + labels: + app: my-app +spec: + selector: + app: 
my-app + ports: + - protocol: TCP + port: 8080 + targetPort: 8000 + type: ClusterIP diff --git a/k8s/prod/deployment.yaml b/k8s/prod/deployment.yaml new file mode 100644 index 0000000..7fa254d --- /dev/null +++ b/k8s/prod/deployment.yaml @@ -0,0 +1,37 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app + namespace: prod + labels: + app: my-app +spec: + replicas: 2 + selector: + matchLabels: + app: my-app + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: my-app + spec: + containers: + - name: app + image: {{IMAGE}} + resources: + requests: + cpu: "100m" + memory: "256Mi" + limits: + cpu: "500m" + memory: "512Mi" + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 \ No newline at end of file diff --git a/k8s/prod/service.yaml b/k8s/prod/service.yaml new file mode 100644 index 0000000..0bbd35e --- /dev/null +++ b/k8s/prod/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: app-service + namespace: prod + labels: + app: my-app +spec: + selector: + app: my-app + ports: + - protocol: TCP + port: 80 + targetPort: 8000 + type: ClusterIP diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..90a1677 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +fastapi == 0.91 +uvicorn==0.30.5 +pydantic==1.10.10 +pytest==7.4.2 +pytest-cov==4.1.0 +httpx==0.23.0 +flake8==7.1.1 \ No newline at end of file diff --git a/terraform/assets/21419.json b/terraform/assets/21419.json new file mode 100644 index 0000000..0be70e5 --- /dev/null +++ b/terraform/assets/21419.json @@ -0,0 +1,2849 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", 
+ "id": "grafana", + "name": "Grafana", + "version": "11.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 19, + "panels": [], + "title": "Overwiew", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 22, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "first" + ], + "fields": "/^version$/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "cs_info{instance=\"$instance\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Version", + "type": "stat" 
+ }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "dateTimeAsIso" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "first" + ], + "fields": "/^Time$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "expr": "(process_start_time_seconds{instance=\"$instance\"})*1000", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Up Since", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 4 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(increase(cs_filesource_hits_total{instance=\"$instance\"}[$__range]) or vector(0)) + \nsum(increase(cs_cloudwatch_stream_hits_total{instance=\"$instance\"}[$__range]) or vector(0)) + \nsum(increase(cs_journalctlsource_hits_total{instance=\"$instance\"}[$__range]) or vector(0)) + \nsum(increase(cs_syslogsource_hits_total{instance=\"$instance\"}[$__range]) or vector(0)) +\nsum(increase(cs_dockersource_hits_total{instance=\"$instance\"}[$__range]) or vector(0))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Raw Lines Read", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 4 + }, + "id": 25, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(cs_parser_hits_ok_total{instance=\"$instance\"} [$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Parsed Lines", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, 
+ "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 4 + }, + "id": 32, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(cs_node_wl_hits_ok_total{instance=\"$instance\"} [$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Whitelisted Lines", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 4 + }, + "id": 29, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(cs_bucket_poured_total{instance=\"$instance\"} [$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Lines Poured to Bucket", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", 
+ "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 4 + }, + "id": 28, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(cs_bucket_overflowed_total{instance=\"$instance\"} [$__range]))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Bucket Overflows", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 4 + }, + "id": 27, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(cs_bucket_created_total{instance=\"$instance\"} [$__range]))", + "instant": true, + "legendFormat": 
"__auto", + "range": false, + "refId": "A" + } + ], + "title": "Total Buckets Created", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 30, + "options": { + "displayMode": "lcd", + "maxVizHeight": 40, + "minVizHeight": 10, + "minVizWidth": 8, + "namePlacement": "top", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "manual", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(10, ceil(sum by (name) (increase(cs_bucket_overflowed_total{instance=\"$instance\"}[$__range]))))", + "format": "time_series", + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "A" + } + ], + "title": "Top 10 Scenarios", + "type": "bargauge" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 13, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "fieldMinMax": false, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "-" + } + }, + "type": "value" + } + ], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Source" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "type" + }, + "properties": [ + { + "id": "custom.width", + "value": 76 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 1, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "frameIndex": 4, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Source" + } + ] + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (type, source) (increase(cs_parser_hits_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ source }}", + "range": false, + "refId": "read" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (source) (increase(cs_parser_hits_ok_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ source }}", + "range": false, + "refId": "parsed" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (source) (increase(cs_parser_hits_ko_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ source }}", + "range": false, + "refId": "unparsed" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (source) (increase(cs_bucket_poured_total{instance=\"$instance\"} 
[$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ source }}", + "range": false, + "refId": "poured" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (source) (increase(cs_node_wl_hits_ok_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ source }}", + "range": false, + "refId": "whitelisted" + } + ], + "title": "Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "includeByName": {}, + "indexByName": { + "Time 1": 2, + "Time 2": 4, + "Time 3": 6, + "Time 4": 8, + "Time 5": 10, + "Value #parsed": 5, + "Value #poured": 9, + "Value #read": 3, + "Value #unparsed": 7, + "Value #whitelisted": 11, + "source": 1, + "type": 0 + }, + "renameByName": { + "Value #Lines Parsed": "Lines Parsed", + "Value #Lines Poured to Bucket": "Lines Poured to Bucket", + "Value #Lines Read": "Lines Read", + "Value #Lines Unparsed": "Lines Unparsed", + "Value #Lines Whitelisted": "Lines Whitelisted", + "Value #parsed": "Lines Parsed", + "Value #poured": "Lines Poured to Bucket", + "Value #read": "Lines Read", + "Value #unparsed": "Lines Unparsed", + "Value #whitelisted": "Lines Whitelisted", + "source": "Source", + "type": "Type" + } + } + } + ], + "type": "table" + } + ], + "title": "Acquisition", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 14, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false 
+ }, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "-" + } + }, + "type": "special" + } + ], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 3, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (appsec_engine) (increase(cs_appsec_reqs_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "instant": true, + "legendFormat": "{{ appsec_engine }}", + "range": false, + "refId": "processed" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (appsec_engine) (increase(cs_appsec_block_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ appsec_engine }}", + "range": false, + "refId": "blocked" + } + ], + "title": "AppSec Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value": "Processed", + "Value #A": "Processed", + "Value #B": "Blocked", + "Value #blocked": "Blocked", + "Value #processed": "Processed", + "appsec_engine": "AppSec Engine" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { +
"color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 4, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "repeat": "appsec_engine", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (rule_name) (increase(cs_appsec_rule_hits{appsec_engine=\"$appsec_engine\", instance=\"$instance\"} [$__range]))", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "AppSec '$appsec_engine' Rules Metrics", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Triggered", + "rule_name": "Rule ID" + } + } + } + ], + "type": "table" + } + ], + "title": "AppSec", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 15, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 2, + 
"options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (reason) (cs_alerts{instance=\"$instance\"})", + "format": "table", + "instant": true, + "legendFormat": "{{ reason }}", + "range": false, + "refId": "A" + } + ], + "title": "Alerts", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Time": "", + "Value": "Count", + "reason": "Reason" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Reason" + }, + "properties": [ + { + "id": "custom.width", + "value": 446 + } + ] + } + ] + }, + "gridPos": { + "h": 22, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 5, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Count" + } + ] + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (reason, origin, action) (cs_active_decisions{instance=\"$instance\"})", + "format": "table", + "instant": 
true, + "legendFormat": "{{ reason }}", + "range": false, + "refId": "A" + } + ], + "title": "Decisions", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "Value": 4, + "action": 3, + "origin": 2, + "reason": 1 + }, + "renameByName": { + "Value": "Count", + "action": "Action", + "origin": "Origin", + "reason": "Reason" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "-" + } + }, + "type": "value" + } + ], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 6, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (route, method) (increase(cs_lapi_route_requests_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "instant": true, + "legendFormat": "{{ route }}", + "range": false, + "refId": "A" + } + ], + "title": "Metrics", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "Value": 3, + "method": 2, + "route": 1 + }, + "renameByName": { + "Time": "", + "Value": "Hits", + "method": "Method", + "route": "Route" + } + } + } + ], + "type": "table" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 78 + }, + "id": 7, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (bouncer, route, method) (increase(cs_lapi_bouncer_requests_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "instant": true, + "legendFormat": "{{ route }}", + "range": false, + "refId": "A" + } + ], + "title": "Bouncers Metrics", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "Value": 4, + "bouncer": 1, + "method": 3, + "route": 2 + }, + "renameByName": { + "Time": "", + "Value": "Hits", + "bouncer": "Bouncer", + "method": "Method", + "route": "Route" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 84 + }, + "id": 8, + "options": { + "cellHeight": 
"sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "frameIndex": 0, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (bouncer) (increase(cs_lapi_decisions_ok_total [$__range]))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "{{ bouncer }}", + "range": false, + "refId": "non-empty" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (bouncer) (increase(cs_lapi_decisions_ko_total [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ bouncer }}", + "range": false, + "refId": "empty" + } + ], + "title": "Bouncers Decisions", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true + }, + "includeByName": {}, + "indexByName": { + "Time 1": 1, + "Time 2": 2, + "Value #empty": 3, + "Value #non-empty": 4, + "bouncer": 0 + }, + "renameByName": { + "Time": "", + "Time 1": "", + "Value #empty": "Non-Empty Answers", + "Value #non-empty": "Empty Answers", + "bouncer": "Bouncer", + "bouncer 1": "Bouncer" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "-" + } + }, + "type": "value" + } + ], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 10, + "w": 24, + "x": 0, + "y": 90 + }, + "id": 9, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "frameIndex": 0, + "showHeader": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (machine, route, method) (increase(cs_lapi_machine_requests_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "{{ machine }}", + "range": false, + "refId": "non-empty" + } + ], + "title": "Machine Metrics", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "Value": 4, + "machine": 1, + "method": 3, + "route": 2 + }, + "renameByName": { + "Time 1": "", + "Value": "Count", + "Value #empty": "Non-Empty Answers", + "Value #non-empty": "Empty Answers", + "bouncer 1": "Bouncer", + "machine": "Machine", + "method": "Method", + "route": "Route" + } + } + } + ], + "type": "table" + } + ], + "title": "Local API", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 16, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Parser" + }, + "properties": [ + { + "id": "custom.width", + "value": 446 + } + ] + } + ] + }, + 
"gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 10, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Hits" + } + ] + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (increase(cs_node_hits_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "hits" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (increase(cs_node_hits_ok_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "parsed" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (increase(cs_node_hits_ko_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "unparsed" + } + ], + "title": "Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #hits": "Hits", + "Value #parsed": "Parsed", + "Value #unparsed": "Unparsed", + "name": "Parser" + } + } + } + ], + "type": "table" + } + ], + "title": "Parsers", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, 
+ "w": 24, + "x": 0, + "y": 22 + }, + "id": 17, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "-" + } + }, + "type": "value" + } + ], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Parser" + }, + "properties": [ + { + "id": "custom.width", + "value": 446 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 11, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Overflows" + } + ] + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (cs_buckets{instance=\"$instance\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "current" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (increase(cs_bucket_overflowed_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "overflow" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by 
(name) (increase(cs_bucket_underflowed_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "expired" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (increase(cs_bucket_created_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "created" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name) (increase(cs_bucket_poured_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "poured" + } + ], + "title": "Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "includeByName": {}, + "indexByName": { + "Time 1": 1, + "Time 2": 2, + "Time 3": 3, + "Time 4": 9, + "Time 5": 10, + "Value #created": 6, + "Value #current": 4, + "Value #expired": 8, + "Value #overflow": 5, + "Value #poured": 7, + "name": 0 + }, + "renameByName": { + "Value #created": "Instantiated", + "Value #current": "Current Count", + "Value #expired": "Expired", + "Value #hits": "Hits", + "Value #overflow": "Overflows", + "Value #parsed": "Parsed", + "Value #poured": "Poured", + "Value #unparsed": "Unparsed", + "name": "Parser" + } + } + } + ], + "type": "table" + } + ], + "title": "Scenarios", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 18, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "noValue": "-", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Parser" + }, + "properties": [ + { + "id": "custom.width", + "value": 446 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 12, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Expired" + } + ] + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name, reason) (increase(cs_node_wl_hits_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "hits" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (name, reason) (increase(cs_node_wl_hits_ok_total{instance=\"$instance\"} [$__range]))", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{ name }}", + "range": false, + "refId": "whitelisted" + } + ], + "title": "Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "reason 2": true + }, + "includeByName": {}, + 
"indexByName": { + "Time 1": 1, + "Time 2": 2, + "Time 3": 3, + "Time 4": 9, + "Time 5": 10, + "Value #created": 6, + "Value #current": 4, + "Value #expired": 8, + "Value #overflow": 5, + "Value #poured": 7, + "name": 0 + }, + "renameByName": { + "Time": "", + "Time 2": "", + "Value #A": "Hits", + "Value #B": "Whitelisted", + "Value #created": "Instantiated", + "Value #current": "Current Count", + "Value #expired": "Expired", + "Value #hits": "Hits", + "Value #overflow": "Overflows", + "Value #parsed": "Parsed", + "Value #poured": "Poured", + "Value #unparsed": "Unparsed", + "name": "Whitelist", + "reason": "Reason", + "reason 1": "Reason", + "reason 2": "" + } + } + } + ], + "type": "table" + } + ], + "title": "Whitelists", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 31, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 20, + "maxDataPoints": 500, 
+ "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "expr": "rate(process_cpu_seconds_total{instance=\"$instance\"}[$__interval])*100", + "instant": false, + "legendFormat": "{{ instance }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 21, + "maxDataPoints": 500, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": 
"code", + "expr": "process_resident_memory_bytes{instance=\"$instance\"}", + "instant": false, + "legendFormat": "{{ instance }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Whitelisted" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Poured" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + 
"exemplar": false, + "expr": "sum(rate(cs_filesource_hits_total{instance=\"$instance\"}[$__interval]) or vector(0)) + \nsum(rate(cs_cloudwatch_stream_hits_total{instance=\"$instance\"}[$__interval]) or vector(0)) + \nsum(rate(cs_journalctlsource_hits_total{instance=\"$instance\"}[$__interval]) or vector(0)) + \nsum(rate(cs_syslogsource_hits_total{instance=\"$instance\"}[$__interval]) or vector(0)) +\nsum(rate(cs_dockersource_hits_total{instance=\"$instance\"}[$__interval]) or vector(0))", + "hide": true, + "instant": false, + "legendFormat": "Raw Lines", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "expr": "sum(rate(cs_parser_hits_ok_total{instance=\"$instance\"} [$__interval]))", + "hide": false, + "instant": false, + "legendFormat": "Parsed", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "expr": "sum(rate(cs_parser_hits_ko_total{instance=\"$instance\"} [$__interval]))", + "hide": false, + "instant": false, + "legendFormat": "Unparsed", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "expr": "sum(rate(cs_node_wl_hits_ok_total{instance=\"$instance\"} [$__interval]))", + "hide": false, + "instant": false, + "legendFormat": "Whitelisted", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "editorMode": "code", + "expr": "sum(rate(cs_bucket_poured_total{instance=\"$instance\"} [$__interval]))", + "hide": false, + "instant": false, + "legendFormat": "Poured", + "range": true, + "refId": "E" + } + ], + "title": "Lines Processed", + "type": "timeseries" + } + ], + "title": "System", + "type": "row" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": 
"prometheus", + "uid": "dsprometheusuid" + }, + "definition": "label_values(cs_appsec_reqs_total,appsec_engine)", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "appsec_engine", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(cs_appsec_reqs_total,appsec_engine)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "definition": "label_values(cs_info,instance)", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(cs_info,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "browser", + "title": "Crowdsec Metrics", + "uid": "edq8mqssw2jnkd", + "version": 64, + "weekStart": "", + "gnetId": 21419, + "description": "Shows the 'cscli metrics' and additional stats to Crowdsec in Grafana" +} \ No newline at end of file diff --git a/terraform/assets/alertmanager-config.yaml b/terraform/assets/alertmanager-config.yaml new file mode 100644 index 0000000..65534a9 --- /dev/null +++ b/terraform/assets/alertmanager-config.yaml @@ -0,0 +1,21 @@ +additionalPrometheusRulesMap: + alerts-config: + groups: + - name: resource-alerts + rules: + - alert: HighRequestRate + expr: rate(http_requests_total{job="web_server"}[5m]) > 100 + for: 1m + labels: + severity: critical + annotations: + summary: High request rate on web server + description: '{{ $labels.instance }} has high request rate (>100 req/min) for the last 5 minutes.' 
+          - alert: HighCPULoad
+            expr: node_load1 > 0.8  # NOTE(review): raw load average, not divided by CPU count - confirm 0.8 is intended on multi-core nodes
+            for: 5m
+            labels:
+              severity: warning
+            annotations:
+              summary: High CPU load detected
+              description: '{{ $labels.instance }} has CPU load >0.8 for the last 5 minutes.'
diff --git a/terraform/assets/crowdsec-ingress-nginx.yaml b/terraform/assets/crowdsec-ingress-nginx.yaml
new file mode 100644
index 0000000..9042d39
--- /dev/null
+++ b/terraform/assets/crowdsec-ingress-nginx.yaml
@@ -0,0 +1,29 @@
+controller:  # Helm values overlay loaded by helm_release.nginx-ingress-controller in terraform/ingress.tf
+  extraVolumes:
+    - name: crowdsec-bouncer-plugin  # scratch volume shared between the init container and the controller
+      emptyDir: {}
+  extraInitContainers:
+    - name: init-clone-crowdsec-bouncer
+      image: crowdsecurity/lua-bouncer-plugin  # NOTE(review): no tag given, so this resolves to the default tag - consider pinning a version
+      imagePullPolicy: IfNotPresent
+      env:
+        - name: API_URL
+          value: "http://crowdsec-service.crowdsec.svc.cluster.local:8080"
+        - name: API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: crowdsec-bouncer-key  # same secret/key that crowdsec-values.yaml passes to the LAPI as BOUNCER_KEY_nginx
+              key: BOUNCER_KEY_nginx
+        - name: BOUNCER_CONFIG
+          value: "/crowdsec/crowdsec-bouncer.conf"
+      command: ['sh', '-c', "sh /docker_start.sh; mkdir -p /lua_plugins/crowdsec/; cp -R /crowdsec/* /lua_plugins/crowdsec/"]  # run the image start script, then copy /crowdsec into the shared volume
+      volumeMounts:
+        - name: crowdsec-bouncer-plugin
+          mountPath: /lua_plugins
+  extraVolumeMounts:
+    - name: crowdsec-bouncer-plugin
+      mountPath: /etc/nginx/lua/plugins/crowdsec
+      subPath: crowdsec  # the directory the init container created under /lua_plugins
+  config:
+    plugins: "crowdsec"
+    lua-shared-dicts: "crowdsec_cache: 50m"
\ No newline at end of file
diff --git a/terraform/assets/crowdsec-values.yaml b/terraform/assets/crowdsec-values.yaml
new file mode 100644
index 0000000..c81cb5e
--- /dev/null
+++ b/terraform/assets/crowdsec-values.yaml
@@ -0,0 +1,67 @@
+container_runtime: containerd  # Helm values for the CrowdSec release (LAPI + log-reading agent)
+
+lapi:
+  resources:
+    requests:
+      cpu: 100m
+      memory: 384Mi
+    limits:
+      cpu: 1000m
+      memory: 384Mi  # memory limit equals the request
+  persistentVolume:
+    data:
+      enabled: false
+    config:
+      enabled: false
+
+  metrics:
+    enabled: true
+    serviceMonitor:
+      enabled: true
+      namespace: monitoring
+
+  env:
+    - name: ENROLL_KEY
+      valueFrom:
+        secretKeyRef:
+          name: crowdsec-enroll-key
+          key: ENROLL_KEY
+    - name: ENROLL_INSTANCE_NAME
+      value: "boilerplate-adaml-eks-cluster"
+    - name: ENROLL_TAGS
+      value: "boilerplate adaml eks cluster k8s"
+    - name: BOUNCER_KEY_nginx  # same secret/key the nginx bouncer init container reads as API_KEY
+      valueFrom:
+        secretKeyRef:
+          name: crowdsec-bouncer-key
+          key: BOUNCER_KEY_nginx
+
+agent:
+  acquisition:
+    - namespace: nginx-ingress  # must match the namespace used by helm_release.nginx-ingress-controller in terraform/ingress.tf
+      podName: nginx-ingress-controller-*  # must match the controller release/service name "nginx-ingress-controller"
+      program: nginx
+
+  env:
+    - name: COLLECTIONS
+      value: crowdsecurity/nginx
+
+  resources:
+    requests:
+      cpu: 100m
+      memory: 128Mi
+    limits:
+      cpu: 1000m
+      memory: 128Mi  # memory limit equals the request
+
+  metrics:
+    enabled: true
+    serviceMonitor:
+      enabled: true
+      namespace: monitoring
+
+prometheus:
+  enabled: true
+  level: full
+  listen_addr: "0.0.0.0"
+  listen_port: 6060
\ No newline at end of file
diff --git a/terraform/assets/crowdsec_v5.json b/terraform/assets/crowdsec_v5.json
new file mode 100644
index 0000000..7337fb2
--- /dev/null
+++ b/terraform/assets/crowdsec_v5.json
@@ -0,0 +1,1402 @@
+{
+  "__inputs": [
+    {
+      "name": "DS_PROMETHEUS",
+      "label": "Prometheus",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "prometheus",
+      "pluginName": "Prometheus"
+    }
+  ],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.1.2"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph (old)",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "stat",
+      "name": "Stat",
+      "version": ""
+    },
+    {
+      "type": "panel",
+      "id": "timeseries",
+      "name": "Time series",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "collapsed": false,
+      "datasource": null,
+ "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 24, + "panels": [], + "title": "Summary", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#E02F44", + "value": null + }, + { + "color": "#E02F44", + "value": 10 + }, + { + "color": "#299c46", + "value": 10 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": true, + "expr": "count(cs_info)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Running Crowdsec", + "transparent": true, + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 18, + "x": 6, + "y": 1 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + 
"options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(cs_filesource_hits_total[$__interval])) by (instance)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(increase(cs_journalctlsource_hits_total[$__interval])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(increase(cs_cloudwatch_stream_hits_total[$__interval])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(increase(cs_syslogsource_hits_total[$__interval])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Acquisitions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 
12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(cs_parser_hits_total[$__interval])) by (instance)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Parsers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + 
"values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(cs_bucket_overflowed_total[$__interval])) by (instance)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Buckets overflow", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 30, + "panels": [], + "title": "Alerts", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 36, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + 
}, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(cs_active_decisions)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Decisions Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 38, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(cs_alerts)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Alerts Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 22 + 
}, + "id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum(cs_active_decisions) by (reason)", + "interval": "", + "legendFormat": "{{reason}}", + "refId": "A" + } + ], + "title": "Decisions by scenario", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum(cs_active_decisions) by (action)", + "interval": "", + "legendFormat": "{{action}}", + "refId": "A" + } + ], + "title": "Decisions By Type", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 26, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + 
"defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 4, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(cs_node_hits_ok_total[$__interval])) by (name)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Parsers ok", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + 
"alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(cs_node_hits_ko_total[$__interval])) by (name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Parsers nok", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Parsers", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 28, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + 
"min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(cs_bucket_created_total[$__interval])) by (name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Buckets created", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(cs_bucket_overflowed_total[$__interval])) by (name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Buckets overflow", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "dsprometheusuid" + }, + "decimals": 1, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cs_buckets) by (name)", + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Buckets Timeline", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Buckets", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Crowdsec Overview", + "uid": "hjmZdB4nk", + "version": 7 +} \ No newline at end of file diff --git a/terraform/assets/kyverno-values.yaml b/terraform/assets/kyverno-values.yaml new file mode 100644 index 0000000..8db72c8 --- /dev/null +++ b/terraform/assets/kyverno-values.yaml @@ -0,0 +1,19 @@ +admissionController: + createSelfSignedCert: true +config: + webhooks: + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kyverno + - monitoring + - falco + - crowdsec + - nginx-ingress + - tailscale + - cert-manager + - kube-public + - kube-node-lease \ No newline at end of file diff --git a/terraform/backend.tf b/terraform/backend.tf new file mode 100644 index 0000000..1e1e50c --- /dev/null +++ b/terraform/backend.tf @@ -0,0 +1,3 @@ +terraform { + backend "s3" {} +} diff --git a/terraform/dns.tf b/terraform/dns.tf new file mode 100644 
index 0000000..f86f19d --- /dev/null +++ b/terraform/dns.tf @@ -0,0 +1,24 @@ +resource "cloudflare_record" "app" { + zone_id = var.CLOUDFLARE_ZONE_ID + name = var.dns_name + content = try( + data.kubernetes_service.nginx_ingress.status[0].load_balancer[0].ingress[0].hostname, + "PENDING_LB_HOSTNAME" + ) + type = "CNAME" + proxied = true + allow_overwrite = true + + lifecycle { + ignore_changes = [ + content + ] + } + + depends_on = [ + data.kubernetes_service.nginx_ingress, + kubernetes_ingress_v1.dev-ingress, + kubernetes_ingress_v1.prod-ingress, + kubernetes_ingress_v1.monitoring-ingress + ] +} \ No newline at end of file diff --git a/terraform/ecr.tf b/terraform/ecr.tf new file mode 100644 index 0000000..693afb0 --- /dev/null +++ b/terraform/ecr.tf @@ -0,0 +1,29 @@ +resource "aws_ecr_repository" "ecr" { + name = var.repository_name + image_tag_mutability = "MUTABLE" + + lifecycle { + prevent_destroy = true + } +} + +resource "aws_ecr_lifecycle_policy" "ecr_policy" { + repository = aws_ecr_repository.ecr.name + + policy = jsonencode({ + rules = [ + { + rulePriority = 1 + description = "Keep last 30 images" + selection = { + tagStatus = "untagged" + countType = "imageCountMoreThan" + countNumber = 30 + } + action = { + type = "expire" + } + } + ] + }) +} \ No newline at end of file diff --git a/terraform/eks.tf b/terraform/eks.tf new file mode 100644 index 0000000..4217e65 --- /dev/null +++ b/terraform/eks.tf @@ -0,0 +1,151 @@ +data "aws_eks_cluster_auth" "cluster_auth" { + name = var.cluster_name +} + +resource "aws_eks_cluster" "eks_cluster" { + name = var.cluster_name + role_arn = aws_iam_role.eks_role.arn + + vpc_config { + subnet_ids = module.vpc.private_subnets + security_group_ids = [aws_security_group.eks_cluster_sg.id] + } + + depends_on = [aws_iam_role_policy_attachment.eks_policy_attachment] +} + +resource "aws_iam_role" "eks_role" { + name = "eks-role" + assume_role_policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", 
+ Principal = { + Service = "eks.amazonaws.com" + }, + Action = "sts:AssumeRole" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "eks_policy_attachment" { + role = aws_iam_role.eks_role.name + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy" +} + +data "aws_ami" "node-image" { + owners = ["amazon"] + most_recent = true + + filter { + name = "name" + values = ["amazon-eks-node-1.31-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } +} + +resource "aws_iam_instance_profile" "eks_instance_profile" { + name = "eks-instance-profile" + role = aws_iam_role.eks_role.name +} + +resource "aws_iam_instance_profile" "eks_node_instance_profile" { + name = "eks-node-instance-profile" + role = aws_iam_role.worker_role.name +} + +resource "aws_launch_template" "eks_launch_template" { + block_device_mappings { + device_name = "/dev/xvda" + ebs { + delete_on_termination = true + volume_size = 20 + volume_type = "gp2" + } + } + + name_prefix = "eks-node" + image_id = data.aws_ami.node-image.id + user_data = base64encode(<<-SCRIPT + #!/bin/bash + set -o xtrace + /etc/eks/bootstrap.sh ${var.cluster_name} \ + --use-max-pods false \ + --kubelet-extra-args '--max-pods=150' + SCRIPT + ) + + metadata_options { + http_endpoint = "enabled" + http_tokens = "optional" + } +} + +resource "aws_eks_node_group" "eks_nodes" { + cluster_name = aws_eks_cluster.eks_cluster.name + node_group_name = "eks-node-group" + node_role_arn = aws_iam_role.worker_role.arn + subnet_ids = module.vpc.private_subnets + ami_type = "CUSTOM" + instance_types = ["t3.medium"] + capacity_type = "ON_DEMAND" + + launch_template { + id = aws_launch_template.eks_launch_template.id + version = "1" # static version instead of $Latest to avoid it be considered a change at every apply + } + + scaling_config { + desired_size = 3 + max_size = 4 + min_size = 1 + } + + update_config { + max_unavailable = 1 + } + + 
depends_on = [aws_iam_instance_profile.eks_node_instance_profile, aws_eks_cluster.eks_cluster, aws_security_group.eks_cluster_sg, aws_iam_role_policy_attachment.worker_node_attach_AmazonEKSWorkerNodePolicy, aws_iam_role_policy_attachment.worker_node_attach_AmazonEKS_CNI_Policy, aws_iam_role_policy_attachment.worker_node_attach_AmazonEC2ContainerRegistryReadOnly] # worker-role policies must be attached before the node group is created and removed only after it is destroyed
+}
+
+resource "aws_iam_role" "worker_role" { # role assumed by the worker-node EC2 instances
+  name = "eks-worker-role"
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17",
+    Statement = [
+      {
+        Effect = "Allow",
+        Principal = {
+          Service = "ec2.amazonaws.com"
+        },
+        Action = "sts:AssumeRole"
+      }
+    ]
+  })
+}
+
+resource "aws_iam_role_policy_attachment" "worker_node_attach_AmazonEKSWorkerNodePolicy" { # AWS-managed worker node policy
+  role       = aws_iam_role.worker_role.name
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
+}
+
+resource "aws_iam_role_policy_attachment" "worker_node_attach_AmazonEKS_CNI_Policy" { # AWS-managed CNI policy
+  role       = aws_iam_role.worker_role.name
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
+}
+
+resource "aws_iam_role_policy_attachment" "worker_node_attach_AmazonEC2ContainerRegistryReadOnly" { # AWS-managed read-only ECR policy
+  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
+  role       = aws_iam_role.worker_role.name
+}
\ No newline at end of file
diff --git a/terraform/ingress.tf b/terraform/ingress.tf
new file mode 100644
index 0000000..8c78ddb
--- /dev/null
+++ b/terraform/ingress.tf
@@ -0,0 +1,201 @@
+resource "helm_release" "nginx-ingress-controller" { # NOTE(review): no chart version is pinned, so installs may pick up whatever version is current
+  name             = "nginx-ingress-controller"
+  repository       = "https://charts.bitnami.com/bitnami"
+  chart            = "nginx-ingress-controller"
+  namespace        = "nginx-ingress"
+  create_namespace = true
+
+  set {
+    name  = "service.type"
+    value = "LoadBalancer"
+  }
+
+  set {
+    name  = "controller.publishService.enabled" # NOTE(review): the controller.* keys look like another ingress chart's values schema - confirm this chart honors them
+    value = "true"
+  }
+  set {
+    name  = "controller.defaultTLS.secret"
+    value = "default/tls-cert"
+  }
+
+  values = [
+    file("./assets/crowdsec-ingress-nginx.yaml")
+  ]
+
+  depends_on = [helm_release.crowdsec]
+
+}
+
+data "kubernetes_service" "nginx_ingress" { # reads the controller's Service; dns.tf takes the load-balancer hostname from it
+  metadata {
+    name      = "nginx-ingress-controller"
+    namespace = "nginx-ingress"
+  }
+  depends_on = [helm_release.nginx-ingress-controller]
+}
+
+resource "kubernetes_namespace" "dev" { # namespace for the dev ingress below
+  metadata {
+    name = "dev"
+  }
+  depends_on = [helm_release.nginx-ingress-controller]
+}
+
+resource "kubernetes_namespace" "prod" { # namespace for the prod ingress below
+  metadata {
+    name = "prod"
+  }
+  depends_on = [helm_release.nginx-ingress-controller]
+}
+
+resource "kubernetes_ingress_v1" "dev-ingress" { # host "staging" via the tailscale ingress class -> app-service:8080
+  metadata {
+    name      = "dev-ingress"
+    namespace = kubernetes_namespace.dev.metadata[0].name # same value ("dev"), taken from the resource so the dependency is explicit in the graph
+  }
+
+  spec {
+    ingress_class_name = "tailscale"
+
+    tls {
+      hosts = ["staging"]
+    }
+
+    rule {
+      host = "staging"
+
+      http {
+        path {
+          path      = "/"
+          path_type = "Prefix"
+          backend {
+            service {
+              name = "app-service"
+              port {
+                number = 8080
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  depends_on = [helm_release.tailscale_operator, kubernetes_namespace.dev]
+}
+
+resource "kubernetes_ingress_v1" "prod-ingress" { # host var.dns_name via the nginx ingress class -> app-service:80
+  metadata {
+    name      = "prod-ingress"
+    namespace = kubernetes_namespace.prod.metadata[0].name # same value ("prod"), taken from the resource
+    labels = {
+      "app" = "nginx"
+    }
+    annotations = {
+      "nginx.ingress.kubernetes.io/ssl-redirect"       = "false"
+      "nginx.ingress.kubernetes.io/force-ssl-redirect" = "false"
+      "nginx.ingress.kubernetes.io/backend-protocol"   = "HTTP"
+    }
+  }
+
+
+  spec {
+    ingress_class_name = "nginx"
+
+    tls {
+      hosts       = [var.dns_name] # plain reference instead of the interpolation-only form "${var.dns_name}"
+      secret_name = "tls-cert"
+    }
+
+    rule {
+      host = var.dns_name
+
+      http {
+        path {
+          path      = "/"
+          path_type = "Prefix"
+          backend {
+            service {
+              name = "app-service"
+              port {
+                number = 80
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  depends_on = [helm_release.nginx-ingress-controller, kubernetes_namespace.prod]
+}
+
+resource "kubernetes_ingress_v1" "falco-ingress" { # host "falco" via the tailscale ingress class -> falco-falcosidekick-ui:2802
+  metadata {
+    name      = "falco-ingress"
+    namespace = "falco"
+  }
+
+  spec {
+    ingress_class_name = "tailscale"
+
+    tls {
+      hosts = ["falco"]
+    }
+
+    rule {
+      host = "falco"
+
+      http {
+        path {
+          path      = "/"
+          path_type = "Prefix"
+          backend {
+            service {
+              name = "falco-falcosidekick-ui"
+              port {
+                number = 2802
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  depends_on = [helm_release.tailscale_operator, helm_release.falco]
+}
+
+resource "kubernetes_ingress_v1" "monitoring-ingress" {
+  metadata {
+    name      = "monitoring-ingress"
+    namespace
= "monitoring"
+  }
+
+  spec {
+    ingress_class_name = "tailscale"
+
+    tls {
+      hosts = ["monitoring"]
+    }
+
+    rule {
+      host = "monitoring"
+
+      http {
+        path {
+          path      = "/"
+          path_type = "Prefix"
+          backend {
+            service {
+              name = "prometheus-operator-grafana" # every path under "/" is routed to this service on port 80
+              port {
+                number = 80
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  depends_on = [helm_release.tailscale_operator, helm_release.prometheus_operator]
+}
+
diff --git a/terraform/kyverno.tf b/terraform/kyverno.tf
new file mode 100644
index 0000000..93dace7
--- /dev/null
+++ b/terraform/kyverno.tf
@@ -0,0 +1,315 @@
+resource "helm_release" "kyverno" { # Kyverno chart 3.3.3, installed into its own namespace
+  name             = "kyverno"
+  repository       = "https://kyverno.github.io/kyverno/"
+  chart            = "kyverno"
+  namespace        = "kyverno"
+  create_namespace = true
+  version          = "3.3.3"
+
+  set {
+    name  = "admissionController.replicas"
+    value = "3"
+  }
+
+  values = [
+    file("./assets/kyverno-values.yaml")
+  ]
+
+  depends_on = [
+    helm_release.nginx-ingress-controller,
+    helm_release.crowdsec,
+    helm_release.falco,
+    helm_release.prometheus_operator,
+    helm_release.loki,
+    helm_release.tempo
+  ]
+
+  lifecycle {
+    ignore_changes = all # after creation Terraform never updates this release in place, whatever changes above
+  }
+}
+
+resource "kubectl_manifest" "app_must_have_label" {
+  yaml_body = <