diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 4a5490f7..ddf1a02f 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -4,105 +4,72 @@ on: pull_request: permissions: - id-token: write contents: read jobs: - # build-provider-e2e-images: - # name: Build Provider E2E Images - # runs-on : [self-hosted, linux, X64, jammy, large] - # steps: - # - name: Login to GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # # We run into rate limiting issues if we don't authenticate - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - # - name: Check out repo - # uses: actions/checkout@v4 - # - name: Install requirements - # run: | - # sudo apt update - # sudo apt install -y make docker-buildx - # sudo snap install go --classic --channel=1.22/stable - # sudo snap install kubectl --classic --channel=1.30/stable - # - name: Build provider images - # #run: sudo make docker-build-e2e - # run: | - # docker pull ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:ci-test - # docker tag ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:ci-test ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev - # docker pull ghcr.io/canonical/cluster-api-k8s/controlplane-controller:ci-test - # docker tag ghcr.io/canonical/cluster-api-k8s/controlplane-controller:ci-test ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev - # - name: Save provider image - # run: | - # sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev - # sudo chmod 775 provider-images.tar - # - name: Upload artifacts - # uses: actions/upload-artifact@v4 - # with: - # name: e2e-images - # path: | - # provider-images.tar - - # build-k8s-snap-e2e-images: - # name: Build K8s Snap E2E Images - # if: false - # runs-on: [self-hosted, linux, X64, jammy, large] - # steps: - # - name: Login to 
GitHub Container Registry - # uses: docker/login-action@v3 - # with: - # # We run into rate limiting issues if we don't authenticate - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - # - name: Check out repo - # uses: actions/checkout@v4 - # - name: Install requirements - # run: | - # sudo apt update - # sudo apt install -y make docker-buildx - # sudo snap install go --classic --channel=1.22/stable - # sudo snap install kubectl --classic --channel=1.30/stable - # - name: Build k8s-snap images - # working-directory: hack/ - # run: | - # ./build-e2e-images.sh - # - name: Save k8s-snap image - # run: | - # sudo docker save -o k8s-snap-image-old.tar k8s-snap:dev-old - # sudo docker save -o k8s-snap-image-new.tar k8s-snap:dev-new - # sudo chmod 775 k8s-snap-image-old.tar - # sudo chmod 775 k8s-snap-image-new.tar - # - name: Upload artifacts - # uses: actions/upload-artifact@v4 - # with: - # name: e2e-images - # path: | - # k8s-snap-image-old.tar - # k8s-snap-image-new.tar + build-e2e-images: + name: Build & Run E2E Images + runs-on: [self-hosted, linux, X64, jammy, large] + steps: + - + name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + # We run into rate limiting issues if we don't authenticate + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Check out repo + uses: actions/checkout@v4 + - name: Install requirements + run: | + sudo apt update + sudo snap install go --classic --channel=1.22/stable + sudo apt install make + sudo apt install docker-buildx + sudo snap install kubectl --classic --channel=1.30/stable + - name: Build provider images + run: sudo make docker-build-e2e + - name: Build k8s-snap images + working-directory: hack/ + run: | + ./build-e2e-images.sh + - name: Save provider image + run: | + sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev 
ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev + sudo chmod 775 provider-images.tar + - name: Save k8s-snap image + run: | + sudo docker save -o k8s-snap-image-old.tar k8s-snap:dev-old + sudo docker save -o k8s-snap-image-new.tar k8s-snap:dev-new + sudo chmod 775 k8s-snap-image-old.tar + sudo chmod 775 k8s-snap-image-new.tar + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: e2e-images + path: | + provider-images.tar + k8s-snap-image-old.tar + k8s-snap-image-new.tar run-e2e-tests: name: Run E2E Tests runs-on: [self-hosted, linux, X64, jammy, xlarge] - #needs: [build-provider-e2e-images] + needs: build-e2e-images strategy: - max-parallel: 1 # Only one at a time because of AWS resource limitations (like maximum number of elastic ip's) matrix: - infra: - - "aws" - #- "docker" ginkgo_focus: - #- "KCP remediation" - #- "MachineDeployment remediation" + - "KCP remediation" + - "MachineDeployment remediation" - "Workload cluster creation" - #- "Workload cluster scaling" - #- "Workload cluster upgrade" + - "Workload cluster scaling" + - "Workload cluster upgrade" # TODO(ben): Remove once all tests are running stable. fail-fast: false steps: - - name: Login to GitHub Container Registry + - + name: Login to GitHub Container Registry uses: docker/login-action@v3 with: # We run into rate limiting issues if we don't authenticate @@ -111,81 +78,35 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Check out repo uses: actions/checkout@v4 - # - name: Setup tmate session - # uses: canonical/action-tmate@main - # with: - # detached: true - # - name: Install requirements - # run: | - # sudo apt update - # sudo snap install go --classic --channel=1.22/stable - # sudo snap install kubectl --classic --channel 1.31/stable - # sudo apt install make - # ./hack/install-aws-nuke.sh - # - name: Download artifacts - # uses: actions/download-artifact@v4 - # with: - # name: e2e-images - # path: . 
- # - name: Load provider image - # run: sudo docker load -i provider-images.tar - # - name: Load k8s-snap old image - # if: matrix.infra == 'docker' - # run: | - # sudo docker load -i k8s-snap-image-old.tar - # - name: Load k8s-snap new image - # if: matrix.infra == 'docker' && matrix.ginkgo_focus == 'Workload cluster upgrade' - # run: | - # sudo docker load -i k8s-snap-image-new.tar - # - name: Create docker network - # run: | - # sudo docker network create kind --driver=bridge -o com.docker.network.bridge.enable_ip_masquerade=true - # - name: Increase inotify watches - # run: | - # # Prevents https://cluster-api.sigs.k8s.io/user/troubleshooting#cluster-api-with-docker----too-many-open-files - # sudo sysctl fs.inotify.max_user_watches=1048576 - # sudo sysctl fs.inotify.max_user_instances=8192 - - name: Install clusterawsadm - if: matrix.infra == 'aws' + - name: Install requirements run: | - curl -L https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/download/v2.6.1/clusterawsadm-linux-amd64 -o clusterawsadm - chmod +x ./clusterawsadm - sudo mv ./clusterawsadm /usr/local/bin - clusterawsadm version - - name: Configure AWS Credentials - id: creds - if: matrix.infra == 'aws' - uses: aws-actions/configure-aws-credentials@v4 + sudo apt update + sudo snap install go --classic --channel=1.22/stable + sudo apt install make + sudo apt install docker-buildx + sudo snap install kubectl --classic --channel=1.30/stable + - name: Download artifacts + uses: actions/download-artifact@v4 with: - audience: sts.amazonaws.com - aws-region: us-east-2 - role-to-assume: arn:aws:iam::018302341396:role/GithubOIDC - role-duration-seconds: 3600 - output-credentials: true - - name: Set AWS Credentials as Environment Variables - if: matrix.infra == 'aws' + name: e2e-images + path: . 
+ - name: Load provider image + run: sudo docker load -i provider-images.tar + - name: Load k8s-snap old image run: | - #echo "AWS_ACCESS_KEY_ID=${{ steps.creds.outputs.aws-access-key-id }}" >> "$GITHUB_ENV" - #echo "AWS_SECRET_KEY_ID=${{ steps.creds.outputs.aws-secret-access-key }}" >> "$GITHUB_ENV" - #echo "AWS_SESSION_TOKEN=${{ steps.creds.outputs.aws-session-token }}" >> "$GITHUB_ENV" - #echo "AWS_REGION=us-east-2" >> "$GITHUB_ENV" - - #export AWS_ACCESS_KEY_ID=${{ steps.creds.outputs.aws-access-key-id }} - export AWS_SECRET_KEY_ID="${{ steps.creds.outputs.aws-secret-access-key }}" - echo "AWS_SECRET_KEY_ID=${{ steps.creds.outputs.aws-secret-access-key }}" >> "$GITHUB_ENV" - #export AWS_SESSION_TOKEN=${{ steps.creds.outputs.aws-session-token }} - - AWS_B64ENCODED_CREDENTIALS=$(clusterawsadm bootstrap credentials encode-as-profile --region us-east-2) - echo "AWS_B64ENCODED_CREDENTIALS=$AWS_B64ENCODED_CREDENTIALS" >> "$GITHUB_ENV" - echo "::add-mask::$AWS_B64ENCODED_CREDENTIALS" - - name: Run e2e tests - if: ${{!(matrix.infra == 'aws' && (matrix.ginkgo_focus == 'KCP remediation' || matrix.ginkgo_focus == 'MachineDeployment remediation'))}} + sudo docker load -i k8s-snap-image-old.tar + - name: Load k8s-snap new image + if: matrix.ginkgo_focus == 'Workload cluster upgrade' + run: | + sudo docker load -i k8s-snap-image-new.tar + - name: Create docker network run: | - sudo snap install juju --classic --channel 2.9/stable - juju bootstrap aws/us-east-2 vimdiesel-aws --force --bootstrap-series jammy --bootstrap-constraints "arch=amd64" --model-default test-mode=true --model-default resource-tags=owner=vimdiesel --model-default automatically-retry-hooks=false --model-default 'logging-config==DEBUG' --model-default image-stream=daily --debug - juju scp -m controller "$DIR"/run-e2e-test.sh 0:/home/ubuntu/run-e2e-test.sh - juju exec --model controller --unit controller/0 -- AWS_B64ENCODED_CREDENTIALS=${AWS_B64ENCODED_CREDENTIALS} /home/ubuntu/run-e2e-test.sh - - name: 
Cleanup AWS account - if: false + sudo docker network create kind --driver=bridge -o com.docker.network.bridge.enable_ip_masquerade=true + - name: Increase inotify watches + run: | + # Prevents https://cluster-api.sigs.k8s.io/user/troubleshooting#cluster-api-with-docker----too-many-open-files + sudo sysctl fs.inotify.max_user_watches=1048576 + sudo sysctl fs.inotify.max_user_instances=8192 + - name: Run e2e tests run: | - aws-nuke run --config ./hack/aws-nuke-config.yaml --force --force-sleep 3 --no-dry-run + sudo E2E_INFRA=docker GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e diff --git a/Makefile b/Makefile index bce01e26..e05f1b8b 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ GINKGO_NODES ?= 1 # GINKGO_NODES is the number of parallel nodes to run GINKGO_TIMEOUT ?= 2h GINKGO_POLL_PROGRESS_AFTER ?= 60m GINKGO_POLL_PROGRESS_INTERVAL ?= 5m -E2E_INFRA ?= aws +E2E_INFRA ?= docker E2E_CONF_FILE ?= $(TEST_DIR)/e2e/config/ck8s-$(E2E_INFRA).yaml SKIP_RESOURCE_CLEANUP ?= false USE_EXISTING_CLUSTER ?= false @@ -408,7 +408,3 @@ $(CONTROLLER_GEN): ## Build controller-gen from tools folder. $(CONVERSION_GEN): ## Build conversion-gen from tools folder. 
GOBIN=$(TOOLS_BIN_DIR) $(GO_INSTALL) k8s.io/code-generator/cmd/conversion-gen $(CONVERSION_GEN_BIN) $(CONVERSION_GEN_VER) - -.PHONY: nuke -nuke: - aws-nuke -c ~/.config/aws-nuke/nuke-config.yaml --no-dry-run diff --git a/hack/aws-nuke-config.yaml b/hack/aws-nuke-config.yaml deleted file mode 100644 index bd93620d..00000000 --- a/hack/aws-nuke-config.yaml +++ /dev/null @@ -1,41 +0,0 @@ -regions: - - us-east-2 - -blocklist: - - "999999999999" # A blocklist is required, but this is a placeholder - -accounts: - 018302341396: {} - -__global__: - - property: tag - type: glob - value: "sigs.k8s.io/cluster-api-provider-aws/cluster/capick8s-*" - -resource-types: - includes: - - EC2Instance - - EC2SecurityGroup - - EC2Volume - - EC2InternetGateway - - EC2InternetGatewayAttachment - - EC2DHCPOption - - EC2NetworkACL - - EC2NATGateway - - EC2RouteTable - - EC2Subnet - - EC2Volume - - EC2VPC - - EC2VPCEndpoint - - EC2VPCEndpointServiceConfiguration - - EC2Address - - EC2NetworkInterface - - EC2VPCEndpointConnection - - EC2VPCPeeringConnection - - EC2EgressOnlyInternetGateway - - ELB - - ELBv2 - - ELBv2TargetGroup - - S3Bucket - - AutoScalingGroup - - AutoScalingLaunchConfiguration diff --git a/hack/juju-create-aws-instance.sh b/hack/juju-create-aws-instance.sh deleted file mode 100755 index 5e766f6f..00000000 --- a/hack/juju-create-aws-instance.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Description: -# Bootstraps a Juju cluster (1 machine) and installs all tools necessary -# to run the CAPI e2e tests on AWS, then runs the tests. 
-# -# Usage: -# $ juju-create-aws-instance.sh -# -# Assumptions: -# - These environment variables are set: -# - AWS_B64ENCODED_CREDENTIALS - -set -o nounset -set -o pipefail - -DIR="$(realpath $(dirname "${0}"))" - -# Bootstrap Juju -# Juju creates the instance that will host the management cluster -juju bootstrap aws/us-east-2 vimdiesel-aws --force --bootstrap-series jammy --bootstrap-constraints "arch=amd64" --model-default test-mode=true --model-default resource-tags=owner=vimdiesel --model-default automatically-retry-hooks=false --model-default 'logging-config==DEBUG' --model-default image-stream=daily --debug - -juju scp -m controller "$DIR"/run-e2e-test.sh 0:/home/ubuntu/run-e2e-test.sh - -#juju ssh --model controller 0 'sudo bash -s' <"$DIR"/run-e2e-test.sh -juju exec --model controller --unit controller/0 -- AWS_B64ENCODED_CREDENTIALS=${AWS_B64ENCODED_CREDENTIALS} /home/ubuntu/run-e2e-test.sh diff --git a/hack/run-e2e-test.sh b/hack/run-e2e-test.sh deleted file mode 100755 index 0e4489a1..00000000 --- a/hack/run-e2e-test.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -snap install go --classic --channel 1.22/stable -snap install kubectl --classic --channel 1.31/stable - -apt update -apt install -y docker.io docker-buildx make -systemctl enable --now docker - -curl -L https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/download/v2.6.1/clusterawsadm-linux-amd64 -o clusterawsadm -chmod +x ./clusterawsadm -mv ./clusterawsadm /usr/local/bin -clusterawsadm version - -wget https://github.com/kubernetes-sigs/kind/releases/download/v0.24.0/kind-linux-amd64 -O /usr/local/bin/kind - -export KIND_EXPERIMENTAL_DOCKER_NETWORK=bridge -kind version - -docker pull ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:ci-test -docker tag ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:ci-test ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev -docker pull ghcr.io/canonical/cluster-api-k8s/controlplane-controller:ci-test -docker tag 
ghcr.io/canonical/cluster-api-k8s/controlplane-controller:ci-test ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev - -git clone git@github.com:canonical/cluster-api-k8s.git /home/ubuntu/cluster-api-k8s && (cd /home/ubuntu/cluster-api-k8s || exit 1) - -sudo -E E2E_INFRA=aws GINKGO_FOCUS="Workload cluster creation" SKIP_RESOURCE_CLEANUP=true make test-e2e diff --git a/test/e2e/config/ck8s-aws.yaml b/test/e2e/config/ck8s-aws.yaml index 85579c09..d023c6b6 100644 --- a/test/e2e/config/ck8s-aws.yaml +++ b/test/e2e/config/ck8s-aws.yaml @@ -91,7 +91,7 @@ variables: AWS_NODE_INSTANCE_TYPE: t3.large AWS_PUBLIC_IP: true AWS_CREATE_BASTION: true - AWS_SSH_KEY_NAME: "default" + AWS_SSH_KEY_NAME: "etienne" AWS_AMI_ID: "ami-01b139e6226d65e4f" AWS_CONTROL_PLANE_ROOT_VOLUME_SIZE: 16 AWS_NODE_ROOT_VOLUME_SIZE: 16 @@ -101,30 +101,28 @@ variables: # There is some work to be done here on figuring out which experimental features # we want to enable/disable. EXP_CLUSTER_RESOURCE_SET: "true" - EXP_RUNTIME_SDK: "true" EXP_MACHINE_SET_PREFLIGHT_CHECKS: "false" - EXP_MACHINE_POOL: "true" CLUSTER_TOPOLOGY: "true" CAPA_LOGLEVEL: "4" intervals: # Ref: https://github.com/kubernetes-sigs/cluster-api-provider-aws/blob/main/test/e2e/data/e2e_conf.yaml - default/wait-machines: [ "35m", "10s" ] - default/wait-cluster: [ "35m", "10s" ] - default/wait-control-plane: [ "35m", "10s" ] - default/wait-worker-nodes: [ "20m", "10s" ] - conformance/wait-control-plane: [ "35m", "10s" ] - conformance/wait-worker-nodes: [ "35m", "10s" ] - default/wait-controllers: [ "5m", "10s" ] - default/wait-delete-cluster: [ "20m", "10s" ] - default/wait-machine-upgrade: [ "35m", "10s" ] - default/wait-contolplane-upgrade: [ "40m", "10s" ] - default/wait-machine-status: [ "25m", "10s" ] - default/wait-failed-machine-status: [ "2m", "10s" ] + default/wait-machines: [ "10m", "10s" ] + default/wait-cluster: [ "10m", "10s" ] + default/wait-control-plane: [ "10m", "10s" ] + default/wait-worker-nodes: [ "10m", "10s" ] 
+ conformance/wait-control-plane: [ "10m", "10s" ] + conformance/wait-worker-nodes: [ "10m", "10s" ] + default/wait-controllers: [ "10m", "10s" ] + default/wait-delete-cluster: [ "10m", "10s" ] + default/wait-machine-upgrade: [ "10m", "10s" ] + default/wait-contolplane-upgrade: [ "10m", "10s" ] + default/wait-machine-status: [ "10m", "10s" ] + default/wait-failed-machine-status: [ "10m", "10s" ] default/wait-infra-subnets: [ "5m", "30s" ] - default/wait-machine-pool-nodes: [ "40m", "10s" ] - default/wait-machine-pool-upgrade: [ "50m", "10s" ] - default/wait-create-identity: [ "1m", "10s" ] + default/wait-machine-pool-nodes: [ "10m", "10s" ] + default/wait-machine-pool-upgrade: [ "10m", "10s" ] + default/wait-create-identity: [ "3m", "10s" ] default/wait-job: [ "10m", "10s" ] - default/wait-deployment-ready: [ "5m", "10s" ] + default/wait-deployment-ready: [ "10m", "10s" ] default/wait-loadbalancer-ready: [ "5m", "30s" ] diff --git a/test/e2e/data/infrastructure-aws/cluster-template.yaml b/test/e2e/data/infrastructure-aws/cluster-template.yaml index bd998fd3..c1fc3dff 100644 --- a/test/e2e/data/infrastructure-aws/cluster-template.yaml +++ b/test/e2e/data/infrastructure-aws/cluster-template.yaml @@ -33,19 +33,6 @@ spec: controlPlaneLoadBalancer: healthCheckProtocol: TCP network: - vpc: - id: vpc-09dc405df98c78806 - subnets: - - id: subnet-01d81a191cbc4129d - - id: subnet-08ec1c6ed394b2314 - - id: subnet-04c84a4861de91635 - - id: subnet-0fdbd3832cd20a307 - securityGroupOverrides: - bastion: sg-00cbb4ff658537143 - controlplane: sg-00cbb4ff658537143 - apiserver-lb: sg-00cbb4ff658537143 - node: sg-00cbb4ff658537143 - lb: sg-00cbb4ff658537143 cni: cniIngressRules: - description: microcluster diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 42ad619a..98d2c790 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -525,7 +525,7 @@ type WaitForControlPlaneAndMachinesReadyInput struct { ControlPlane *controlplanev1.CK8sControlPlane } -// 
WaitForControlPlaneAndMachinesReady waits for a KThreeControlPlane object to be ready (all the machine provisioned and one node ready). +// WaitForControlPlaneAndMachinesReady waits for a CK8sControlPlane object to be ready (all the machines provisioned and one node ready). func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForControlPlaneAndMachinesReadyInput, intervals ...interface{}) { Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForControlPlaneReady") Expect(input.GetLister).ToNot(BeNil(), "Invalid argument. input.GetLister can't be nil when calling WaitForControlPlaneReady") diff --git a/yaml b/yaml deleted file mode 100644 index 976270bb..00000000 --- a/yaml +++ /dev/null @@ -1,11 +0,0 @@ -Model Controller Cloud/Region Version SLA Timestamp -controller vimdiesel-aws aws/us-east-2 3.5.4 unsupported 20:32:01-04:00 - -App Version Status Scale Charm Channel Rev Exposed Message -controller active 1 juju-controller 3.5/stable 105 no - -Unit Workload Agent Machine Public address Ports Message -controller/0* active idle 0 18.224.64.247 - -Machine State Address Inst id Base AZ Message -0 started 18.224.64.247 i-0880751e552c2b6a1 ubuntu@22.04 us-east-2a running