Skip to content

Commit

Permalink
Added integration tests and deletion of fw after unhealthytimeout
Browse files Browse the repository at this point in the history
  • Loading branch information
Honigeintopf committed Nov 4, 2024
1 parent fd71798 commit 0de0032
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 71 deletions.
51 changes: 29 additions & 22 deletions controllers/set/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,32 +46,28 @@ func (c *controller) deleteFirewalls(r *controllers.Ctx[*v2.FirewallSet], fws ..
return nil
}

func (c *controller) deleteAfterTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) {
func (c *controller) deleteIfUnhealthyOrTimeout(r *controllers.Ctx[*v2.FirewallSet], fws ...*v2.Firewall) ([]*v2.Firewall, error) {
var result []*v2.Firewall

for _, fw := range fws {
fw := fw

if fw.Status.Phase != v2.FirewallPhaseCreating {
continue
}

connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue

if c.isFirewallUnhealthy(fw) {
allocationTimestamp := pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp
if time.Since(allocationTimestamp.Time) > c.c.GetFirewallHealthTimeout() {
r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name)

err := c.deleteFirewalls(r, fw)
if err != nil {
return nil, err
}

result = append(result, fw)
continue
r.Log.Info("unhealthy firewall not recovering, deleting from set", "firewall-name", fw.Name)
err := c.deleteFirewalls(r, fw)
if err != nil {
return nil, err
}
result = append(result, fw)
continue
}

if fw.Status.Phase != v2.FirewallPhaseCreating {
continue
}

if !connected && time.Since(fw.CreationTimestamp.Time) > c.c.GetCreateTimeout() {
r.Log.Info("firewall not getting ready, deleting from set", "firewall-name", fw.Name)

Expand All @@ -89,11 +85,22 @@ func (c *controller) deleteAfterTimeout(r *controllers.Ctx[*v2.FirewallSet], fws
}

func (c *controller) isFirewallUnhealthy(fw *v2.Firewall) bool {
created := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue
ready := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue
connected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue
seedConnected := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue
distance := pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue

return !(created && ready && connected && seedConnected && distance)
var (
created = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallCreated)).Status == v2.ConditionTrue
ready = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallReady)).Status == v2.ConditionTrue
connected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerConnected)).Status == v2.ConditionTrue
seedConnected = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallControllerSeedConnected)).Status == v2.ConditionTrue
distance = pointer.SafeDeref(fw.Status.Conditions.Get(v2.FirewallDistanceConfigured)).Status == v2.ConditionTrue
)

if created && ready && connected && seedConnected && distance {
return false
}

if created && time.Since(pointer.SafeDeref(fw.Status.MachineStatus).AllocationTimestamp.Time) > c.c.GetFirewallHealthTimeout() {
return true
}

return false
}
2 changes: 1 addition & 1 deletion controllers/set/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error {
}
}

deletedFws, err := c.deleteAfterTimeout(r, ownedFirewalls...)
deletedFws, err := c.deleteIfUnhealthyOrTimeout(r, ownedFirewalls...)
if err != nil {
return err
}
Expand Down
116 changes: 69 additions & 47 deletions integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,53 +162,6 @@ var _ = Context("integration test", Ordered, func() {
Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, shootTokenSecret.DeepCopy()))).To(Succeed())
})

When("creating a firewall deployment that simulates unhealthiness", Ordered, func() {
var fwSet *v2.FirewallSet

BeforeAll(func() {
// Create the Firewall Deployment
fwDeployment := deployment()
Expect(k8sClient.Create(ctx, fwDeployment)).To(Succeed())

// Wait for the FirewallSet to be created
Eventually(func() error {
fwSetList := &v2.FirewallSetList{}
err := k8sClient.List(ctx, fwSetList, client.InNamespace(namespaceName))
if err != nil {
return err
}
if len(fwSetList.Items) == 0 {
return fmt.Errorf("no firewall sets found")
}
fwSet = &fwSetList.Items[0]
return nil
}, 15*time.Second, interval).Should(Succeed(), "FirewallSet should be created")
})

It("should update the deployment status to reflect the unhealthy replica", func() {
// Simulate unhealthiness by updating the FirewallSet status
fwSet.Status.UnhealthyReplicas = 1
Expect(k8sClient.Status().Update(ctx, fwSet)).To(Succeed())

// Wait for the deployment status to reflect the unhealthy replica
Eventually(func() int {
fetchedDeployment := &v2.FirewallDeployment{}
Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(deployment()), fetchedDeployment)).To(Succeed())
return fetchedDeployment.Status.UnhealthyReplicas
}, 15*time.Second, interval).Should(Equal(1), "unhealthy replicas should be reported")
})

It("should eventually replace the unhealthy firewall", func() {
// Wait for the controller to replace the unhealthy firewall
Eventually(func() bool {
fwSetList := &v2.FirewallSetList{}
Expect(k8sClient.List(ctx, fwSetList, client.InNamespace(namespaceName))).To(Succeed())
// Check if a new FirewallSet has been created
return len(fwSetList.Items) > 1
}, 60*time.Second, interval).Should(BeTrue(), "A new FirewallSet should be created to replace the unhealthy one")
})
})

Describe("the rolling update", Ordered, func() {
When("creating a firewall deployment", Ordered, func() {
It("the creation works", func() {
Expand Down Expand Up @@ -1960,4 +1913,73 @@ var _ = Context("integration test", Ordered, func() {

})

When("creating a firewall set that simulates unhealthiness", Ordered, func() {
var firewallSet *v2.FirewallSet

BeforeAll(func() {
swapMetalClient(&metalclient.MetalMockFns{
Firewall: func(m *mock.Mock) {
m.On("AllocateFirewall", mock.Anything, nil).Return(&metalfirewall.AllocateFirewallOK{Payload: firewall3}, nil).Maybe()
m.On("FindFirewall", mock.Anything, nil).Return(&metalfirewall.FindFirewallOK{Payload: firewall3}, nil).Maybe()
m.On("FindFirewalls", mock.Anything, nil).Return(&metalfirewall.FindFirewallsOK{Payload: []*models.V1FirewallResponse{firewall3}}, nil).Maybe()
},
Network: func(m *mock.Mock) {
m.On("FindNetwork", mock.Anything, nil).Return(&network.FindNetworkOK{Payload: network1}, nil).Maybe()
},
Machine: func(m *mock.Mock) {
m.On("UpdateMachine", mock.Anything, nil).Return(&machine.UpdateMachineOK{Payload: &models.V1MachineResponse{}}, nil).Maybe()
m.On("FreeMachine", mock.Anything, nil).Return(&machine.FreeMachineOK{Payload: &models.V1MachineResponse{ID: firewall3.ID}}, nil).Maybe()
},
Image: func(m *mock.Mock) {
m.On("FindLatestImage", mock.Anything, nil).Return(&image.FindLatestImageOK{Payload: image1}, nil).Maybe()
},
})

Expect(k8sClient.Create(ctx, deployment())).To(Succeed())
Eventually(func() error {
firewallSetList := &v2.FirewallSetList{}
err := k8sClient.List(ctx, firewallSetList, client.InNamespace(namespaceName))
if err != nil {
return err
}
if len(firewallSetList.Items) == 0 {
return fmt.Errorf("no firewall sets found")
}
firewallSet = &firewallSetList.Items[0]
return nil
}, 15*time.Second, interval).Should(Succeed(), "FirewallSet should be created")
})

It("should simulate unhealthiness and trigger deletion", func() {
firewallList := &v2.FirewallList{}
Eventually(func() int {

err := k8sClient.List(ctx, firewallList, client.InNamespace(firewallSet.Namespace))
if err != nil {
return 0
}
return len(firewallList.Items)
}, 15*time.Second, interval).Should(BeNumerically(">", 0), "Should have at least one firewall")

By("waiting for the firewall to be deleted")
Eventually(func() bool {
for _, fw := range firewallList.Items {
err := k8sClient.Get(ctx, client.ObjectKeyFromObject(&fw), &v2.Firewall{})
if !apierrors.IsNotFound(err) {
return false
}
}
return true
}, 10*time.Second, interval).Should(BeTrue(), "All Firewalls should be deleted")

By("verifying that a new firewall has been created")
Eventually(func() int {
newFirewallList := &v2.FirewallList{}
Expect(k8sClient.List(ctx, newFirewallList, client.InNamespace(firewallSet.Namespace))).To(Succeed())
return len(newFirewallList.Items)
}, 10*time.Second, interval).Should(Equal(1), "A new firewall should be created")
})

})

})
86 changes: 86 additions & 0 deletions integration/metal_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,92 @@ var (
Vrf: 50,
Vrfshared: true,
}
firewall3 = &models.V1FirewallResponse{
Allocation: &models.V1MachineAllocation{
BootInfo: &models.V1BootInfo{
Bootloaderid: pointer.Pointer("bootloaderid"),
Cmdline: pointer.Pointer("cmdline"),
ImageID: pointer.Pointer("imageid"),
Initrd: pointer.Pointer("initrd"),
Kernel: pointer.Pointer("kernel"),
OsPartition: pointer.Pointer("ospartition"),
PrimaryDisk: pointer.Pointer("primarydisk"),
},
Created: pointer.Pointer(strfmt.DateTime(testTime.Add(-20 * 24 * time.Hour))),
Creator: pointer.Pointer("creator"),
Description: "firewall allocation 3",
Filesystemlayout: fsl1,
Hostname: pointer.Pointer("firewall-hostname-3"),
Image: image1,
Name: pointer.Pointer("firewall-3"),
Networks: []*models.V1MachineNetwork{
{
Asn: pointer.Pointer(int64(200)),
Destinationprefixes: []string{"2.2.2.2"},
Ips: []string{"1.1.1.1"},
Nat: pointer.Pointer(false),
Networkid: pointer.Pointer("private"),
Networktype: pointer.Pointer(net.PrivatePrimaryUnshared),
Prefixes: []string{"prefixes"},
Private: pointer.Pointer(true),
Underlay: pointer.Pointer(false),
Vrf: pointer.Pointer(int64(100)),
},
},
Project: pointer.Pointer("project-1"),
Reinstall: pointer.Pointer(false),
Role: pointer.Pointer(models.V1MachineAllocationRoleFirewall),
SSHPubKeys: []string{"sshpubkey"},
Succeeded: pointer.Pointer(true),
UserData: "---userdata---",
},
Bios: &models.V1MachineBIOS{
Date: pointer.Pointer("biosdata"),
Vendor: pointer.Pointer("biosvendor"),
Version: pointer.Pointer("biosversion"),
},
Description: "firewall 1",
Events: &models.V1MachineRecentProvisioningEvents{
CrashLoop: pointer.Pointer(true),
FailedMachineReclaim: pointer.Pointer(true),
LastErrorEvent: &models.V1MachineProvisioningEvent{
Event: pointer.Pointer("Crashed"),
Message: "crash",
Time: strfmt.DateTime(testTime.Add(-10 * 24 * time.Hour)),
},
LastEventTime: strfmt.DateTime(testTime.Add(-7 * 24 * time.Hour)),
Log: []*models.V1MachineProvisioningEvent{
{
Event: pointer.Pointer("Phoned Home"),
Message: "phoning home",
Time: strfmt.DateTime(testTime.Add(-7 * 24 * time.Hour)),
},
},
},
Hardware: &models.V1MachineHardware{
CPUCores: pointer.Pointer(int32(16)),
Disks: []*models.V1MachineBlockDevice{},
Memory: pointer.Pointer(int64(32)),
Nics: []*models.V1MachineNic{},
},
ID: pointer.Pointer("3"),
Ledstate: &models.V1ChassisIdentifyLEDState{
Description: pointer.Pointer(""),
Value: pointer.Pointer(""),
},
Liveliness: pointer.Pointer("Unhealthy"),
Name: "firewall-3",
Partition: partition1,
Rackid: "rack-1",
Size: size1,
State: &models.V1MachineState{
Description: pointer.Pointer("state"),
Issuer: "issuer",
MetalHammerVersion: pointer.Pointer("version"),
Value: pointer.Pointer(""),
},
Tags: []string{"a"},
}
)

// we are sharing a client for the tests, so we need to make sure we do not run contradicting tests in parallel
Expand Down
2 changes: 1 addition & 1 deletion integration/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ var _ = BeforeSuite(func() {
ClusterTag: fmt.Sprintf("%s=%s", tag.ClusterID, "cluster-a"),
SafetyBackoff: 10 * time.Second,
ProgressDeadline: 10 * time.Minute,
FirewallHealthTimeout: 20 * time.Minute,
FirewallHealthTimeout: 19 * 24 * time.Hour,
CreateTimeout: 10 * time.Minute,
})
Expect(err).ToNot(HaveOccurred())
Expand Down

0 comments on commit 0de0032

Please sign in to comment.