Skip to content

Commit

Permalink
fix/cpu-load (#144)
Browse files Browse the repository at this point in the history
* Updated clusterrole.yaml to include permissions for coordination.k8s.io leases, modified daemonset.yaml for rolling update strategy and added tolerations, and adjusted resource limits in values.yaml.

* Updated main.go to introduce a non-blocking delay with loopTicker for better CPU usage efficiency.

* Updated resource requests and limits for CPU and memory in AWS EKS and GCP GKE configurations.
  • Loading branch information
alexei-led authored Apr 4, 2024
1 parent b6fb907 commit a5d4618
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 42 deletions.
2 changes: 1 addition & 1 deletion chart/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ rules:
verbs: [ "get" ]
- apiGroups: [ "coordination.k8s.io" ]
resources: [ "leases" ]
verbs: [ "create", "get", "delete" ]
verbs: [ "create", "delete", "get" ]
{{- end }}
9 changes: 9 additions & 0 deletions chart/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "kubeip.name" . }}
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
Expand All @@ -20,6 +24,11 @@ spec:
{{- if .Values.daemonSet.nodeSelector }}
{{- toYaml .Values.daemonSet.nodeSelector | nindent 8 }}
{{- end }}
tolerations:
- operator: "Exists"
effect: "NoSchedule"
- operator: "Exists"
effect: "NoExecute"
containers:
- name: kubeip
image: "{{ .Values.image.repository }}"
Expand Down
12 changes: 8 additions & 4 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ serviceAccount:
name: kubeip-service-account
annotations:
gcpServiceAccountEmail: kubeip-service-account@workload-id-117715.iam.gserviceaccount.com
# annotations:
# awsRoleArn: "your-aws-role-arn"
# gcpServiceAccountEmail: "your-google-service-account-email"
# annotations:
# awsRoleArn: "your-aws-role-arn"
# gcpServiceAccountEmail: "your-google-service-account-email"


# Role-Based Access Control (RBAC) configuration.
rbac:
Expand All @@ -40,3 +40,7 @@ daemonSet:
resources:
requests:
cpu: 100m
memory: 64Mi
limits:
cpu: 100m
memory: 128Mi
66 changes: 31 additions & 35 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func assignAddress(c context.Context, log *logrus.Entry, client kubernetes.Inter
func run(c context.Context, log *logrus.Entry, cfg *config.Config) error {
ctx, cancel := context.WithCancel(c)
defer cancel()

// add debug mode to context
if cfg.DevelopMode {
ctx = context.WithValue(ctx, developModeKey, true)
Expand Down Expand Up @@ -167,43 +168,37 @@ func run(c context.Context, log *logrus.Entry, cfg *config.Config) error {
if err != nil {
return errors.Wrap(err, "initializing assigner")
}
// assign static public IP address
errorCh := make(chan error, 1) // buffered channel to avoid goroutine leak
go func() {
defer close(errorCh) // close the channel when the goroutine exits to avoid goroutine leak
e := assignAddress(ctx, log, clientset, assigner, n, cfg)
if e != nil {
errorCh <- e
}
}()

for {
select {
case err = <-errorCh:
if err != nil {
return errors.Wrap(err, "assigning static public IP address")
}
case <-ctx.Done():
log.Infof("kubeip agent gracefully stopped")
if cfg.ReleaseOnExit {
log.Infof("releasing static public IP address")
err = func() error {
releaseCtx, releaseCancel := context.WithTimeout(context.Background(), unassignTimeout) // release the static public IP address within 5 minutes
defer releaseCancel()
// use a different context for releasing the static public IP address since the main context is canceled
if err = assigner.Unassign(releaseCtx, n.Instance, n.Zone); err != nil {
return errors.Wrap(err, "failed to release static public IP address")
}
return nil
}()
if err != nil {
return err //nolint:wrapcheck
}
log.Infof("static public IP address released")
}
return nil
err = assignAddress(ctx, log, clientset, assigner, n, cfg)
if err != nil {
return errors.Wrap(err, "assigning static public IP address")
}

// pause the agent to prevent it from exiting immediately after assigning the static public IP address
// wait for the context to be done: SIGTERM, SIGINT
<-ctx.Done()
log.Infof("shutting down kubeip agent")

// release the static public IP address on exit
if cfg.ReleaseOnExit {
log.Infof("releasing static public IP address")
if releaseErr := releaseIP(assigner, n); releaseErr != nil { //nolint:contextcheck
return releaseErr
}
log.Infof("static public IP address released")
}
return nil
}

// releaseIP asks the assigner to unassign the static public IP address from
// the node identified by n.Instance and n.Zone.
//
// The call runs under a fresh context derived from context.Background() and
// bounded by unassignTimeout, rather than a caller-supplied context: at the
// shutdown call site the main context has already been canceled, so reusing it
// would abort the release immediately.
//
// It returns nil on success, or the Unassign error wrapped with a short
// description of the failed operation.
func releaseIP(assigner address.Assigner, n *types.Node) error {
	ctx, cancel := context.WithTimeout(context.Background(), unassignTimeout)
	defer cancel()

	err := assigner.Unassign(ctx, n.Instance, n.Zone)
	if err == nil {
		return nil
	}
	return errors.Wrap(err, "failed to release static public IP address")
}

func runCmd(c *cli.Context) error {
Expand All @@ -213,7 +208,8 @@ func runCmd(c *cli.Context) error {
cfg := config.NewConfig(c)

if err := run(ctx, log, cfg); err != nil {
log.Fatalf("eks-lens agent failed: %v", err)
log.WithError(err).Error("error running kubeip agent")
return err
}

return nil
Expand Down
3 changes: 2 additions & 1 deletion examples/aws/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ resource "kubernetes_daemonset" "kubeip_daemonset" {
}
resources {
requests = {
cpu = "100m"
cpu = "10m"
memory = "32Mi"
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion examples/gcp/gke.tf
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,8 @@ resource "kubernetes_daemonset" "kubeip_daemonset" {
}
resources {
requests = {
cpu = "100m"
cpu = "10m"
memory = "32Mi"
}
}
}
Expand Down

0 comments on commit a5d4618

Please sign in to comment.