Skip to content

Commit

Permalink
feat: wait for checks before shutdown
Browse files Browse the repository at this point in the history
  • Loading branch information
adityathebe committed Oct 25, 2024
1 parent d85f05b commit 67d848e
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 3 deletions.
1 change: 1 addition & 0 deletions canary-checker.properties
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@

# topology.runNow=true
log.level.db=warn
# check.concurrency=100

# jobs.ComponentRelationshipSync.runNow=true
7 changes: 7 additions & 0 deletions cmd/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
apicontext "github.com/flanksource/canary-checker/api/context"
"github.com/flanksource/canary-checker/pkg/cache"
"github.com/flanksource/canary-checker/pkg/jobs"
"github.com/flanksource/canary-checker/pkg/jobs/canary"
canaryJobs "github.com/flanksource/canary-checker/pkg/jobs/canary"
"github.com/flanksource/canary-checker/pkg/runner"
"github.com/flanksource/canary-checker/pkg/utils"
Expand Down Expand Up @@ -96,6 +97,12 @@ func run() error {
// so we use a goroutine to unblock server start
// to prevent health check from failing
go jobs.Start()

// TODO: stop the cron scheduler so that no more checks are scheduled

shutdown.AddHookWithPriority("check jobs", shutdown.PriorityJobs, func() {
canary.AcquireAllCheckLocks(ctx)
})
}

go serve()
Expand Down
7 changes: 7 additions & 0 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/flanksource/canary-checker/pkg/db"
"github.com/flanksource/canary-checker/pkg/echo"
"github.com/flanksource/canary-checker/pkg/jobs"
"github.com/flanksource/canary-checker/pkg/jobs/canary"
canaryJobs "github.com/flanksource/canary-checker/pkg/jobs/canary"
echov4 "github.com/labstack/echo/v4"

Expand Down Expand Up @@ -49,6 +50,12 @@ var Serve = &cobra.Command{
canaryJobs.StartScanCanaryConfigs(apicontext.DefaultContext, dataFile, configFiles)
if executor {
jobs.Start()

// TODO: stop the cron scheduler so that no more checks are scheduled

shutdown.AddHookWithPriority("check jobs", shutdown.PriorityJobs, func() {
canary.AcquireAllCheckLocks(apicontext.DefaultContext)
})
}

serve()
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ require (
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)

// replace github.com/flanksource/duty => ../duty
replace github.com/flanksource/duty => ../duty

// replace github.com/flanksource/artifacts => ../artifacts

Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -861,8 +861,6 @@ github.com/flanksource/artifacts v1.0.15 h1:3ImJr2y0ZCXw/QrMhfJJktAT7pYD3sMZR5ix
github.com/flanksource/artifacts v1.0.15/go.mod h1:qHVCnQu5k50aWNJ5UhpcAKEl7pAzqUrFFKGSm147G70=
github.com/flanksource/commons v1.30.5 h1:p8PXGiNt7SurBBh9K3ea8/ZrDvacXSYHJSs/cqJLDK8=
github.com/flanksource/commons v1.30.5/go.mod h1:26zdVkmMPsGpvfcsvst5WgsqcyRL8KqFNxkumagBN+A=
github.com/flanksource/duty v1.0.731 h1:aV3BicPMi2uK/Q7ZKBTakNKIv2uC+9EWQ/F1WOcazr4=
github.com/flanksource/duty v1.0.731/go.mod h1:sZY2NytdenrkqXoMD6Gn2C8xH6dm5HsqOeE0p74Z2VE=
github.com/flanksource/gomplate/v3 v3.20.4/go.mod h1:27BNWhzzSjDed1z8YShO6W+z6G9oZXuxfNFGd/iGSdc=
github.com/flanksource/gomplate/v3 v3.24.39 h1:O763lnNIcTELSMYeIO0dNDfcb3LoZvzU1fr62I4Yxqg=
github.com/flanksource/gomplate/v3 v3.24.39/go.mod h1:0wY/+UPvd7CxmiTBNmzZdWIEOUZAsRkpGY1j5R711O8=
Expand Down
26 changes: 26 additions & 0 deletions pkg/jobs/canary/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,29 @@ import (
"github.com/flanksource/duty/job"
"github.com/flanksource/duty/models"
"github.com/robfig/cron/v3"
"golang.org/x/sync/semaphore"
)

const propertyCheckConcurrency = "check.concurrency"

var (
// The maximum number of checks that can run concurrently
defaultCheckConcurrency = 50

// Holds in the lock for every running check.
// Can be overwritten by 'check.concurrency' property.
globalCheckSemaphore *semaphore.Weighted
)

// AcquireAllCheckLocks blocks until the global check sempahore is fully acquired.
//
// This helps to ensure that no checks are currently running.
func AcquireAllCheckLocks(ctx context.Context) {
ctx.Logger.V(6).Infof("acquiring all check locks")
globalCheckSemaphore.Acquire(ctx, int64(ctx.Properties().Int(propertyCheckConcurrency, defaultCheckConcurrency)))
ctx.Logger.V(6).Infof("acquired all check locks")
}

var canaryJobs sync.Map

const DefaultCanarySchedule = "@every 5m"
Expand Down Expand Up @@ -140,6 +161,7 @@ func newCanaryJob(c CanaryJob) {
IgnoreSuccessHistory: true,
Retention: job.RetentionBalanced,
ResourceID: c.DBCanary.ID.String(),
Semaphores: []*semaphore.Weighted{globalCheckSemaphore},
ResourceType: "canary",
ID: fmt.Sprintf("%s/%s", c.Canary.Namespace, c.Canary.Name),
Fn: c.Run,
Expand All @@ -159,6 +181,10 @@ var SyncCanaryJobs = &job.Job{
Schedule: "@every 5m",
Retention: job.RetentionFew,
Fn: func(ctx job.JobRuntime) error {
if globalCheckSemaphore == nil {
globalCheckSemaphore = semaphore.NewWeighted(int64(ctx.Properties().Int(propertyCheckConcurrency, defaultCheckConcurrency)))
}

canaries, err := db.GetAllCanariesForSync(ctx.Context, runner.WatchNamespace)
if err != nil {
return err
Expand Down

0 comments on commit 67d848e

Please sign in to comment.