-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Allow cloud archiving to run next to the primary * fix test * Retention work * Cleanup * fix log * Fix deletion call * Move the log under the debug flag * Adding tests * Checkin * Checkin * Add todo * Update AWS_ENDPOINT_URL -> AWS_ENDPOINT_URL_S3 * Refactor * Checkin * Working PITR, but there's quite a bit of cleanup to do * cleanup * cleanup * Didn't mean to remove this * Set archive_timeout * More cleanup * fixes and cleanup * Fixing up recovery time logic * Adding todo's and removing unused code * Unexport functions * Separate credentials into separate profiles * Fix errcheck * Add todo * static check fixes * Make archive settings configurable * Removing unnecessary change * Cleanup * Adding tests * Bug fix * Wait for PG to be up before monitoring. Also removing some logging that is unnecessary * Fix compile error * Adding log indicating the monitor is starting * Rename the env vars to something more appropriate * Cleanup * Fix that ensures the user configured full_backup_frequency is honored * Cleanup * Fixes issue where a failed backup was being selected as a base restore target * Remove support for 'latest' string, since this should be controlled with the restore target * Don't allow full_backup_frequencies lower than 1h * Cleaning up the config validations * Adding support for targetTimeline * Do not perform a remote restore if there's an existing postgresql directory * Revert back to RFC3339 * Postgres doesn't support 'Z', so trim it and convert to -00:00 * Moving back to RFC3339, however, 'Z' needs to be stripped and replaced with +00:00 * Allow target to be left out * Cleanup --------- Co-authored-by: Ben Iofel <[email protected]>
- Loading branch information
Showing
22 changed files
with
2,288 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"log" | ||
"time" | ||
|
||
"github.com/fly-apps/postgres-flex/internal/flypg" | ||
) | ||
|
||
func monitorBackupRetention(ctx context.Context, barman *flypg.Barman) { | ||
ticker := time.NewTicker(defaultBackupRetentionEvalFrequency) | ||
defer ticker.Stop() | ||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
log.Println("Shutting down backup retention monitor") | ||
return | ||
case <-ticker.C: | ||
result, err := barman.WALArchiveDelete(ctx) | ||
if err != nil { | ||
log.Printf("Backup retention failed with: %s", err) | ||
} | ||
|
||
if len(result) > 0 { | ||
log.Printf("Backup retention response: %s", result) | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"log" | ||
"time" | ||
|
||
"github.com/fly-apps/postgres-flex/internal/flypg" | ||
) | ||
|
||
func monitorBackupSchedule(ctx context.Context, barman *flypg.Barman) { | ||
// Determine when the last backup was taken. | ||
lastBackupTime, err := barman.LastCompletedBackup(ctx) | ||
if err != nil { | ||
log.Printf("Failed to resolve the last backup taken: %s", err) | ||
} | ||
|
||
fullBackupSchedule := defaultFullBackupSchedule | ||
|
||
// Set the full backup schedule if it is defined in the configuration. | ||
if barman.Settings.FullBackupFrequency != "" { | ||
fullBackupDur, err := time.ParseDuration(barman.Settings.FullBackupFrequency) | ||
switch { | ||
case err != nil: | ||
log.Printf("Failed to parse full backup frequency: %s", err) | ||
default: | ||
fullBackupSchedule = fullBackupDur | ||
} | ||
} | ||
|
||
// Ensure we have a least one backup before proceeding. | ||
if lastBackupTime.IsZero() { | ||
log.Println("No backups found! Performing the initial base backup.") | ||
|
||
if err := performInitialBaseBackup(ctx, barman); err != nil { | ||
log.Printf("Failed to perform the initial full backup: %s", err) | ||
log.Printf("Backup scheduler will re-attempt in %s.", fullBackupSchedule) | ||
} | ||
|
||
lastBackupTime = time.Now() | ||
} | ||
|
||
log.Printf("Full backup schedule set to: %s", fullBackupSchedule) | ||
|
||
// Calculate the time until the next backup is due. | ||
timeUntilNextBackup := time.Until(lastBackupTime.Add(fullBackupSchedule)) | ||
|
||
// Perform backup immediately if the time until the next backup is negative. | ||
if timeUntilNextBackup < 0 { | ||
log.Println("Performing full backup now") | ||
_, err := barman.Backup(ctx, false) | ||
if err != nil { | ||
log.Printf("Full backup failed with: %s", err) | ||
} | ||
|
||
timeUntilNextBackup = fullBackupSchedule | ||
} | ||
|
||
log.Printf("Next full backup due in: %s", timeUntilNextBackup) | ||
|
||
ticker := time.NewTicker(timeUntilNextBackup) | ||
defer ticker.Stop() | ||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
log.Println("Shutting down backup scheduler") | ||
return | ||
case <-ticker.C: | ||
// Perform a backup while passively waiting for the checkpoint process to complete. | ||
// This could actually take a while, so we should be prepared to wait. | ||
log.Println("Performing full backup") | ||
_, err := barman.Backup(ctx, false) | ||
if err != nil { | ||
// TODO - Implement a backup-off strategy. | ||
timeUntilNextBackup = time.Hour * 1 | ||
ticker.Reset(timeUntilNextBackup) | ||
|
||
log.Printf("Backup retention failed with: %s.", err) | ||
log.Printf("Backup will be re-attempted in %s.", timeUntilNextBackup) | ||
|
||
continue | ||
} | ||
|
||
log.Printf("Full backup completed successfully") | ||
ticker.Reset(fullBackupSchedule) | ||
} | ||
} | ||
} | ||
|
||
func performInitialBaseBackup(ctx context.Context, barman *flypg.Barman) error { | ||
maxRetries := 10 | ||
retryCount := 0 | ||
for { | ||
select { | ||
case <-ctx.Done(): | ||
return nil | ||
default: | ||
_, err := barman.Backup(ctx, true) | ||
if err != nil { | ||
log.Printf("Failed to perform the initial full backup: %s. Retrying in 30 seconds.", err) | ||
|
||
// If we've exceeded the maximum number of retries, we should return an error. | ||
if retryCount >= maxRetries { | ||
return fmt.Errorf("failed to perform the initial full backup after %d retries", maxRetries) | ||
} | ||
|
||
retryCount++ | ||
|
||
select { | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
case <-time.After(time.Second * 30): | ||
continue | ||
} | ||
} | ||
|
||
log.Println("Initial full backup completed successfully") | ||
return nil | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.