From 28da27d16c4b2d8079b8bd0afa289f4fe816faab Mon Sep 17 00:00:00 2001 From: Shaun Davis Date: Sat, 22 Jun 2024 23:21:27 -0500 Subject: [PATCH] Track locale version for future comparisons --- internal/flypg/collation.go | 23 ++++++++++------------- internal/flypg/node.go | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/internal/flypg/collation.go b/internal/flypg/collation.go index 8a6472ac..88e4e7f3 100644 --- a/internal/flypg/collation.go +++ b/internal/flypg/collation.go @@ -15,16 +15,14 @@ import ( const collationVersionFile = "/data/.collationVersion" -func collationChanged() (bool, error) { +func collationSumChanged(sum [16]byte) (bool, error) { // Short-circuit if there's no collation file. - if !collationVersionFileExists() { + _, err := os.Stat(collationVersionFile) + switch { + case os.IsNotExist(err): return true, nil - } - - // Calculate the md5sum of the ldd version. - sum, err := calculateLDDVersionSum() - if err != nil { - return false, fmt.Errorf("failed to calculate collation sum: %w", err) + case err != nil: + return false, fmt.Errorf("failed to stat collation lock file: %w", err) } // Read the collation lock file. @@ -42,9 +40,9 @@ func collationVersionFileExists() bool { return !os.IsNotExist(err) } -func calculateLDDVersionSum() ([16]byte, error) { +func calculateLocaleVersionSum() ([16]byte, error) { // md5sum the ldd version so we can verify if the system has changed. - output, err := utils.RunCommand("ldd --version", "postgres") + output, err := utils.RunCommand("locale --version", "postgres") if err != nil { return [16]byte{}, fmt.Errorf("failed to capture ldd version: %w", err) } @@ -53,7 +51,7 @@ func calculateLDDVersionSum() ([16]byte, error) { return md5.Sum(output), nil } -func writeCollationLock(sum [16]byte) error { +func writeCollationVersionFile(sum [16]byte) error { // Write the collation lock file. if err := os.WriteFile(collationVersionFile, sum[:], 0600); err != nil { return fmt.Errorf("failed to write collation lock file: %w", err) @@ -253,13 +251,12 @@ func reIndexMismatchedIndexes(ctx context.Context, conn *pgx.Conn) error { return fmt.Errorf("failed to scan row: %v", err) } - log.Printf("[WARN] Reindexing index %s.%s concurrently to address collation mismatch\n", schemaName, indexName) + log.Printf("[WARN] Re-indexing %s.%s concurrently to address collation mismatch\n", schemaName, indexName) _, err = conn.Exec(ctx, fmt.Sprintf("REINDEX INDEX CONCURRENTLY %s.%s;", schemaName, indexName)) if err != nil { log.Printf("failed to reindex %s.%s: %v\n", schemaName, indexName, err) } } - return nil } diff --git a/internal/flypg/node.go b/internal/flypg/node.go index 1ae244ff..5401c15d 100644 --- a/internal/flypg/node.go +++ b/internal/flypg/node.go @@ -306,9 +306,10 @@ func (n *Node) PostInit(ctx context.Context) error { } } - // Confirm collation integrity. - if err := n.confirmCollationIntegrity(ctx, conn); err != nil { - log.Printf("[WARN] problem occurred while evaluating collation integrity: %s", err) + // There was an OS change that can impact certain users during a Flex upgrade. + // This is a safety check to ensure that the collation integrity is maintained. + if err := n.evaluateCollationIntegrity(ctx, conn); err != nil { + log.Printf("[WARN] Problem occurred while evaluating collation integrity: %s", err) } case StandbyRoleName: @@ -478,9 +479,15 @@ func setDirOwnership() error { return nil } -func (n *Node) confirmCollationIntegrity(ctx context.Context, conn *pgx.Conn) error { +func (n *Node) evaluateCollationIntegrity(ctx context.Context, conn *pgx.Conn) error { + // Calculate the sum of the locale version. + sum, err := calculateLocaleVersionSum() + if err != nil { + return fmt.Errorf("failed to calculate collation sum: %w", err) + } + // Check to see if the version has changed - changed, err := collationChanged() + changed, err := collationSumChanged(sum) if err != nil { return fmt.Errorf("failed to check collation version file: %s", err) } @@ -504,8 +511,8 @@ func (n *Node) confirmCollationIntegrity(ctx context.Context, conn *pgx.Conn) er log.Printf("[WARN] Collation mismatches detected. Refreshing collation versions.\n") - // Detect any indexes that are impacted by collation mismatches. - // Unfortunately, this needs to be checked per database. + // Detect any indexes that are currently impacted by collation mismatches. + // Unfortunately, we will need to do this for each database. dbs, err := admin.ListDatabases(ctx, conn) if err != nil { return fmt.Errorf("failed to list databases: %s", err) @@ -513,6 +520,8 @@ func (n *Node) confirmCollationIntegrity(ctx context.Context, conn *pgx.Conn) er dbs = append(dbs, admin.DbInfo{Name: "template1"}) + mismatches := 0 + for _, db := range dbs { // Establish a connection to the specified database. dbConn, err := n.NewLocalConnection(ctx, db.Name, n.SUCredentials) @@ -532,12 +541,26 @@ func (n *Node) confirmCollationIntegrity(ctx context.Context, conn *pgx.Conn) er continue } + mismatches++ + log.Printf("[WARN] %d collation mismatches detected %s\n", count, db.Name) + // Identify any impacted indexes and re-index them concurrently. if err := reIndexMismatchedIndexes(ctx, dbConn); err != nil { return fmt.Errorf("failed to reindex database: %s", err) } } + if mismatches > 0 { + return nil + } + + // No collation issues found, we can safely update the version file. + // This will prevent the system from re-evaluating the collation integrity on every boot. + log.Printf("[INFO] No collation mismatches detected. Updating collation version file.\n") + if err := writeCollationVersionFile(sum); err != nil { + return fmt.Errorf("failed to write collation lock: %s", err) + } + return nil }