From 7be971081aee245b0b78ff22766d8eeb020b66f2 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Thu, 16 Jan 2025 09:34:11 +0100 Subject: [PATCH] Make sure we request pages with a known-flushed LSN. (#10413) This should fix the largest source of flakyness of test_nbtree_pagesplit_cycleid. ## Problem https://github.com/neondatabase/neon/issues/10390 ## Summary of changes By using a guaranteed-flushed LSN, we ensure that PS won't have to wait forever. (If it does wait forever, we know the issue can't be with Compute's WAL) --- .../regress/test_nbtree_pagesplit_cycleid.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/test_runner/regress/test_nbtree_pagesplit_cycleid.py b/test_runner/regress/test_nbtree_pagesplit_cycleid.py index 558557aebad6..32ec6fcb92ac 100644 --- a/test_runner/regress/test_nbtree_pagesplit_cycleid.py +++ b/test_runner/regress/test_nbtree_pagesplit_cycleid.py @@ -4,9 +4,19 @@ from fixtures.neon_fixtures import NeonEnv BTREE_NUM_CYCLEID_PAGES = """ - WITH raw_pages AS ( - SELECT blkno, get_raw_page_at_lsn('t_uidx', 'main', blkno, NULL, NULL) page - FROM generate_series(1, pg_relation_size('t_uidx'::regclass) / 8192) blkno + WITH lsns AS ( + /* + * pg_switch_wal() ensures we have an LSN that + * 1. is after any previous modifications, but also, + * 2. (critically) is flushed, preventing any issues with waiting for + * unflushed WAL in PageServer. + */ + SELECT pg_switch_wal() as lsn + ), + raw_pages AS ( + SELECT blkno, get_raw_page_at_lsn('t_uidx', 'main', blkno, lsn, lsn) page + FROM generate_series(1, pg_relation_size('t_uidx'::regclass) / 8192) AS blkno, + lsns l(lsn) ), parsed_pages AS ( /* cycle ID is the last 2 bytes of the btree page */ @@ -36,7 +46,6 @@ def test_nbtree_pagesplit_cycleid(neon_simple_env: NeonEnv): ses1.execute("CREATE UNIQUE INDEX t_uidx ON t(id);") ses1.execute("INSERT INTO t (txt) SELECT i::text FROM generate_series(1, 2035) i;") - ses1.execute("SELECT neon_xlogflush();") ses1.execute(BTREE_NUM_CYCLEID_PAGES) pages = ses1.fetchall() assert ( @@ -57,7 +66,6 @@ def test_nbtree_pagesplit_cycleid(neon_simple_env: NeonEnv): ses1.execute("DELETE FROM t WHERE id <= 610;") # Flush wal, for checking purposes - ses1.execute("SELECT neon_xlogflush();") ses1.execute(BTREE_NUM_CYCLEID_PAGES) pages = ses1.fetchall() assert len(pages) == 0, f"No back splits with cycle ID expected, got batches of {pages} instead" @@ -108,8 +116,6 @@ def vacuum_freeze_t(ses3, evt: threading.Event): # unpin the btree page, allowing s3's vacuum to complete ses2.execute("FETCH ALL FROM foo;") ses2.execute("ROLLBACK;") - # flush WAL to make sure PS is up-to-date - ses1.execute("SELECT neon_xlogflush();") # check that our expectations are correct ses1.execute(BTREE_NUM_CYCLEID_PAGES) pages = ses1.fetchall()