From 1059e89c388a00220a2cf52dd32ff57c27c2f50d Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 26 Jun 2024 13:55:16 +0200 Subject: [PATCH 1/9] system_tests: check that state is freed in StateDB finalizer --- system_tests/recreatestate_rpc_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index bf321808de..dcd936259c 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "math/big" + "runtime" "strings" "sync" "testing" @@ -521,12 +522,23 @@ func TestGettingStateForRPCFullNode(t *testing.T) { blockCountRequiredToFlushDirties := builder.execConfig.Caching.BlockCount makeSomeTransfers(t, ctx, builder, blockCountRequiredToFlushDirties) + // force garbage callection to check if it won't break anything + runtime.GC() + exists = state.Exist(addr) err = state.Error() Require(t, err) if !exists { Fatal(t, "User2 address does not exist in the state") } + + // force garbage collection of StateDB object, what should cause the state finalizer to run + state = nil + runtime.GC() + _, err = bc.StateAt(header.Root) + if err == nil { + Fatal(t, "StateAndHeaderByNumber didn't failed as expected") + } } func TestGettingStateForRPCHybridArchiveNode(t *testing.T) { From d7a50e7a60ee2dad03464f5ba1ed6d415d2fa42f Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 26 Jun 2024 14:23:10 +0200 Subject: [PATCH 2/9] system_tests: merge tests for getting state in full node and sparse archive node --- system_tests/recreatestate_rpc_test.go | 57 +++++++------------------- 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index dcd936259c..1aa98588f6 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -488,16 +488,11 @@ func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) { } } -func TestGettingStateForRPCFullNode(t *testing.T) { +func testGettingState(t *testing.T, execConfig *gethexec.Config) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - execConfig := gethexec.ConfigDefaultTest() - execConfig.Caching.SnapshotCache = 0 // disable snapshots - execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are - execConfig.Sequencer.MaxBlockSpeed = 0 - execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 builder, cancelNode := prepareNodeWithHistory(t, ctx, execConfig, 16) - execNode, _ := builder.L2.ExecNode, builder.L2.Client + execNode := builder.L2.ExecNode defer cancelNode() bc := execNode.Backend.ArbInterface().BlockChain() api := execNode.Backend.APIBackend() @@ -541,10 +536,17 @@ func TestGettingStateForRPCFullNode(t *testing.T) { } } -func TestGettingStateForRPCHybridArchiveNode(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() +func TestGettingState(t *testing.T) { execConfig := gethexec.ConfigDefaultTest() + execConfig.Caching.SnapshotCache = 0 // disable snapshots + execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are + execConfig.Sequencer.MaxBlockSpeed = 0 + execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 + t.Run("TestGettingStateForRPCFullNode", func(t *testing.T) { + testGettingState(t, execConfig) + }) + + execConfig = gethexec.ConfigDefaultTest() execConfig.Caching.Archive = true execConfig.Caching.MaxNumberOfBlocksToSkipStateSaving = 128 execConfig.Caching.BlockCount = 128 @@ -552,38 +554,9 @@ func TestGettingStateForRPCHybridArchiveNode(t *testing.T) { execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are execConfig.Sequencer.MaxBlockSpeed = 0 execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 - builder, cancelNode := prepareNodeWithHistory(t, ctx, execConfig, 16) - execNode, _ := builder.L2.ExecNode, builder.L2.Client - defer cancelNode() - bc := execNode.Backend.ArbInterface().BlockChain() - api := execNode.Backend.APIBackend() - - header := bc.CurrentBlock() - if header == nil { - Fatal(t, "failed to get current block header") - } - state, _, err := api.StateAndHeaderByNumber(ctx, rpc.BlockNumber(header.Number.Uint64())) - Require(t, err) - addr := builder.L2Info.GetAddress("User2") - exists := state.Exist(addr) - err = state.Error() - Require(t, err) - if !exists { - Fatal(t, "User2 address does not exist in the state") - } - // Get the state again to avoid caching - state, _, err = api.StateAndHeaderByNumber(ctx, rpc.BlockNumber(header.Number.Uint64())) - Require(t, err) - - blockCountRequiredToFlushDirties := builder.execConfig.Caching.BlockCount - makeSomeTransfers(t, ctx, builder, blockCountRequiredToFlushDirties) - - exists = state.Exist(addr) - err = state.Error() - Require(t, err) - if !exists { - Fatal(t, "User2 address does not exist in the state") - } + t.Run("TestGettingStateForRPCSparseArchiveNode", func(t *testing.T) { + testGettingState(t, execConfig) + }) } func TestStateAndHeaderForRecentBlock(t *testing.T) { From 97622b8a9393c63d136ff580df31b6d6d9938e0a Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 27 Jun 2024 12:35:37 +0200 Subject: [PATCH 3/9] add comments to TestStateAndHeaderForRecentBlock --- system_tests/recreatestate_rpc_test.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index 1aa98588f6..65938abcc5 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -559,6 +559,8 @@ func TestGettingState(t *testing.T) { }) } +// regression test for issue caused by accessing block state that has just been committed to TrieDB but not yet referenced in core.BlockChain.writeBlockWithState (here called state of "recent" block) +// before the corresponding fix, access to the recent block state caused premature garbage collection of the head block state func TestStateAndHeaderForRecentBlock(t *testing.T) { threads := 32 ctx, cancel := context.WithCancel(context.Background()) @@ -597,15 +599,22 @@ func TestStateAndHeaderForRecentBlock(t *testing.T) { }() api := builder.L2.ExecNode.Backend.APIBackend() db := builder.L2.ExecNode.Backend.ChainDb() - i := 1 + + recentBlock := 1 var mtx sync.RWMutex var wgCallers sync.WaitGroup for j := 0; j < threads && ctx.Err() == nil; j++ { wgCallers.Add(1) + // each thread attempts to get state for a block that is just being created (here called recent): + // 1. Before state trie node is referenced in core.BlockChain.writeBlockWithState, block body is written to database with key prefix `b` followed by block number and then block hash (see: rawdb.blockBodyKey) + // 2. Each thread tries to read the block body entry to: a. extract recent block hash b. congest resource usage to slow down execution of core.BlockChain.writeBlockWithState + // 3. After extracting the hash from block body entry key, StateAndHeaderByNumberOfHash is called for the hash. It is expected that it will: + // a. either fail with "ahead of current block" if we made it before StateDB commit + // b. or it will succeed if state was already commited - then the recentBlock is advanced go func() { defer wgCallers.Done() mtx.RLock() - blockNumber := i + blockNumber := recentBlock mtx.RUnlock() for blockNumber < 300 && ctx.Err() == nil { prefix := make([]byte, 8) @@ -624,8 +633,8 @@ func TestStateAndHeaderForRecentBlock(t *testing.T) { _, _, err := api.StateAndHeaderByNumberOrHash(ctx, rpc.BlockNumberOrHash{BlockHash: &blockHash}) if err == nil { mtx.Lock() - if blockNumber == i { - i++ + if blockNumber == recentBlock { + recentBlock++ } mtx.Unlock() break @@ -645,7 +654,7 @@ func TestStateAndHeaderForRecentBlock(t *testing.T) { } it.Release() mtx.RLock() - blockNumber = i + blockNumber = recentBlock mtx.RUnlock() } }() From ef65cc7115a93e08995bed43a1ef51d2ac59a3a5 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 28 Jun 2024 11:59:03 +0200 Subject: [PATCH 4/9] use subtests in TestSkippingSavingStateAndRecreatingAfterRestart --- system_tests/recreatestate_rpc_test.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index 65938abcc5..adff4a3822 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -327,6 +327,7 @@ func TestRecreateStateForRPCBlockNotFoundWhileRecreating(t *testing.T) { } func testSkippingSavingStateAndRecreatingAfterRestart(t *testing.T, cacheConfig *gethexec.CachingConfig, txCount int) { + t.Parallel() maxRecreateStateDepth := int64(30 * 1000 * 1000) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -455,20 +456,26 @@ func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) { cacheConfig.SnapshotCache = 0 // disable snapshots cacheConfig.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are + runTestCase := func(t *testing.T, cacheConfig gethexec.CachingConfig, txes int) { + t.Run(fmt.Sprintf("TestSkippingSavingStateAndRecreatingAfterRestart-skip-blocks-%d-skip-gas-%d-txes-%d", cacheConfig.MaxNumberOfBlocksToSkipStateSaving, cacheConfig.MaxAmountOfGasToSkipStateSaving, txes), func(t *testing.T) { + testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, txes) + }) + } + // test defaults - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) cacheConfig.MaxNumberOfBlocksToSkipStateSaving = 127 cacheConfig.MaxAmountOfGasToSkipStateSaving = 0 - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) cacheConfig.MaxNumberOfBlocksToSkipStateSaving = 0 cacheConfig.MaxAmountOfGasToSkipStateSaving = 15 * 1000 * 1000 - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) cacheConfig.MaxNumberOfBlocksToSkipStateSaving = 127 cacheConfig.MaxAmountOfGasToSkipStateSaving = 15 * 1000 * 1000 - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 512) + runTestCase(t, cacheConfig, 512) // lower number of blocks in triegc below 100 blocks, to be able to check for nonexistence in testSkippingSavingStateAndRecreatingAfterRestart (it doesn't check last BlockCount blocks as some of them may be persisted on node shutdown) cacheConfig.BlockCount = 16 @@ -483,7 +490,7 @@ func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) { for _, skipBlocks := range skipBlockValues[:len(skipBlockValues)-2] { cacheConfig.MaxAmountOfGasToSkipStateSaving = skipGas cacheConfig.MaxNumberOfBlocksToSkipStateSaving = uint32(skipBlocks) - testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, 100) + runTestCase(t, cacheConfig, 100) } } } From 35b9054bbee2f5440747629d5b17672e8ca4d8c7 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 28 Jun 2024 12:26:51 +0200 Subject: [PATCH 5/9] update the recent block test comment --- system_tests/recreatestate_rpc_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index adff4a3822..1a2e00d7f4 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -616,8 +616,8 @@ func TestStateAndHeaderForRecentBlock(t *testing.T) { // 1. Before state trie node is referenced in core.BlockChain.writeBlockWithState, block body is written to database with key prefix `b` followed by block number and then block hash (see: rawdb.blockBodyKey) // 2. Each thread tries to read the block body entry to: a. extract recent block hash b. congest resource usage to slow down execution of core.BlockChain.writeBlockWithState // 3. After extracting the hash from block body entry key, StateAndHeaderByNumberOfHash is called for the hash. It is expected that it will: - // a. either fail with "ahead of current block" if we made it before StateDB commit - // b. or it will succeed if state was already commited - then the recentBlock is advanced + // a. either fail with "ahead of current block" if we made it before rawdb.WriteCanonicalHash is called in core.BlockChain.writeHeadBlock, what is called after writeBlockWithState finishes, + // b. or it will succeed if the canonical hash was written for the block meaning that writeBlockWithState was fully executed (i.a. state root trie node correctly referenced) - then the recentBlock is advanced go func() { defer wgCallers.Done() mtx.RLock() From a5cccc49fd53fccf68682f62cdb7fc1e93b0b97f Mon Sep 17 00:00:00 2001 From: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> Date: Wed, 28 Aug 2024 15:52:46 +0200 Subject: [PATCH 6/9] fix typo in system_tests/recreatestate_rpc_test.go Co-authored-by: Diego Ximenes Mendes --- system_tests/recreatestate_rpc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index 1a2e00d7f4..b31e02fa89 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -524,7 +524,7 @@ func testGettingState(t *testing.T, execConfig *gethexec.Config) { blockCountRequiredToFlushDirties := builder.execConfig.Caching.BlockCount makeSomeTransfers(t, ctx, builder, blockCountRequiredToFlushDirties) - // force garbage callection to check if it won't break anything + // force garbage collection to check if it won't break anything runtime.GC() exists = state.Exist(addr) From 1e1f49c7bf5342f1d0d15948ee1507a23b62e8ad Mon Sep 17 00:00:00 2001 From: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> Date: Wed, 28 Aug 2024 15:56:04 +0200 Subject: [PATCH 7/9] fix comment in system_tests/recreatestate_rpc_test.go Co-authored-by: Diego Ximenes Mendes --- system_tests/recreatestate_rpc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index b31e02fa89..6c5d3153ad 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -616,7 +616,7 @@ func TestStateAndHeaderForRecentBlock(t *testing.T) { // 1. Before state trie node is referenced in core.BlockChain.writeBlockWithState, block body is written to database with key prefix `b` followed by block number and then block hash (see: rawdb.blockBodyKey) // 2. Each thread tries to read the block body entry to: a. extract recent block hash b. congest resource usage to slow down execution of core.BlockChain.writeBlockWithState // 3. After extracting the hash from block body entry key, StateAndHeaderByNumberOfHash is called for the hash. It is expected that it will: - // a. either fail with "ahead of current block" if we made it before rawdb.WriteCanonicalHash is called in core.BlockChain.writeHeadBlock, what is called after writeBlockWithState finishes, + // a. either fail with "ahead of current block" if we made it before rawdb.WriteCanonicalHash is called in core.BlockChain.writeHeadBlock, which is called after writeBlockWithState finishes, // b. or it will succeed if the canonical hash was written for the block meaning that writeBlockWithState was fully executed (i.a. state root trie node correctly referenced) - then the recentBlock is advanced go func() { defer wgCallers.Done() From e80c834463b001fbe1a6a6ed8f7bc0a268982738 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 28 Aug 2024 19:04:29 +0200 Subject: [PATCH 8/9] use errors.As when checking for MissingNodeError --- system_tests/recreatestate_rpc_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index d28d923bbf..0a5ed38502 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -536,6 +536,10 @@ func testGettingState(t *testing.T, execConfig *gethexec.Config) { if err == nil { Fatal(t, "StateAndHeaderByNumber didn't failed as expected") } + expectedErr := &trie.MissingNodeError{} + if !errors.As(err, &expectedErr) { + Fatal(t, "StateAndHeaderByNumber failed with unexpected error:", err) + } } func TestGettingState(t *testing.T) { From d48f24aa49b6a376a68f54d8fcaef1789db8845c Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 8 Oct 2024 18:18:31 +0200 Subject: [PATCH 9/9] reacreatestate_rpc_test: don't prefix subtest with testcase name --- system_tests/recreatestate_rpc_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go index 2f6650c63e..22329a1be5 100644 --- a/system_tests/recreatestate_rpc_test.go +++ b/system_tests/recreatestate_rpc_test.go @@ -454,7 +454,7 @@ func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) { cacheConfig.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are runTestCase := func(t *testing.T, cacheConfig gethexec.CachingConfig, txes int) { - t.Run(fmt.Sprintf("TestSkippingSavingStateAndRecreatingAfterRestart-skip-blocks-%d-skip-gas-%d-txes-%d", cacheConfig.MaxNumberOfBlocksToSkipStateSaving, cacheConfig.MaxAmountOfGasToSkipStateSaving, txes), func(t *testing.T) { + t.Run(fmt.Sprintf("skip-blocks-%d-skip-gas-%d-txes-%d", cacheConfig.MaxNumberOfBlocksToSkipStateSaving, cacheConfig.MaxAmountOfGasToSkipStateSaving, txes), func(t *testing.T) { testSkippingSavingStateAndRecreatingAfterRestart(t, &cacheConfig, txes) }) } @@ -553,7 +553,7 @@ func TestGettingState(t *testing.T) { execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are execConfig.Sequencer.MaxBlockSpeed = 0 execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 - t.Run("TestGettingStateForRPCFullNode", func(t *testing.T) { + t.Run("full-node", func(t *testing.T) { testGettingState(t, execConfig) }) @@ -567,7 +567,7 @@ func TestGettingState(t *testing.T) { execConfig.Caching.BlockAge = 0 // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are execConfig.Sequencer.MaxBlockSpeed = 0 execConfig.Sequencer.MaxTxDataSize = 150 // 1 test tx ~= 110 - t.Run("TestGettingStateForRPCSparseArchiveNode", func(t *testing.T) { + t.Run("archive-node", func(t *testing.T) { testGettingState(t, execConfig) }) }