Skip to content

Commit

Permalink
Checksum the data when writing and reading ledger snapshots
Browse files Browse the repository at this point in the history
- Allow skipping snapshot checksum check
- Generalise `Test/Ouroboros/Storage/LedgerDB/OnDisk.hs`
- Restrict `Ord` instance for `DiskSnapshot` to `dsNumber`
  - Use the `Ord` instance in `listSnapshots`
- Connect snapshot checksum with the node interface:
  - Add `Flag "DoDiskSnapshotChecksum"` to `DiskPolicyArgs`
  - Expose `(No)DoDiskSnapshotChecksum` in Ouroboros.Consensus.Node
  - Re-export `Flag` from `DiskPolicy`

Update changelog
  • Loading branch information
geo2a committed Dec 10, 2024
1 parent b8a13dd commit 1ec6cdc
Show file tree
Hide file tree
Showing 19 changed files with 288 additions and 79 deletions.
12 changes: 9 additions & 3 deletions ouroboros-consensus-cardano/app/DBAnalyser/Parsers.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PatternSynonyms #-}

module DBAnalyser.Parsers (
BlockType (..)
Expand All @@ -21,6 +22,7 @@ import Options.Applicative
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.Byron.Node (PBftSignatureThreshold (..))
import Ouroboros.Consensus.Shelley.Node (Nonce (..))
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (pattern DoDiskSnapshotChecksum, pattern NoDoDiskSnapshotChecksum)

{-------------------------------------------------------------------------------
Parsing
Expand All @@ -44,6 +46,10 @@ parseDBAnalyserConfig = DBAnalyserConfig
<*> parseValidationPolicy
<*> parseAnalysis
<*> parseLimit
<*> flag NoDoDiskSnapshotChecksum DoDiskSnapshotChecksum (mconcat [
long "do-disk-snapshot-checksum"
, help "Check the '.checksum' file if reading a ledger snapshot"
])

parseSelectDB :: Parser SelectDB
parseSelectDB =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
Expand Down Expand Up @@ -72,7 +73,7 @@ import Ouroboros.Consensus.Storage.Common (BlockComponent (..))
import Ouroboros.Consensus.Storage.ImmutableDB (ImmutableDB)
import qualified Ouroboros.Consensus.Storage.ImmutableDB as ImmutableDB
import Ouroboros.Consensus.Storage.LedgerDB (DiskSnapshot (..),
writeSnapshot)
pattern NoDoDiskSnapshotChecksum, writeSnapshot)
import Ouroboros.Consensus.Storage.Serialisation (encodeDisk)
import Ouroboros.Consensus.Util ((..:))
import qualified Ouroboros.Consensus.Util.IOLike as IOLike
Expand Down Expand Up @@ -422,7 +423,7 @@ storeLedgerStateAt slotNo ledgerAppMode env = do
storeLedgerState ledgerState = case pointSlot pt of
NotOrigin slot -> do
let snapshot = DiskSnapshot (unSlotNo slot) (Just "db-analyser")
writeSnapshot ledgerDbFS encLedger snapshot ledgerState
writeSnapshot ledgerDbFS NoDoDiskSnapshotChecksum encLedger snapshot ledgerState
traceWith tracer $ SnapshotStoredEvent slot
Origin -> pure ()
where
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ analyse ::
=> DBAnalyserConfig
-> Args blk
-> IO (Maybe AnalysisResult)
analyse DBAnalyserConfig{analysis, confLimit, dbDir, selectDB, validation, verbose} args =
analyse DBAnalyserConfig{analysis, confLimit, dbDir, selectDB, validation, verbose, diskSnapshotChecksum} args =
withRegistry $ \registry -> do
lock <- newMVar ()
chainDBTracer <- mkTracer lock verbose
Expand Down Expand Up @@ -92,6 +92,7 @@ analyse DBAnalyserConfig{analysis, confLimit, dbDir, selectDB, validation, verbo
ledgerDbFS
(decodeDiskExtLedgerState $ configCodec cfg)
decode
diskSnapshotChecksum
(DiskSnapshot slot (Just "db-analyser"))
-- TODO @readSnapshot@ has type @ExceptT ReadIncrementalErr m
-- (ExtLedgerState blk)@ but it also throws exceptions! This makes
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}

module Cardano.Tools.DBAnalyser.Types (module Cardano.Tools.DBAnalyser.Types) where

import Data.Word
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.Util (Flag)

data SelectDB =
SelectImmutableDB (WithOrigin SlotNo)

data DBAnalyserConfig = DBAnalyserConfig {
dbDir :: FilePath
, verbose :: Bool
, selectDB :: SelectDB
, validation :: Maybe ValidateBlocks
, analysis :: AnalysisName
, confLimit :: Limit
dbDir :: FilePath
, verbose :: Bool
, selectDB :: SelectDB
, validation :: Maybe ValidateBlocks
, analysis :: AnalysisName
, confLimit :: Limit
, diskSnapshotChecksum :: Flag "DoDiskSnapshotChecksum"
}

data AnalysisName =
Expand Down
5 changes: 5 additions & 0 deletions ouroboros-consensus-cardano/test/tools-test/Main.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE PatternSynonyms #-}

module Main (main) where

import qualified Cardano.Tools.DBAnalyser.Block.Cardano as Cardano
Expand All @@ -8,6 +10,8 @@ import qualified Cardano.Tools.DBSynthesizer.Run as DBSynthesizer
import Cardano.Tools.DBSynthesizer.Types
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.Cardano.Block
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(pattern NoDoDiskSnapshotChecksum)
import qualified Test.Cardano.Tools.Headers
import Test.Tasty
import Test.Tasty.HUnit
Expand Down Expand Up @@ -68,6 +72,7 @@ testAnalyserConfig =
, validation = Just ValidateAllBlocks
, analysis = CountBlocks
, confLimit = Unlimited
, diskSnapshotChecksum = NoDoDiskSnapshotChecksum
}

testBlockArgs :: Cardano.Args (CardanoBlock StandardCrypto)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MonadComprehensions #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
Expand Down Expand Up @@ -51,6 +52,8 @@ module Ouroboros.Consensus.Node (
, RunNodeArgs (..)
, Tracers
, Tracers' (..)
, pattern DoDiskSnapshotChecksum
, pattern NoDoDiskSnapshotChecksum
-- * Internal helpers
, mkNodeKernelArgs
, nodeKernelArgsEnforceInvariants
Expand Down Expand Up @@ -107,7 +110,8 @@ import Ouroboros.Consensus.Storage.ChainDB (ChainDB, ChainDbArgs,
import qualified Ouroboros.Consensus.Storage.ChainDB as ChainDB
import qualified Ouroboros.Consensus.Storage.ChainDB.Impl.Args as ChainDB
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(DiskPolicyArgs (..))
(DiskPolicyArgs (..), pattern DoDiskSnapshotChecksum,
pattern NoDoDiskSnapshotChecksum)
import Ouroboros.Consensus.Util.Args
import Ouroboros.Consensus.Util.IOLike
import Ouroboros.Consensus.Util.Orphans ()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
### Breaking

- When writing a ledger state snapshot to disk, calculate the state's CRC32 checksum and write it to a separate file, which is named the same as the snapshot file, plus the `.checksum` extension.
- When reading a snapshot file in `readSnapshot`, calculate its checksum and compare it to the value in the corresponding `.checksum` file. Return an error if the checksum is different or invalid. Issue a warning if the checksum file does not exist, but still initialise the ledger DB.
- To support the previous item, change the error type of the `readSnapshot` from `ReadIncrementalErr` to the extended `ReadSnaphotErr`.
- Checksumming the snapshots is controlled via the `doChecksum :: Flag "DoDiskSnapshotChecksum"` parameter of `initFromSnapshot`. Ultimately, this parameter comes from the Node's configuration file via the `DiskPolicy` data type.
- Extend the `DiskPolicyArgs` data type to enable the node to pass `Flag "DoDiskSnapshotChecksum"` to Consensus.

### Non-breaking

- Make `Ouroboros.Consensus.Util.CBOR.readIncremental` optionally compute the checksum of the data as it is read.
- Introduce an explicit `Ord` instance for `DiskSnapshot` that compares the values on `dsNumber`.
- Introduce a new utility newtype `Flag` to represent type-safe boolean flags. See ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Util.hs.
- Use `Flag "DoDiskSnapshotChecksum"` to control the check of the snapshot checksum file in `takeSnapshot`, `readSnapshot` and `writeSnapshot`.
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ initFromDisk ::
initFromDisk LgrDbArgs { lgrHasFS = hasFS, .. }
replayTracer
immutableDB = wrapFailure (Proxy @blk) $ do
let LedgerDB.DiskPolicyArgs _ _ doDiskSnapshotChecksum = lgrDiskPolicyArgs
(_initLog, db, replayed) <-
LedgerDB.initLedgerDB
replayTracer
Expand All @@ -232,6 +233,7 @@ initFromDisk LgrDbArgs { lgrHasFS = hasFS, .. }
lgrConfig
lgrGenesis
(streamAPI immutableDB)
doDiskSnapshotChecksum
return (db, replayed)
where
ccfg = configCodec $ getExtLedgerCfg $ LedgerDB.ledgerDbCfg lgrConfig
Expand Down Expand Up @@ -280,11 +282,12 @@ takeSnapshot ::
, IsLedger (LedgerState blk)
)
=> LgrDB m blk -> m (Maybe (LedgerDB.DiskSnapshot, RealPoint blk))
takeSnapshot lgrDB@LgrDB{ cfg, tracer, hasFS } = wrapFailure (Proxy @blk) $ do
takeSnapshot lgrDB@LgrDB{ cfg, tracer, hasFS, diskPolicy } = wrapFailure (Proxy @blk) $ do
ledgerDB <- LedgerDB.ledgerDbAnchor <$> atomically (getCurrent lgrDB)
LedgerDB.takeSnapshot
tracer
hasFS
(LedgerDB.onDiskShouldChecksumSnapshots diskPolicy)
(encodeDiskExtLedgerState ccfg)
ledgerDB
where
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE PatternSynonyms #-}

-- | The Ledger DB is responsible for the following tasks:
--
-- - __Maintaining the in-memory ledger state at the tip__: When we try to
Expand Down Expand Up @@ -134,6 +136,8 @@ module Ouroboros.Consensus.Storage.LedgerDB (
, SnapshotFailure (..)
, diskSnapshotIsTemporary
, listSnapshots
, pattern DoDiskSnapshotChecksum
, pattern NoDoDiskSnapshotChecksum
, readSnapshot
-- ** Write to disk
, takeSnapshot
Expand All @@ -160,7 +164,9 @@ module Ouroboros.Consensus.Storage.LedgerDB (
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(DiskPolicy (..), DiskPolicyArgs (..),
NumOfDiskSnapshots (..), SnapshotInterval (..),
TimeSinceLast (..), defaultDiskPolicyArgs, mkDiskPolicy)
TimeSinceLast (..), defaultDiskPolicyArgs, mkDiskPolicy,
pattern DoDiskSnapshotChecksum,
pattern NoDoDiskSnapshotChecksum)
import Ouroboros.Consensus.Storage.LedgerDB.Init (InitLog (..),
ReplayGoal (..), ReplayStart (..), TraceReplayEvent (..),
decorateReplayTracerWithGoal,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RecordWildCards #-}

module Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (
Expand All @@ -13,6 +14,10 @@ module Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (
, TimeSinceLast (..)
, defaultDiskPolicyArgs
, mkDiskPolicy
, pattern DoDiskSnapshotChecksum
, pattern NoDoDiskSnapshotChecksum
-- * Re-exports
, Flag (..)
) where

import Control.Monad.Class.MonadTime.SI
Expand All @@ -21,6 +26,7 @@ import Data.Word
import GHC.Generics
import NoThunks.Class (NoThunks, OnlyCheckWhnf (..))
import Ouroboros.Consensus.Config.SecurityParam
import Ouroboros.Consensus.Util (Flag (..))

-- | Length of time, requested by the user, that has to pass after which
-- a snapshot is taken. It can be:
Expand All @@ -41,7 +47,11 @@ data NumOfDiskSnapshots =
| RequestedNumOfDiskSnapshots Word
deriving stock (Eq, Generic, Show)

data DiskPolicyArgs = DiskPolicyArgs SnapshotInterval NumOfDiskSnapshots
pattern DoDiskSnapshotChecksum, NoDoDiskSnapshotChecksum :: Flag "DoDiskSnapshotChecksum"
pattern DoDiskSnapshotChecksum = Flag True
pattern NoDoDiskSnapshotChecksum = Flag False

data DiskPolicyArgs = DiskPolicyArgs SnapshotInterval NumOfDiskSnapshots (Flag "DoDiskSnapshotChecksum")

-- | On-disk policy
--
Expand All @@ -67,7 +77,7 @@ data DiskPolicy = DiskPolicy {
-- the next snapshot, we delete the oldest one, leaving the middle
-- one available in case of truncation of the write. This is
-- probably a sane value in most circumstances.
onDiskNumSnapshots :: Word
onDiskNumSnapshots :: Word

-- | Should we write a snapshot of the ledger state to disk?
--
Expand All @@ -87,7 +97,11 @@ data DiskPolicy = DiskPolicy {
-- blocks had to be replayed.
--
-- See also 'mkDiskPolicy'
, onDiskShouldTakeSnapshot :: TimeSinceLast DiffTime -> Word64 -> Bool
, onDiskShouldTakeSnapshot :: TimeSinceLast DiffTime -> Word64 -> Bool

-- | Whether or not to checksum the ledger snapshots to detect data corruption on disk.
-- "yes" if @'DoDiskSnapshotChecksum'@; "no" if @'NoDoDiskSnapshotChecksum'@.
, onDiskShouldChecksumSnapshots :: Flag "DoDiskSnapshotChecksum"
}
deriving NoThunks via OnlyCheckWhnf DiskPolicy

Expand All @@ -97,10 +111,10 @@ data TimeSinceLast time = NoSnapshotTakenYet | TimeSinceLast time
-- | Default on-disk policy arguments suitable to use with cardano-node
--
defaultDiskPolicyArgs :: DiskPolicyArgs
defaultDiskPolicyArgs = DiskPolicyArgs DefaultSnapshotInterval DefaultNumOfDiskSnapshots
defaultDiskPolicyArgs = DiskPolicyArgs DefaultSnapshotInterval DefaultNumOfDiskSnapshots DoDiskSnapshotChecksum

mkDiskPolicy :: SecurityParam -> DiskPolicyArgs -> DiskPolicy
mkDiskPolicy (SecurityParam k) (DiskPolicyArgs reqInterval reqNumOfSnapshots) =
mkDiskPolicy (SecurityParam k) (DiskPolicyArgs reqInterval reqNumOfSnapshots onDiskShouldChecksumSnapshots) =
DiskPolicy {..}
where
onDiskNumSnapshots :: Word
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}

Expand Down Expand Up @@ -31,10 +33,14 @@ import Ouroboros.Consensus.Ledger.Extended
import Ouroboros.Consensus.Ledger.Inspect
import Ouroboros.Consensus.Ledger.SupportsProtocol
import Ouroboros.Consensus.Storage.ImmutableDB.Stream
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(pattern DoDiskSnapshotChecksum,
pattern NoDoDiskSnapshotChecksum)
import Ouroboros.Consensus.Storage.LedgerDB.LedgerDB
import Ouroboros.Consensus.Storage.LedgerDB.Query
import Ouroboros.Consensus.Storage.LedgerDB.Snapshots
import Ouroboros.Consensus.Storage.LedgerDB.Update
import Ouroboros.Consensus.Util (Flag)
import Ouroboros.Consensus.Util.IOLike
import Ouroboros.Network.Block (Point (Point))
import System.FS.API
Expand Down Expand Up @@ -102,6 +108,7 @@ initLedgerDB ::
-> LedgerDbCfg (ExtLedgerState blk)
-> m (ExtLedgerState blk) -- ^ Genesis ledger state
-> StreamAPI m blk blk
-> Flag "DoDiskSnapshotChecksum"
-> m (InitLog blk, LedgerDB' blk, Word64)
initLedgerDB replayTracer
tracer
Expand All @@ -110,14 +117,16 @@ initLedgerDB replayTracer
decHash
cfg
getGenesisLedger
stream = do
stream
doDoDiskSnapshotChecksum = do
snapshots <- listSnapshots hasFS
tryNewestFirst id snapshots
tryNewestFirst doDoDiskSnapshotChecksum id snapshots
where
tryNewestFirst :: (InitLog blk -> InitLog blk)
tryNewestFirst :: Flag "DoDiskSnapshotChecksum"
-> (InitLog blk -> InitLog blk)
-> [DiskSnapshot]
-> m (InitLog blk, LedgerDB' blk, Word64)
tryNewestFirst acc [] = do
tryNewestFirst _ acc [] = do
-- We're out of snapshots. Start at genesis
traceWith replayTracer ReplayFromGenesis
initDb <- ledgerDbWithAnchor <$> getGenesisLedger
Expand All @@ -126,8 +135,7 @@ initLedgerDB replayTracer
case ml of
Left _ -> error "invariant violation: invalid current chain"
Right (l, replayed) -> return (acc InitFromGenesis, l, replayed)
tryNewestFirst acc (s:ss) = do
-- If we fail to use this snapshot, delete it and try an older one
tryNewestFirst doChecksum acc allSnapshot@(s:ss) = do
ml <- runExceptT $ initFromSnapshot
replayTracer
hasFS
Expand All @@ -136,14 +144,23 @@ initLedgerDB replayTracer
cfg
stream
s
doChecksum
case ml of
-- If a checksum file is missing for a snapshot,
-- issue a warning and retry the same snapshot
-- ignoring the checksum
Left (InitFailureRead ReadSnapshotNoChecksumFile{}) -> do
traceWith tracer $ SnapshotMissingChecksum s
tryNewestFirst NoDoDiskSnapshotChecksum acc allSnapshot
-- If we fail to use this snapshot for any other reason, delete it and try an older one
Left err -> do
when (diskSnapshotIsTemporary s) $
-- We don't delete permanent snapshots, even if we couldn't parse
-- them
deleteSnapshot hasFS s
traceWith tracer $ InvalidSnapshot s err
tryNewestFirst (acc . InitFailure s err) ss
-- always reset checksum flag after failure
tryNewestFirst DoDiskSnapshotChecksum (acc . InitFailure s err) ss
Right (r, l, replayed) ->
return (acc (InitFromSnapshot s r), l, replayed)

Expand All @@ -170,10 +187,11 @@ initFromSnapshot ::
-> LedgerDbCfg (ExtLedgerState blk)
-> StreamAPI m blk blk
-> DiskSnapshot
-> Flag "DoDiskSnapshotChecksum"
-> ExceptT (SnapshotFailure blk) m (RealPoint blk, LedgerDB' blk, Word64)
initFromSnapshot tracer hasFS decLedger decHash cfg stream ss = do
initFromSnapshot tracer hasFS decLedger decHash cfg stream ss doChecksum = do
initSS <- withExceptT InitFailureRead $
readSnapshot hasFS decLedger decHash ss
readSnapshot hasFS decLedger decHash doChecksum ss
let replayStart = castPoint $ getTip initSS
case pointToWithOriginRealPoint replayStart of
Origin -> throwError InitFailureGenesis
Expand Down
Loading

0 comments on commit 1ec6cdc

Please sign in to comment.