From 4d3c73f4caedbb192718633cecfd984f87cfffb7 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Fri, 11 Jun 2021 23:08:48 +0300
Subject: [PATCH 001/197] noop blockstore
---
 blockstore/noop.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 blockstore/noop.go

diff --git a/blockstore/noop.go b/blockstore/noop.go
new file mode 100644
index 00000000000..f2658cbd739
--- /dev/null
+++ b/blockstore/noop.go
@@ -0,0 +1,66 @@
+package blockstore
+
+import (
+	"context"
+	"io"
+
+	blocks "github.com/ipfs/go-block-format"
+	cid "github.com/ipfs/go-cid"
+)
+
+var _ Blockstore = (*noopstore)(nil)
+
+type noopstore struct {
+	bs Blockstore
+}
+
+func NewNoopStore(bs Blockstore) Blockstore {
+	return &noopstore{bs: bs}
+}
+
+func (b *noopstore) Has(cid cid.Cid) (bool, error) {
+	return b.bs.Has(cid)
+}
+
+func (b *noopstore) HashOnRead(hor bool) {
+	b.bs.HashOnRead(hor)
+}
+
+func (b *noopstore) Get(cid cid.Cid) (blocks.Block, error) {
+	return b.bs.Get(cid)
+}
+
+func (b *noopstore) GetSize(cid cid.Cid) (int, error) {
+	return b.bs.GetSize(cid)
+}
+
+func (b *noopstore) View(cid cid.Cid, f func([]byte) error) error {
+	return b.bs.View(cid, f)
+}
+
+func (b *noopstore) Put(blk blocks.Block) error {
+	return nil
+}
+
+func (b *noopstore) PutMany(blks []blocks.Block) error {
+	return nil
+}
+
+func (b *noopstore) DeleteBlock(cid cid.Cid) error {
+	return nil
+}
+
+func (b *noopstore) DeleteMany(cids []cid.Cid) error {
+	return nil
+}
+
+func (b *noopstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
+	return b.bs.AllKeysChan(ctx)
+}
+
+func (b *noopstore) Close() error {
+	if c, ok := b.bs.(io.Closer); ok {
+		return c.Close()
+	}
+	return nil
+}
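The noop store passes reads through to the backing blockstore and silently
discards writes and deletes. A minimal usage sketch of that behavior (not
part of the patch; it assumes the lotus blockstore import path and the
NewMemorySync constructor that the splitstore tests use):

    package main

    import (
    	"fmt"

    	blocks "github.com/ipfs/go-block-format"

    	"github.com/filecoin-project/lotus/blockstore"
    )

    func main() {
    	// reads are delegated to the backing store; writes are dropped
    	backing := blockstore.NewMemorySync()
    	noop := blockstore.NewNoopStore(backing)

    	blk := blocks.NewBlock([]byte{1, 2, 3})

    	// Put is a no-op, so nothing reaches the backing store
    	if err := noop.Put(blk); err != nil {
    		panic(err)
    	}

    	has, err := noop.Has(blk.Cid()) // delegates to backing.Has
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println("stored:", has) // prints "stored: false"
    }

Discarding writes while serving reads is what lets the next patch wire this
store in as a coldstore that keeps nothing.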
From 5cca29d1dbb9b88a77d5718690fddb5ae5e2c428 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Fri, 11 Jun 2021 23:22:20 +0300
Subject: [PATCH 002/197] hook noop blockstore for splitstore in DI
---
 node/builder.go                | 4 ++++
 node/config/def.go             | 4 +++-
 node/modules/blockstore.go     | 8 ++++++--
 node/modules/dtypes/storage.go | 5 ++++-
 4 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/node/builder.go b/node/builder.go
index 884261a89b9..4a678f37f8a 100644
--- a/node/builder.go
+++ b/node/builder.go
@@ -643,6 +643,10 @@ func Repo(r repo.Repo) Option {
 		Override(new(dtypes.UniversalBlockstore), modules.UniversalBlockstore),
 
 		If(cfg.EnableSplitstore,
+			If(cfg.Splitstore.ColdStoreType == "universal",
+				Override(new(dtypes.ColdBlockstore), From(new(dtypes.UniversalBlockstore)))),
+			If(cfg.Splitstore.ColdStoreType == "noop",
+				Override(new(dtypes.ColdBlockstore), modules.NoopColdBlockstore)),
 			If(cfg.Splitstore.HotStoreType == "badger",
 				Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)),
 			Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)),
diff --git a/node/config/def.go b/node/config/def.go
index 240fadbd93f..dcff654dad6 100644
--- a/node/config/def.go
+++ b/node/config/def.go
@@ -229,6 +229,7 @@ type Chainstore struct {
 }
 
 type Splitstore struct {
+	ColdStoreType     string
 	HotStoreType      string
 	TrackingStoreType string
 	MarkSetType       string
@@ -305,7 +306,8 @@ func DefaultFullNode() *FullNode {
 		Chainstore: Chainstore{
 			EnableSplitstore: false,
 			Splitstore: Splitstore{
-				HotStoreType: "badger",
+				ColdStoreType: "universal",
+				HotStoreType:  "badger",
 			},
 		},
 	}
diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go
index 787d782b7ea..6dd776999da 100644
--- a/node/modules/blockstore.go
+++ b/node/modules/blockstore.go
@@ -37,6 +37,10 @@ func UniversalBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, r repo.Locked
 	return bs, err
 }
 
+func NoopColdBlockstore(lc fx.Lifecycle, bs dtypes.UniversalBlockstore) (dtypes.ColdBlockstore, error) {
+	return blockstore.NewNoopStore(bs), nil
+}
+
 func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore, error) {
 	path, err := r.SplitstorePath()
 	if err != nil {
@@ -66,8 +70,8 @@ func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlocksto
 	return bs, nil
 }
 
-func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) {
-	return func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.UniversalBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) {
+func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) {
+	return func(lc fx.Lifecycle, r repo.LockedRepo, ds dtypes.MetadataDS, cold dtypes.ColdBlockstore, hot dtypes.HotBlockstore) (dtypes.SplitBlockstore, error) {
 		path, err := r.SplitstorePath()
 		if err != nil {
 			return nil, err
diff --git a/node/modules/dtypes/storage.go b/node/modules/dtypes/storage.go
index e35d02811a7..8d82006b752 100644
--- a/node/modules/dtypes/storage.go
+++ b/node/modules/dtypes/storage.go
@@ -24,9 +24,12 @@ import (
 type MetadataDS datastore.Batching
 
 type (
-	// UniversalBlockstore is the cold blockstore.
+	// UniversalBlockstore is the universal blockstore backend.
 	UniversalBlockstore blockstore.Blockstore
 
+	// ColdBlockstore is the Cold blockstore abstraction for the splitstore
+	ColdBlockstore blockstore.Blockstore
+
 	// HotBlockstore is the Hot blockstore abstraction for the splitstore
 	HotBlockstore blockstore.Blockstore

From 04f2e102a18866289c543dc4fe6ddb42523d4edf Mon Sep 17 00:00:00 2001
From: vyzo
Date: Mon, 14 Jun 2021 20:19:06 +0300
Subject: [PATCH 003/197] kill full splitstore compaction, simplify splitstore configuration
---
 blockstore/splitstore/splitstore.go | 267 +---------------------------
 node/config/def.go                  |  11 +-
 node/modules/blockstore.go          |   7 +-
 3 files changed, 12 insertions(+), 273 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index f6d26bbdd60..62b6fbf1153 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -86,17 +86,6 @@ type Config struct {
 	//
 	// Supported values are: "bloom" (default if omitted), "bolt".
 	MarkSetType string
-	// perform full reachability analysis (expensive) for compaction
-	// You should enable this option if you plan to use the splitstore without a backing coldstore
-	EnableFullCompaction bool
-	// EXPERIMENTAL enable pruning of unreachable objects.
-	// This has not been sufficiently tested yet; only enable if you know what you are doing.
-	// Only applies if you enable full compaction.
-	EnableGC bool
-	// full archival nodes should enable this if EnableFullCompaction is enabled
-	// do NOT enable this if you synced from a snapshot.
-	// Only applies if you enabled full compaction
-	Archival bool
 }
 
 	// ChainAccessor allows the Splitstore to access the chain.
It will most likely @@ -113,16 +102,10 @@ type SplitStore struct { critsection int32 // compaction critical section closing int32 // the split store is closing - fullCompaction bool - enableGC bool - skipOldMsgs bool - skipMsgReceipts bool - baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch coldPurgeSize int - deadPurgeSize int mx sync.Mutex curTs *types.TipSet @@ -165,18 +148,9 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co tracker: tracker, env: env, - fullCompaction: cfg.EnableFullCompaction, - enableGC: cfg.EnableGC, - skipOldMsgs: !(cfg.EnableFullCompaction && cfg.Archival), - skipMsgReceipts: !(cfg.EnableFullCompaction && cfg.Archival), - coldPurgeSize: defaultColdPurgeSize, } - if cfg.EnableGC { - ss.deadPurgeSize = defaultDeadPurgeSize - } - return ss, nil } @@ -465,7 +439,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { batchSnoop := make([]cid.Cid, 0, batchSize) count := int64(0) - err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err := s.chain.WalkSnapshot(context.Background(), curTs, 1, true, true, func(cid cid.Cid) error { count++ @@ -556,11 +530,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) { } start := time.Now() - if s.fullCompaction { - err = s.compactFull(curTs) - } else { - err = s.compactSimple(curTs) - } + err = s.doCompact(curTs) took := time.Since(start).Milliseconds() stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) @@ -571,7 +541,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) { func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { var count int64 - err := s.chain.WalkSnapshot(context.Background(), curTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err := s.chain.WalkSnapshot(context.Background(), curTs, 1, true, true, func(cid cid.Cid) error { count++ return nil @@ -585,12 +555,12 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { return nil } -func (s *SplitStore) compactSimple(curTs *types.TipSet) error { +func (s *SplitStore) doCompact(curTs *types.TipSet) error { coldEpoch := s.baseEpoch + CompactionCold currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary - log.Infow("running simple compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) + log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) coldSet, err := s.env.Create("cold", s.markSetSize) if err != nil { @@ -608,7 +578,7 @@ func (s *SplitStore) compactSimple(curTs *types.TipSet) error { } var count int64 - err = s.chain.WalkSnapshot(context.Background(), boundaryTs, 1, s.skipOldMsgs, s.skipMsgReceipts, + err = s.chain.WalkSnapshot(context.Background(), boundaryTs, 1, true, true, func(cid cid.Cid) error { count++ return coldSet.Mark(cid) @@ -826,231 +796,6 @@ func (s *SplitStore) gcHotstore() { } } -func (s *SplitStore) compactFull(curTs *types.TipSet) error { - currentEpoch := curTs.Height() - coldEpoch := s.baseEpoch + CompactionCold - boundaryEpoch := currentEpoch - CompactionBoundary - - log.Infow("running full compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) - - // create two mark sets, one for marking the cold finality region - // and one for marking the hot region - hotSet, err := s.env.Create("hot", s.markSetSize) - if err != nil { - return 
xerrors.Errorf("error creating hot mark set: %w", err) - } - defer hotSet.Close() //nolint:errcheck - - coldSet, err := s.env.Create("cold", s.markSetSize) - if err != nil { - return xerrors.Errorf("error creating cold mark set: %w", err) - } - defer coldSet.Close() //nolint:errcheck - - // Phase 1: marking - log.Info("marking live blocks") - startMark := time.Now() - - // Phase 1a: mark all reachable CIDs in the hot range - boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) - if err != nil { - return xerrors.Errorf("error getting tipset at boundary epoch: %w", err) - } - - count := int64(0) - err = s.chain.WalkSnapshot(context.Background(), boundaryTs, boundaryEpoch-coldEpoch, s.skipOldMsgs, s.skipMsgReceipts, - func(cid cid.Cid) error { - count++ - return hotSet.Mark(cid) - }) - - if err != nil { - return xerrors.Errorf("error marking hot blocks: %w", err) - } - - if count > s.markSetSize { - s.markSetSize = count + count>>2 // overestimate a bit - } - - // Phase 1b: mark all reachable CIDs in the cold range - coldTs, err := s.chain.GetTipsetByHeight(context.Background(), coldEpoch, curTs, true) - if err != nil { - return xerrors.Errorf("error getting tipset at cold epoch: %w", err) - } - - count = 0 - err = s.chain.WalkSnapshot(context.Background(), coldTs, CompactionCold, s.skipOldMsgs, s.skipMsgReceipts, - func(cid cid.Cid) error { - count++ - return coldSet.Mark(cid) - }) - - if err != nil { - return xerrors.Errorf("error marking cold blocks: %w", err) - } - - if count > s.markSetSize { - s.markSetSize = count + count>>2 // overestimate a bit - } - - log.Infow("marking done", "took", time.Since(startMark)) - - // Phase 2: sweep cold objects: - // - If a cold object is reachable in the hot range, it stays in the hotstore. - // - If a cold object is reachable in the cold range, it is moved to the coldstore. - // - If a cold object is unreachable, it is deleted if GC is enabled, otherwise moved to the coldstore. - log.Info("collecting cold objects") - startCollect := time.Now() - - // some stats for logging - var hotCnt, coldCnt, deadCnt int - - cold := make([]cid.Cid, 0, s.coldPurgeSize) - dead := make([]cid.Cid, 0, s.deadPurgeSize) - - // 2.1 iterate through the tracker and collect cold and dead objects - err = s.tracker.ForEach(func(cid cid.Cid, wrEpoch abi.ChainEpoch) error { - // is the object stil hot? 
- if wrEpoch > coldEpoch { - // yes, stay in the hotstore - hotCnt++ - return nil - } - - // the object is cold -- check whether it is reachable in the hot range - mark, err := hotSet.Has(cid) - if err != nil { - return xerrors.Errorf("error checking live mark for %s: %w", cid, err) - } - - if mark { - // the object is reachable in the hot range, stay in the hotstore - hotCnt++ - return nil - } - - // check whether it is reachable in the cold range - mark, err = coldSet.Has(cid) - if err != nil { - return xerrors.Errorf("error checkiing cold set for %s: %w", cid, err) - } - - if s.enableGC { - if mark { - // the object is reachable in the cold range, move it to the cold store - cold = append(cold, cid) - coldCnt++ - } else { - // the object is dead and will be deleted - dead = append(dead, cid) - deadCnt++ - } - } else { - // if GC is disabled, we move both cold and dead objects to the coldstore - cold = append(cold, cid) - if mark { - coldCnt++ - } else { - deadCnt++ - } - } - - return nil - }) - - if err != nil { - return xerrors.Errorf("error collecting cold objects: %w", err) - } - - if coldCnt > 0 { - s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit - } - if deadCnt > 0 { - s.deadPurgeSize = deadCnt + deadCnt>>2 // overestimate a bit - } - - log.Infow("collection done", "took", time.Since(startCollect)) - log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "dead", deadCnt) - stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) - stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) - stats.Record(context.Background(), metrics.SplitstoreCompactionDead.M(int64(deadCnt))) - - // Enter critical section - atomic.StoreInt32(&s.critsection, 1) - defer atomic.StoreInt32(&s.critsection, 0) - - // check to see if we are closing first; if that's the case just return - if atomic.LoadInt32(&s.closing) == 1 { - log.Info("splitstore is closing; aborting compaction") - return xerrors.Errorf("compaction aborted") - } - - // 2.2 copy the cold objects to the coldstore - log.Info("moving cold objects to the coldstore") - startMove := time.Now() - err = s.moveColdBlocks(cold) - if err != nil { - return xerrors.Errorf("error moving cold blocks: %w", err) - } - log.Infow("moving done", "took", time.Since(startMove)) - - // 2.3 delete cold objects from the hotstore - log.Info("purging cold objects from the hotstore") - startPurge := time.Now() - err = s.purgeBlocks(cold) - if err != nil { - return xerrors.Errorf("error purging cold blocks: %w", err) - } - log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) - - // 2.4 remove the tracker tracking for cold objects - startPurge = time.Now() - log.Info("purging cold objects from tracker") - err = s.purgeTracking(cold) - if err != nil { - return xerrors.Errorf("error purging tracking for cold blocks: %w", err) - } - log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) - - // 3. 
if we have dead objects, delete them from the hotstore and remove the tracking - if len(dead) > 0 { - log.Info("deleting dead objects") - err = s.purgeBlocks(dead) - if err != nil { - return xerrors.Errorf("error purging dead blocks: %w", err) - } - - // remove the tracker tracking - startPurge := time.Now() - log.Info("purging dead objects from tracker") - err = s.purgeTracking(dead) - if err != nil { - return xerrors.Errorf("error purging tracking for dead blocks: %w", err) - } - log.Infow("purging dead from tracker done", "took", time.Since(startPurge)) - } - - // we are done; do some housekeeping - err = s.tracker.Sync() - if err != nil { - return xerrors.Errorf("error syncing tracker: %w", err) - } - - s.gcHotstore() - - err = s.setBaseEpoch(coldEpoch) - if err != nil { - return xerrors.Errorf("error saving base epoch: %w", err) - } - - err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) - if err != nil { - return xerrors.Errorf("error saving mark set size: %w", err) - } - - return nil -} - func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { s.baseEpoch = epoch // write to datastore diff --git a/node/config/def.go b/node/config/def.go index dcff654dad6..c2754fe4f30 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -229,13 +229,10 @@ type Chainstore struct { } type Splitstore struct { - ColdStoreType string - HotStoreType string - TrackingStoreType string - MarkSetType string - EnableFullCompaction bool - EnableGC bool // EXPERIMENTAL - Archival bool + ColdStoreType string + HotStoreType string + TrackingStoreType string + MarkSetType string } // // Full Node diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 6dd776999da..5accb50650a 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -78,11 +78,8 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked } cfg := &splitstore.Config{ - TrackingStoreType: cfg.Splitstore.TrackingStoreType, - MarkSetType: cfg.Splitstore.MarkSetType, - EnableFullCompaction: cfg.Splitstore.EnableFullCompaction, - EnableGC: cfg.Splitstore.EnableGC, - Archival: cfg.Splitstore.Archival, + TrackingStoreType: cfg.Splitstore.TrackingStoreType, + MarkSetType: cfg.Splitstore.MarkSetType, } ss, err := splitstore.Open(path, ds, hot, cold, cfg) if err != nil { From e3cbeec6ee67c039980e310e705698c08bde6afa Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 13 Mar 2021 12:00:28 +0200 Subject: [PATCH 004/197] implement chain walking --- blockstore/splitstore/splitstore.go | 147 ++++++++++++++++++++--- blockstore/splitstore/splitstore_test.go | 114 +++++------------- 2 files changed, 158 insertions(+), 103 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 62b6fbf1153..63e42ab532f 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1,6 +1,7 @@ package splitstore import ( + "bytes" "context" "encoding/binary" "errors" @@ -15,6 +16,7 @@ import ( cid "github.com/ipfs/go-cid" dstore "github.com/ipfs/go-datastore" logging "github.com/ipfs/go-log/v2" + cbg "github.com/whyrusleeping/cbor-gen" "github.com/filecoin-project/go-state-types/abi" @@ -48,7 +50,7 @@ var ( CompactionCold = build.Finality // CompactionBoundary is the number of epochs from the current epoch at which - // we will walk the chain for live objects + // we will walk the chain for live objects. 
CompactionBoundary = 2 * build.Finality ) @@ -73,7 +75,6 @@ const ( batchSize = 16384 defaultColdPurgeSize = 7_000_000 - defaultDeadPurgeSize = 1_000_000 ) type Config struct { @@ -94,7 +95,6 @@ type ChainAccessor interface { GetTipsetByHeight(context.Context, abi.ChainEpoch, *types.TipSet, bool) (*types.TipSet, error) GetHeaviestTipSet() *types.TipSet SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) - WalkSnapshot(context.Context, *types.TipSet, abi.ChainEpoch, bool, bool, func(cid.Cid) error) error } type SplitStore struct { @@ -104,6 +104,7 @@ type SplitStore struct { baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch + warm bool coldPurgeSize int @@ -340,6 +341,7 @@ func (s *SplitStore) Start(chain ChainAccessor) error { switch err { case nil: s.warmupEpoch = bytesToEpoch(bs) + s.warm = true case dstore.ErrNotFound: default: @@ -396,7 +398,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } - if s.warmupEpoch == 0 { + if !s.warm { // splitstore needs to warm up go func() { defer atomic.StoreInt32(&s.compacting, 0) @@ -404,7 +406,17 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { log.Info("warming up hotstore") start := time.Now() - s.warmup(curTs) + baseTs, err := s.chain.GetTipsetByHeight(context.Background(), s.baseEpoch, curTs, true) + if err != nil { + log.Errorf("error warming up hotstore: error getting tipset at base epoch: %s", err) + return + } + + err = s.warmup(baseTs) + if err != nil { + log.Errorf("error warming up hotstore: %s", err) + return + } log.Infow("warm up done", "took", time.Since(start)) }() @@ -432,14 +444,16 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } -func (s *SplitStore) warmup(curTs *types.TipSet) { +func (s *SplitStore) warmup(curTs *types.TipSet) error { epoch := curTs.Height() batchHot := make([]blocks.Block, 0, batchSize) batchSnoop := make([]cid.Cid, 0, batchSize) count := int64(0) - err := s.chain.WalkSnapshot(context.Background(), curTs, 1, true, true, + xcount := int64(0) + missing := int64(0) + err := s.walk(curTs, epoch, func(cid cid.Cid) error { count++ @@ -454,9 +468,15 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { blk, err := s.cold.Get(cid) if err != nil { + if err == bstore.ErrNotFound { + missing++ + return nil + } return err } + xcount++ + batchHot = append(batchHot, blk) batchSnoop = append(batchSnoop, cid) @@ -478,39 +498,41 @@ func (s *SplitStore) warmup(curTs *types.TipSet) { }) if err != nil { - log.Errorf("error warming up splitstore: %s", err) - return + return err } if len(batchHot) > 0 { err = s.tracker.PutBatch(batchSnoop, epoch) if err != nil { - log.Errorf("error warming up splitstore: %s", err) - return + return err } err = s.hot.PutMany(batchHot) if err != nil { - log.Errorf("error warming up splitstore: %s", err) - return + return err } } + log.Infow("warmup stats", "visited", count, "cold", xcount, "missing", missing) + if count > s.markSetSize { s.markSetSize = count + count>>2 // overestimate a bit } // save the warmup epoch + s.warm = true s.warmupEpoch = epoch err = s.ds.Put(warmupEpochKey, epochToBytes(epoch)) if err != nil { - log.Errorf("error saving warmup epoch: %s", err) + log.Warnf("error saving warmup epoch: %s", err) } err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) if err != nil { - log.Errorf("error saving mark set size: %s", err) + log.Warnf("error saving mark set size: %s", err) } + + return nil } // Compaction/GC Algorithm @@ -540,8 +562,10 @@ func (s 
*SplitStore) compact(curTs *types.TipSet) { } func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { + epoch := curTs.Height() + var count int64 - err := s.chain.WalkSnapshot(context.Background(), curTs, 1, true, true, + err := s.walk(curTs, epoch, func(cid cid.Cid) error { count++ return nil @@ -578,7 +602,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } var count int64 - err = s.chain.WalkSnapshot(context.Background(), boundaryTs, 1, true, true, + err = s.walk(boundaryTs, boundaryEpoch, func(cid cid.Cid) error { count++ return coldSet.Mark(cid) @@ -592,7 +616,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.markSetSize = count + count>>2 // overestimate a bit } - log.Infow("marking done", "took", time.Since(startMark)) + log.Infow("marking done", "took", time.Since(startMark), "marked", count) // 2. move cold unreachable objects to the coldstore log.Info("collecting cold objects") @@ -700,6 +724,93 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } +func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, f func(cid.Cid) error) error { + walked := cid.NewSet() + toWalk := ts.Cids() + + walkBlock := func(c cid.Cid) error { + if !walked.Visit(c) { + return nil + } + + blk, err := s.Get(c) + if err != nil { + return xerrors.Errorf("error retrieving block (cid: %s): %w", c, err) + } + + var hdr types.BlockHeader + if err := hdr.UnmarshalCBOR(bytes.NewBuffer(blk.RawData())); err != nil { + return xerrors.Errorf("error unmarshaling block header (cid: %s): %w", c, err) + } + + // don't walk under the boundary + if hdr.Height < boundary { + return nil + } + + if err := f(c); err != nil { + return err + } + + if err := s.walkLinks(hdr.Messages, walked, f); err != nil { + return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) + } + + if err := s.walkLinks(hdr.ParentStateRoot, walked, f); err != nil { + return xerrors.Errorf("error walking state root (cid: %s): %w", hdr.ParentStateRoot, err) + } + + toWalk = append(toWalk, hdr.Parents...) 
+ return nil + } + + for len(toWalk) > 0 { + walking := toWalk + toWalk = nil + for _, c := range walking { + if err := walkBlock(c); err != nil { + return xerrors.Errorf("error walking block (cid: %s): %w", c, err) + } + } + } + + return nil +} + +func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { + if !walked.Visit(c) { + return nil + } + + if c.Prefix().Codec != cid.DagCBOR { + return nil + } + + if err := f(c); err != nil { + return err + } + + blk, err := s.Get(c) + if err != nil { + return xerrors.Errorf("error retrieving linked block (cid: %s): %w", c, err) + } + + var rerr error + err = cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) { + if rerr != nil { + return + } + + rerr = s.walkLinks(c, walked, f) + }) + + if err != nil { + return xerrors.Errorf("error scanning links (cid: %s): %w", c, err) + } + + return rerr +} + func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index dcaf276474d..f50e4300afc 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -13,7 +13,7 @@ import ( "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/types/mock" - cid "github.com/ipfs/go-cid" + blocks "github.com/ipfs/go-block-format" datastore "github.com/ipfs/go-datastore" dssync "github.com/ipfs/go-datastore/sync" logging "github.com/ipfs/go-log/v2" @@ -28,16 +28,28 @@ func init() { func testSplitStore(t *testing.T, cfg *Config) { chain := &mockChain{t: t} - // genesis - genBlock := mock.MkBlock(nil, 0, 0) - genTs := mock.TipSet(genBlock) - chain.push(genTs) // the myriads of stores ds := dssync.MutexWrap(datastore.NewMapDatastore()) hot := blockstore.NewMemorySync() cold := blockstore.NewMemorySync() + // this is necessary to avoid the garbage mock puts in the blocks + garbage := blocks.NewBlock([]byte{1, 2, 3}) + err := cold.Put(garbage) + if err != nil { + t.Fatal(err) + } + + // genesis + genBlock := mock.MkBlock(nil, 0, 0) + genBlock.Messages = garbage.Cid() + genBlock.ParentMessageReceipts = garbage.Cid() + genBlock.ParentStateRoot = garbage.Cid() + + genTs := mock.TipSet(genBlock) + chain.push(genTs) + // put the genesis block to cold store blk, err := genBlock.ToStorageBlock() if err != nil { @@ -64,6 +76,11 @@ func testSplitStore(t *testing.T, cfg *Config) { // make some tipsets, but not enough to cause compaction mkBlock := func(curTs *types.TipSet, i int) *types.TipSet { blk := mock.MkBlock(curTs, uint64(i), uint64(i)) + + blk.Messages = garbage.Cid() + blk.ParentMessageReceipts = garbage.Cid() + blk.ParentStateRoot = garbage.Cid() + sblk, err := blk.ToStorageBlock() if err != nil { t.Fatal(err) @@ -78,18 +95,6 @@ func testSplitStore(t *testing.T, cfg *Config) { return ts } - mkGarbageBlock := func(curTs *types.TipSet, i int) { - blk := mock.MkBlock(curTs, uint64(i), uint64(i)) - sblk, err := blk.ToStorageBlock() - if err != nil { - t.Fatal(err) - } - err = ss.Put(sblk) - if err != nil { - t.Fatal(err) - } - } - waitForCompaction := func() { for atomic.LoadInt32(&ss.compacting) == 1 { time.Sleep(100 * time.Millisecond) @@ -102,8 +107,6 @@ func testSplitStore(t *testing.T, cfg *Config) { waitForCompaction() } - mkGarbageBlock(genTs, 1) - // count objects in the cold and hot stores ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -123,8 +126,8 @@ func testSplitStore(t *testing.T, 
cfg *Config) { coldCnt := countBlocks(cold) hotCnt := countBlocks(hot) - if coldCnt != 1 { - t.Errorf("expected %d blocks, but got %d", 1, coldCnt) + if coldCnt != 2 { + t.Errorf("expected %d blocks, but got %d", 2, coldCnt) } if hotCnt != 5 { @@ -140,34 +143,12 @@ func testSplitStore(t *testing.T, cfg *Config) { coldCnt = countBlocks(cold) hotCnt = countBlocks(hot) - if !cfg.EnableFullCompaction { - if coldCnt != 5 { - t.Errorf("expected %d cold blocks, but got %d", 5, coldCnt) - } - - if hotCnt != 5 { - t.Errorf("expected %d hot blocks, but got %d", 5, hotCnt) - } + if coldCnt != 7 { + t.Errorf("expected %d cold blocks, but got %d", 7, coldCnt) } - if cfg.EnableFullCompaction && !cfg.EnableGC { - if coldCnt != 3 { - t.Errorf("expected %d cold blocks, but got %d", 3, coldCnt) - } - - if hotCnt != 7 { - t.Errorf("expected %d hot blocks, but got %d", 7, hotCnt) - } - } - - if cfg.EnableFullCompaction && cfg.EnableGC { - if coldCnt != 2 { - t.Errorf("expected %d cold blocks, but got %d", 2, coldCnt) - } - - if hotCnt != 7 { - t.Errorf("expected %d hot blocks, but got %d", 7, hotCnt) - } + if hotCnt != 4 { + t.Errorf("expected %d hot blocks, but got %d", 4, hotCnt) } // Make sure we can revert without panicking. @@ -178,21 +159,6 @@ func TestSplitStoreSimpleCompaction(t *testing.T) { testSplitStore(t, &Config{TrackingStoreType: "mem"}) } -func TestSplitStoreFullCompactionWithoutGC(t *testing.T) { - testSplitStore(t, &Config{ - TrackingStoreType: "mem", - EnableFullCompaction: true, - }) -} - -func TestSplitStoreFullCompactionWithGC(t *testing.T) { - testSplitStore(t, &Config{ - TrackingStoreType: "mem", - EnableFullCompaction: true, - EnableGC: true, - }) -} - type mockChain struct { t testing.TB @@ -242,7 +208,7 @@ func (c *mockChain) GetTipsetByHeight(_ context.Context, epoch abi.ChainEpoch, _ return nil, fmt.Errorf("bad epoch %d", epoch) } - return c.tipsets[iEpoch-1], nil + return c.tipsets[iEpoch], nil } func (c *mockChain) GetHeaviestTipSet() *types.TipSet { @@ -255,25 +221,3 @@ func (c *mockChain) GetHeaviestTipSet() *types.TipSet { func (c *mockChain) SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) { c.listener = change } - -func (c *mockChain) WalkSnapshot(_ context.Context, ts *types.TipSet, epochs abi.ChainEpoch, _ bool, _ bool, f func(cid.Cid) error) error { - c.Lock() - defer c.Unlock() - - start := int(ts.Height()) - 1 - end := start - int(epochs) - if end < 0 { - end = -1 - } - for i := start; i > end; i-- { - ts := c.tipsets[i] - for _, cid := range ts.Cids() { - err := f(cid) - if err != nil { - return err - } - } - } - - return nil -} From d7ceef104ea7daf503966cd663e51e5306932186 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 14 Mar 2021 11:06:00 +0200 Subject: [PATCH 005/197] decrease CompactionThreshold to 3 finalities --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 63e42ab532f..ab7408162db 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -42,7 +42,7 @@ var ( // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction // --- :: hot - CompactionThreshold = 5 * build.Finality + CompactionThreshold = 3 * build.Finality // CompactionCold is the number of epochs that will be archived to the // cold store on compaction. 
See diagram on CompactionThreshold for a

From 3a9b7c592db077bf07f6b3d7dfc4de650037bc72 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Sun, 14 Mar 2021 12:32:05 +0200
Subject: [PATCH 006/197] mark from current epoch to boundary epoch when necessary

This is necessary to avoid wearing clown shoes when the node stays
offline for an extended period of time (more than 1 finality).

Basically the full 2-finality walk gets quite slow, so we try to avoid
it unless necessary.

A full walk is necessary only when there is a sync gap (most likely
because the node was offline), during which the tracking of writes is
inaccurate because we have not yet delivered the HeadChange
notification. In that case it is possible for actually hot blocks to be
tracked before the boundary and thus fail to be marked accordingly.

So when we detect a sync gap, we do the full walk; if there is no sync
gap, we can just use the much faster boundary epoch walk.
---
 blockstore/splitstore/splitstore.go      | 88 ++++++++++++++---------
 blockstore/splitstore/splitstore_test.go |  2 +
 2 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index ab7408162db..0aea3d63818 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -52,6 +52,10 @@ var (
 	// CompactionBoundary is the number of epochs from the current epoch at which
 	// we will walk the chain for live objects.
 	CompactionBoundary = 2 * build.Finality
+
+	// SyncGapTime is the time delay from a tipset's min timestamp before we decide
+	// there is a sync gap
+	SyncGapTime = 5 * time.Minute
 )
 
 var (
@@ -64,6 +68,11 @@ var (
 	// all active blocks into the hotstore.
 	warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch")
 
+	// syncGapEpochKey stores the last epoch where a sync gap was detected.
+	// If there is a sync gap after the boundary epoch, compaction will perform
+	// a slower full walk from the current epoch to the boundary epoch
+	syncGapEpochKey = dstore.NewKey("/splitstore/syncGapEpoch")
+
 	// markSetSizeKey stores the current estimate for the mark set size.
// this is first computed at warmup and updated in every compaction markSetSizeKey = dstore.NewKey("/splitstore/markSetSize") @@ -102,9 +111,10 @@ type SplitStore struct { critsection int32 // compaction critical section closing int32 // the split store is closing - baseEpoch abi.ChainEpoch - warmupEpoch abi.ChainEpoch - warm bool + baseEpoch abi.ChainEpoch + syncGapEpoch abi.ChainEpoch + warmupEpoch abi.ChainEpoch + warm bool coldPurgeSize int @@ -348,6 +358,17 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading warmup epoch: %w", err) } + // load sync gap epoch from metadata ds + bs, err = s.ds.Get(syncGapEpochKey) + switch err { + case nil: + s.syncGapEpoch = bytesToEpoch(bs) + + case dstore.ErrNotFound: + default: + return xerrors.Errorf("error loading sync gap epoch: %w", err) + } + // load markSetSize from metadata ds // if none, the splitstore will compute it during warmup and update in every compaction bs, err = s.ds.Get(markSetSizeKey) @@ -393,6 +414,14 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { s.curTs = curTs s.mx.Unlock() + timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) + if time.Since(timestamp) > SyncGapTime { + err := s.setSyncGapEpoch(epoch) + if err != nil { + log.Warnf("error saving sync gap epoch: %s", err) + } + } + if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { // we are currently compacting, do nothing and wait for the next head change return nil @@ -432,7 +461,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { log.Info("compacting splitstore") start := time.Now() - s.compact(curTs) + s.compact(curTs, s.syncGapEpoch) log.Infow("compaction done", "took", time.Since(start)) }() @@ -513,7 +542,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { } } - log.Infow("warmup stats", "visited", count, "cold", xcount, "missing", missing) + log.Infow("warmup stats", "visited", count, "warm", xcount, "missing", missing) if count > s.markSetSize { s.markSetSize = count + count>>2 // overestimate a bit @@ -536,7 +565,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { } // Compaction/GC Algorithm -func (s *SplitStore) compact(curTs *types.TipSet) { +func (s *SplitStore) compact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) { var err error if s.markSetSize == 0 { start := time.Now() @@ -552,7 +581,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) { } start := time.Now() - err = s.doCompact(curTs) + err = s.doCompact(curTs, syncGapEpoch) took := time.Since(start).Milliseconds() stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) @@ -579,33 +608,48 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { return nil } -func (s *SplitStore) doCompact(curTs *types.TipSet) error { +func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) error { coldEpoch := s.baseEpoch + CompactionCold currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) - coldSet, err := s.env.Create("cold", s.markSetSize) + markSet, err := s.env.Create("live", s.markSetSize) if err != nil { return xerrors.Errorf("error creating mark set: %w", err) } - defer coldSet.Close() //nolint:errcheck + defer markSet.Close() //nolint:errcheck - // 1. 
mark reachable cold objects by looking at the objects reachable only from the cold epoch - log.Infow("marking reachable cold blocks", "boundaryEpoch", boundaryEpoch) + // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch + log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() - boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) - if err != nil { - return xerrors.Errorf("error getting tipset at boundary epoch: %w", err) + var markTs *types.TipSet + if syncGapEpoch > boundaryEpoch { + // There is a sync gap that may have caused writes that are logically after the boundary + // epoch to be written before the respective head change notification and hence be tracked + // at the wrong epoch. + // This can happen if the node is offline or falls out of sync for an extended period of time. + // In this case we perform a full walk to avoid pathologies with pushing actually hot + // objects into the coldstore. + markTs = curTs + log.Infof("sync gap detected at epoch %d; marking from current epoch to boundary epoch", syncGapEpoch) + } else { + // There is no pathological sync gap, so we can use the much faster single tipset walk at + // the boundary epoch. + boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) + if err != nil { + return xerrors.Errorf("error getting tipset at boundary epoch: %w", err) + } + markTs = boundaryTs } var count int64 - err = s.walk(boundaryTs, boundaryEpoch, + err = s.walk(markTs, boundaryEpoch, func(cid cid.Cid) error { count++ - return coldSet.Mark(cid) + return markSet.Mark(cid) }) if err != nil { @@ -637,9 +681,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } // check whether it is reachable in the cold boundary - mark, err := coldSet.Has(cid) + mark, err := markSet.Has(cid) if err != nil { - return xerrors.Errorf("error checkiing cold set for %s: %w", cid, err) + return xerrors.Errorf("error checkiing mark set for %s: %w", cid, err) } if mark { @@ -909,10 +953,14 @@ func (s *SplitStore) gcHotstore() { func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { s.baseEpoch = epoch - // write to datastore return s.ds.Put(baseEpochKey, epochToBytes(epoch)) } +func (s *SplitStore) setSyncGapEpoch(epoch abi.ChainEpoch) error { + s.syncGapEpoch = epoch + return s.ds.Put(syncGapEpochKey, epochToBytes(epoch)) +} + func epochToBytes(epoch abi.ChainEpoch) []byte { return uint64ToBytes(uint64(epoch)) } diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index f50e4300afc..b595e6a3bbb 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -46,6 +46,7 @@ func testSplitStore(t *testing.T, cfg *Config) { genBlock.Messages = garbage.Cid() genBlock.ParentMessageReceipts = garbage.Cid() genBlock.ParentStateRoot = garbage.Cid() + genBlock.Timestamp = uint64(time.Now().Unix()) genTs := mock.TipSet(genBlock) chain.push(genTs) @@ -80,6 +81,7 @@ func testSplitStore(t *testing.T, cfg *Config) { blk.Messages = garbage.Cid() blk.ParentMessageReceipts = garbage.Cid() blk.ParentStateRoot = garbage.Cid() + blk.Timestamp = uint64(time.Now().Unix()) sblk, err := blk.ToStorageBlock() if err != nil { From b2b7eb2ded04333e4504576bf59afedf7721f39d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Tue, 16 Mar 2021 20:05:22 +0000 Subject: [PATCH 007/197] metrics: increment 
misses in View(). --- blockstore/splitstore/splitstore.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0aea3d63818..ed525105e9e 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -310,7 +310,11 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case bstore.ErrNotFound: - return s.cold.View(cid, cb) + err = s.cold.View(cid, cb) + if err == nil { + stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) + } + return err default: return err From e9f531b4aac839638a5783beba6af1c063cd5d82 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 19 Mar 2021 12:36:20 +0200 Subject: [PATCH 008/197] don't open bolt tracking store with NoSync, it might get corrupted --- blockstore/splitstore/tracking_bolt.go | 1 - 1 file changed, 1 deletion(-) diff --git a/blockstore/splitstore/tracking_bolt.go b/blockstore/splitstore/tracking_bolt.go index c5c451e1570..2980e8c5ab1 100644 --- a/blockstore/splitstore/tracking_bolt.go +++ b/blockstore/splitstore/tracking_bolt.go @@ -21,7 +21,6 @@ var _ TrackingStore = (*BoltTrackingStore)(nil) func OpenBoltTrackingStore(path string) (*BoltTrackingStore, error) { opts := &bolt.Options{ Timeout: 1 * time.Second, - NoSync: true, } db, err := bolt.Open(path, 0644, opts) if err != nil { From 7cf75e667d7e3cf564ff2e672fef2120ee9b10bb Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 19 Mar 2021 12:17:32 +0200 Subject: [PATCH 009/197] keep genesis-linked state hot --- blockstore/splitstore/splitstore.go | 57 ++++++++++++++++++++++++ blockstore/splitstore/splitstore_test.go | 12 ++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ed525105e9e..bdfad8da9c7 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -101,6 +101,7 @@ type Config struct { // ChainAccessor allows the Splitstore to access the chain. It will most likely // be a ChainStore at runtime. 
type ChainAccessor interface { + GetGenesis() (*types.BlockHeader, error) GetTipsetByHeight(context.Context, abi.ChainEpoch, *types.TipSet, bool) (*types.TipSet, error) GetHeaviestTipSet() *types.TipSet SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) @@ -127,6 +128,8 @@ type SplitStore struct { cold bstore.Blockstore tracker TrackingStore + genesis, genesisStateRoot cid.Cid + env MarkSetEnv markSetSize int64 @@ -326,6 +329,60 @@ func (s *SplitStore) Start(chain ChainAccessor) error { s.chain = chain s.curTs = chain.GetHeaviestTipSet() + // make sure the genesis and its state root are hot + gb, err := chain.GetGenesis() + if err != nil { + return xerrors.Errorf("error getting genesis: %w", err) + } + + s.genesis = gb.Cid() + s.genesisStateRoot = gb.ParentStateRoot + + has, err := s.hot.Has(s.genesis) + if err != nil { + return xerrors.Errorf("error checking hotstore for genesis: %w", err) + } + + if !has { + blk, err := gb.ToStorageBlock() + if err != nil { + return xerrors.Errorf("error converting genesis block to storage block: %w", err) + } + + err = s.hot.Put(blk) + if err != nil { + return xerrors.Errorf("error putting genesis block to hotstore: %w", err) + } + } + + err = s.walkLinks(s.genesisStateRoot, cid.NewSet(), func(c cid.Cid) error { + has, err = s.hot.Has(c) + if err != nil { + return xerrors.Errorf("error checking hotstore for genesis state root: %w", err) + } + + if !has { + blk, err := s.cold.Get(c) + if err != nil { + if err == bstore.ErrNotFound { + return nil + } + + return xerrors.Errorf("error retrieving genesis state linked object from coldstore: %w", err) + } + + err = s.hot.Put(blk) + if err != nil { + return xerrors.Errorf("error putting genesis state linked object to hotstore: %w", err) + } + } + + return nil + }) + if err != nil { + return xerrors.Errorf("error walking genesis state root links: %w", err) + } + // load base epoch from metadata ds // if none, then use current epoch because it's a fresh start bs, err := s.ds.Get(baseEpochKey) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index b595e6a3bbb..0fe298c6fc8 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -149,8 +149,8 @@ func testSplitStore(t *testing.T, cfg *Config) { t.Errorf("expected %d cold blocks, but got %d", 7, coldCnt) } - if hotCnt != 4 { - t.Errorf("expected %d hot blocks, but got %d", 4, hotCnt) + if hotCnt != 5 { + t.Errorf("expected %d hot blocks, but got %d", 5, hotCnt) } // Make sure we can revert without panicking. 
@@ -165,6 +165,7 @@ type mockChain struct { t testing.TB sync.Mutex + genesis *types.BlockHeader tipsets []*types.TipSet listener func(revert []*types.TipSet, apply []*types.TipSet) error } @@ -172,6 +173,9 @@ type mockChain struct { func (c *mockChain) push(ts *types.TipSet) { c.Lock() c.tipsets = append(c.tipsets, ts) + if c.genesis == nil { + c.genesis = ts.Blocks()[0] + } c.Unlock() if c.listener != nil { @@ -201,6 +205,10 @@ func (c *mockChain) revert(count int) { } } +func (c *mockChain) GetGenesis() (*types.BlockHeader, error) { + return c.genesis, nil +} + func (c *mockChain) GetTipsetByHeight(_ context.Context, epoch abi.ChainEpoch, _ *types.TipSet, _ bool) (*types.TipSet, error) { c.Lock() defer c.Unlock() From bdb97d61860c44cfb44aef296089811c18aeeaed Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 14 Jun 2021 20:49:11 +0300 Subject: [PATCH 010/197] more robust handling of sync gap walks --- blockstore/splitstore/splitstore.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index bdfad8da9c7..2b3905e0796 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -543,7 +543,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { count := int64(0) xcount := int64(0) missing := int64(0) - err := s.walk(curTs, epoch, + err := s.walk(curTs, epoch, false, func(cid cid.Cid) error { count++ @@ -655,7 +655,7 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { epoch := curTs.Height() var count int64 - err := s.walk(curTs, epoch, + err := s.walk(curTs, epoch, false, func(cid cid.Cid) error { count++ return nil @@ -686,6 +686,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() + var inclMsgs bool var markTs *types.TipSet if syncGapEpoch > boundaryEpoch { // There is a sync gap that may have caused writes that are logically after the boundary @@ -695,6 +696,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) // In this case we perform a full walk to avoid pathologies with pushing actually hot // objects into the coldstore. markTs = curTs + inclMsgs = true log.Infof("sync gap detected at epoch %d; marking from current epoch to boundary epoch", syncGapEpoch) } else { // There is no pathological sync gap, so we can use the much faster single tipset walk at @@ -707,7 +709,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) } var count int64 - err = s.walk(markTs, boundaryEpoch, + err = s.walk(markTs, boundaryEpoch, inclMsgs, func(cid cid.Cid) error { count++ return markSet.Mark(cid) @@ -829,7 +831,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) return nil } -func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, f func(cid.Cid) error) error { +func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool, f func(cid.Cid) error) error { walked := cid.NewSet() toWalk := ts.Cids() @@ -857,8 +859,10 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, f func(cid. 
 		return err
 	}
 
+	if c.Prefix().Codec != cid.DagCBOR {
+		return nil
+	}
+
 	blk, err := s.Get(c)
 	if err != nil {
 		return xerrors.Errorf("error retrieving linked block (cid: %s): %w", c, err)

From fda291b87646872d2d0d88328969bd7536b17ecc Mon Sep 17 00:00:00 2001
From: vyzo
Date: Mon, 14 Jun 2021 21:02:18 +0300
Subject: [PATCH 012/197] fix test
---
 blockstore/splitstore/splitstore_test.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go
index 0fe298c6fc8..d842fe72a67 100644
--- a/blockstore/splitstore/splitstore_test.go
+++ b/blockstore/splitstore/splitstore_test.go
@@ -132,8 +132,8 @@ func testSplitStore(t *testing.T, cfg *Config) {
 		t.Errorf("expected %d blocks, but got %d", 2, coldCnt)
 	}
 
-	if hotCnt != 5 {
-		t.Errorf("expected %d blocks, but got %d", 5, hotCnt)
+	if hotCnt != 6 {
+		t.Errorf("expected %d blocks, but got %d", 6, hotCnt)
 	}
 
 	// trigger a compaction
@@ -149,8 +149,8 @@ func testSplitStore(t *testing.T, cfg *Config) {
 		t.Errorf("expected %d cold blocks, but got %d", 7, coldCnt)
 	}
 
-	if hotCnt != 5 {
-		t.Errorf("expected %d hot blocks, but got %d", 5, hotCnt)
+	if hotCnt != 6 {
+		t.Errorf("expected %d hot blocks, but got %d", 6, hotCnt)
 	}
 
 	// Make sure we can revert without panicking.

From fa6481401d52b57412e3eeb50fa2a0e1ddc258ac Mon Sep 17 00:00:00 2001
From: vyzo
Date: Mon, 14 Jun 2021 21:33:53 +0300
Subject: [PATCH 013/197] reduce SyncGapTime to 1 minute for maximal safety.
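For context, the constant tuned here feeds the sync-gap check that PATCH 006
added to HeadChange. Excerpt from that patch, with explanatory comments added:

    // a tipset whose min timestamp lags the wall clock by more than
    // SyncGapTime records a sync gap; a later compaction then falls back
    // to the slower full walk from the current epoch to the boundary epoch
    timestamp := time.Unix(int64(curTs.MinTimestamp()), 0)
    if time.Since(timestamp) > SyncGapTime {
    	err := s.setSyncGapEpoch(epoch)
    	if err != nil {
    		log.Warnf("error saving sync gap epoch: %s", err)
    	}
    }

At the 30 second mainnet epoch time, a one minute delay means a node roughly
two epochs behind already records a gap, trading a slower full-walk compaction
for safety.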
--- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b8c199f706d..f24a350b030 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -55,7 +55,7 @@ var ( // SyncGapTime is the time delay from a tipset's min timestamp before we decide // there is a sync gap - SyncGapTime = 5 * time.Minute + SyncGapTime = time.Minute ) var ( From 41573f1fb2993ce1894c773f31116c61437af05e Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 14 Jun 2021 23:21:47 +0300 Subject: [PATCH 014/197] also walk parent message receipts when including messages in the walk --- blockstore/splitstore/splitstore.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index f24a350b030..d25de718e4a 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -863,6 +863,10 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo if err := s.walkLinks(hdr.Messages, walked, f); err != nil { return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) } + + if err := s.walkLinks(hdr.ParentMessageReceipts, walked, f); err != nil { + return xerrors.Errorf("error walking message receipts (cid: %s): %w", hdr.ParentMessageReceipts, err) + } } if err := s.walkLinks(hdr.ParentStateRoot, walked, f); err != nil { From 7c814cd2e3fcc883356aaf7948db6c0fb7d3450d Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 16 Jun 2021 13:58:16 +0300 Subject: [PATCH 015/197] refactor genesis state loading code into its own method --- blockstore/splitstore/splitstore.go | 121 +++++++++++++++------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d25de718e4a..6d717be9a33 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -128,8 +128,6 @@ type SplitStore struct { cold bstore.Blockstore tracker TrackingStore - genesis, genesisStateRoot cid.Cid - env MarkSetEnv markSetSize int64 @@ -329,60 +327,6 @@ func (s *SplitStore) Start(chain ChainAccessor) error { s.chain = chain s.curTs = chain.GetHeaviestTipSet() - // make sure the genesis and its state root are hot - gb, err := chain.GetGenesis() - if err != nil { - return xerrors.Errorf("error getting genesis: %w", err) - } - - s.genesis = gb.Cid() - s.genesisStateRoot = gb.ParentStateRoot - - has, err := s.hot.Has(s.genesis) - if err != nil { - return xerrors.Errorf("error checking hotstore for genesis: %w", err) - } - - if !has { - blk, err := gb.ToStorageBlock() - if err != nil { - return xerrors.Errorf("error converting genesis block to storage block: %w", err) - } - - err = s.hot.Put(blk) - if err != nil { - return xerrors.Errorf("error putting genesis block to hotstore: %w", err) - } - } - - err = s.walkLinks(s.genesisStateRoot, cid.NewSet(), func(c cid.Cid) error { - has, err = s.hot.Has(c) - if err != nil { - return xerrors.Errorf("error checking hotstore for genesis state root: %w", err) - } - - if !has { - blk, err := s.cold.Get(c) - if err != nil { - if err == bstore.ErrNotFound { - return nil - } - - return xerrors.Errorf("error retrieving genesis state linked object from coldstore: %w", err) - } - - err = s.hot.Put(blk) - if err != nil { - return xerrors.Errorf("error putting genesis state linked object to hotstore: %w", err) - } - } - - return nil - 
}) - if err != nil { - return xerrors.Errorf("error walking genesis state root links: %w", err) - } - // load base epoch from metadata ds // if none, then use current epoch because it's a fresh start bs, err := s.ds.Get(baseEpochKey) @@ -415,6 +359,12 @@ func (s *SplitStore) Start(chain ChainAccessor) error { s.warm = true case dstore.ErrNotFound: + // the hotstore hasn't warmed up, load the genesis into the hotstore + err = s.loadGenesisState() + if err != nil { + return xerrors.Errorf("error loading genesis state: %w", err) + } + default: return xerrors.Errorf("error loading warmup epoch: %w", err) } @@ -534,6 +484,65 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } +func (s *SplitStore) loadGenesisState() error { + // makes sure the genesis and its state root are hot + gb, err := s.chain.GetGenesis() + if err != nil { + return xerrors.Errorf("error getting genesis: %w", err) + } + + genesis := gb.Cid() + genesisStateRoot := gb.ParentStateRoot + + has, err := s.hot.Has(genesis) + if err != nil { + return xerrors.Errorf("error checking hotstore for genesis: %w", err) + } + + if !has { + blk, err := gb.ToStorageBlock() + if err != nil { + return xerrors.Errorf("error converting genesis block to storage block: %w", err) + } + + err = s.hot.Put(blk) + if err != nil { + return xerrors.Errorf("error putting genesis block to hotstore: %w", err) + } + } + + err = s.walkLinks(genesisStateRoot, cid.NewSet(), func(c cid.Cid) error { + has, err = s.hot.Has(c) + if err != nil { + return xerrors.Errorf("error checking hotstore for genesis state root: %w", err) + } + + if !has { + blk, err := s.cold.Get(c) + if err != nil { + if err == bstore.ErrNotFound { + return nil + } + + return xerrors.Errorf("error retrieving genesis state linked object from coldstore: %w", err) + } + + err = s.hot.Put(blk) + if err != nil { + return xerrors.Errorf("error putting genesis state linked object to hotstore: %w", err) + } + } + + return nil + }) + + if err != nil { + return xerrors.Errorf("error walking genesis state root links: %w", err) + } + + return nil +} + func (s *SplitStore) warmup(curTs *types.TipSet) error { epoch := curTs.Height() From 997f2c098b714a2218173a3030e96829e2e19f10 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 16 Jun 2021 14:07:10 +0300 Subject: [PATCH 016/197] keep headers hot when running with a noop splitstore --- blockstore/splitstore/splitstore.go | 32 +++++++++++++++++++---------- node/modules/blockstore.go | 1 + 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 6d717be9a33..2532413dbbf 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -96,6 +96,11 @@ type Config struct { // // Supported values are: "bloom" (default if omitted), "bolt". MarkSetType string + + // HotHeaders indicates whether to keep chain block headers in hotstore or not. + // This is necessary, and automatically set by DI in lotus node construction, if + // you are running with a noop coldstore. + HotHeaders bool } // ChainAccessor allows the Splitstore to access the chain. 
It will most likely @@ -112,6 +117,8 @@ type SplitStore struct { critsection int32 // compaction critical section closing int32 // the split store is closing + cfg *Config + baseEpoch abi.ChainEpoch syncGapEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch @@ -154,6 +161,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co // and now we can make a SplitStore ss := &SplitStore{ + cfg: cfg, ds: ds, hot: hot, cold: cold, @@ -859,8 +867,8 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo return xerrors.Errorf("error unmarshaling block header (cid: %s): %w", c, err) } - // don't walk under the boundary - if hdr.Height < boundary { + // don't walk under the boundary, unless we are keeping the headers hot + if hdr.Height < boundary && !s.cfg.HotHeaders { return nil } @@ -868,18 +876,20 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo return err } - if inclMsgs { - if err := s.walkLinks(hdr.Messages, walked, f); err != nil { - return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) - } + if hdr.Height >= boundary { + if inclMsgs { + if err := s.walkLinks(hdr.Messages, walked, f); err != nil { + return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) + } - if err := s.walkLinks(hdr.ParentMessageReceipts, walked, f); err != nil { - return xerrors.Errorf("error walking message receipts (cid: %s): %w", hdr.ParentMessageReceipts, err) + if err := s.walkLinks(hdr.ParentMessageReceipts, walked, f); err != nil { + return xerrors.Errorf("error walking message receipts (cid: %s): %w", hdr.ParentMessageReceipts, err) + } } - } - if err := s.walkLinks(hdr.ParentStateRoot, walked, f); err != nil { - return xerrors.Errorf("error walking state root (cid: %s): %w", hdr.ParentStateRoot, err) + if err := s.walkLinks(hdr.ParentStateRoot, walked, f); err != nil { + return xerrors.Errorf("error walking state root (cid: %s): %w", hdr.ParentStateRoot, err) + } } toWalk = append(toWalk, hdr.Parents...) diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 5accb50650a..cc85e517fc2 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -80,6 +80,7 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked cfg := &splitstore.Config{ TrackingStoreType: cfg.Splitstore.TrackingStoreType, MarkSetType: cfg.Splitstore.MarkSetType, + HotHeaders: cfg.Splitstore.ColdStoreType == "noop", } ss, err := splitstore.Open(path, ds, hot, cold, cfg) if err != nil { From 7b02673620cd40378b09ceb9a76e4a21c16a9077 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 16 Jun 2021 18:26:04 +0300 Subject: [PATCH 017/197] don't try to visit genesis parent blocks --- blockstore/splitstore/splitstore.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 2532413dbbf..660823762cd 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -892,7 +892,9 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo } } - toWalk = append(toWalk, hdr.Parents...) + if hdr.Height > 0 { + toWalk = append(toWalk, hdr.Parents...) 
+ } return nil } From 3fe4261f12199f9d6282dda80efd7c8506357803 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 16 Jun 2021 19:58:18 +0300 Subject: [PATCH 018/197] don't attempt compaction while still syncing --- blockstore/splitstore/splitstore.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 660823762cd..3d7049010de 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -439,6 +439,9 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { if err != nil { log.Warnf("error saving sync gap epoch: %s", err) } + + // don't attempt compaction before we have caught up syncing + return nil } if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { From 9b6448518c2204805d588b4d7651b84a8671466f Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 16 Jun 2021 20:45:33 +0300 Subject: [PATCH 019/197] refactor warmup to trigger at startup and not wait for sync --- blockstore/splitstore/splitstore.go | 77 ++++++++++++++--------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 3d7049010de..daace6959a6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -122,7 +122,6 @@ type SplitStore struct { baseEpoch abi.ChainEpoch syncGapEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch - warm bool coldPurgeSize int @@ -364,13 +363,12 @@ func (s *SplitStore) Start(chain ChainAccessor) error { switch err { case nil: s.warmupEpoch = bytesToEpoch(bs) - s.warm = true case dstore.ErrNotFound: // the hotstore hasn't warmed up, load the genesis into the hotstore - err = s.loadGenesisState() + err = s.warmup(s.curTs) if err != nil { - return xerrors.Errorf("error loading genesis state: %w", err) + return xerrors.Errorf("error warming up: %w", err) } default: @@ -449,32 +447,6 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } - if !s.warm { - // splitstore needs to warm up - go func() { - defer atomic.StoreInt32(&s.compacting, 0) - - log.Info("warming up hotstore") - start := time.Now() - - baseTs, err := s.chain.GetTipsetByHeight(context.Background(), s.baseEpoch, curTs, true) - if err != nil { - log.Errorf("error warming up hotstore: error getting tipset at base epoch: %s", err) - return - } - - err = s.warmup(baseTs) - if err != nil { - log.Errorf("error warming up hotstore: %s", err) - return - } - - log.Infow("warm up done", "took", time.Since(start)) - }() - - return nil - } - if epoch-s.baseEpoch > CompactionThreshold { // it's time to compact go func() { @@ -495,6 +467,41 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } +func (s *SplitStore) warmup(curTs *types.TipSet) error { + err := s.loadGenesisState() + if err != nil { + return xerrors.Errorf("error loading genesis state: %w") + } + + if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { + return xerrors.Errorf("error locking compaction") + } + + go func() { + defer atomic.StoreInt32(&s.compacting, 0) + + log.Info("warming up hotstore") + start := time.Now() + + err = s.doWarmup(curTs) + if err != nil { + log.Errorf("error warming up hotstore: %s", err) + return + } + + log.Infow("warm up done", "took", time.Since(start)) + }() + + // save the warmup epoch + s.warmupEpoch = curTs.Height() + err = s.ds.Put(warmupEpochKey, epochToBytes(s.warmupEpoch)) + if err != nil { + return xerrors.Errorf("error saving warm up epoch: %w") + } + + 
return nil +} + func (s *SplitStore) loadGenesisState() error { // makes sure the genesis and its state root are hot gb, err := s.chain.GetGenesis() @@ -554,7 +561,7 @@ func (s *SplitStore) loadGenesisState() error { return nil } -func (s *SplitStore) warmup(curTs *types.TipSet) error { +func (s *SplitStore) doWarmup(curTs *types.TipSet) error { epoch := curTs.Height() batchHot := make([]blocks.Block, 0, batchSize) @@ -629,14 +636,6 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { s.markSetSize = count + count>>2 // overestimate a bit } - // save the warmup epoch - s.warm = true - s.warmupEpoch = epoch - err = s.ds.Put(warmupEpochKey, epochToBytes(epoch)) - if err != nil { - log.Warnf("error saving warmup epoch: %s", err) - } - err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) if err != nil { log.Warnf("error saving mark set size: %s", err) From 421f05eab975d0ac4b36f18966c48e3f934e921a Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 00:21:16 +0300 Subject: [PATCH 020/197] save the warm up epoch only if successful in warming up --- blockstore/splitstore/splitstore.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index daace6959a6..d8360838c5b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -492,13 +492,6 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { log.Infow("warm up done", "took", time.Since(start)) }() - // save the warmup epoch - s.warmupEpoch = curTs.Height() - err = s.ds.Put(warmupEpochKey, epochToBytes(s.warmupEpoch)) - if err != nil { - return xerrors.Errorf("error saving warm up epoch: %w") - } - return nil } @@ -641,6 +634,13 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { log.Warnf("error saving mark set size: %s", err) } + // save the warmup epoch + err = s.ds.Put(warmupEpochKey, epochToBytes(epoch)) + if err != nil { + return xerrors.Errorf("error saving warm up epoch: %w", err) + } + s.warmupEpoch = epoch + return nil } From bb17608ae0102099793490b7701418aa4b6ba4d5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 12:32:32 +0300 Subject: [PATCH 021/197] track writeEpoch relative to current wall clock time The issue: head change notifications are not emitted until after catching up, which results in all writes during a catch up period being tracked at the base epoch. 
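In rough terms, the new scheme derives the write epoch from the wall clock rather than
from the last head change notification. A minimal sketch of the arithmetic (illustrative
only, not part of the diff below; estimateWriteEpoch is a hypothetical helper, and
builtin.EpochDurationSeconds is the 30-second Filecoin epoch duration):

    // project the chain epoch forward by the wall-clock time elapsed
    // since the tipset's minimum timestamp
    func estimateWriteEpoch(ts *types.TipSet) abi.ChainEpoch {
            dt := time.Since(time.Unix(int64(ts.MinTimestamp()), 0))
            if dt < 0 {
                    // local clock is behind the tipset timestamp; don't go backwards
                    return ts.Height()
            }
            return ts.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds
    }

So a node that stalls at epoch N and resumes an hour later immediately tracks new writes
at roughly N+120 rather than at N.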
--- blockstore/splitstore/splitstore.go | 58 +++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d8360838c5b..dfb6ddf066f 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -24,6 +24,7 @@ import ( "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/metrics" + "github.com/filecoin-project/specs-actors/v2/actors/builtin" "go.opencensus.io/stats" ) @@ -122,6 +123,7 @@ type SplitStore struct { baseEpoch abi.ChainEpoch syncGapEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch + writeEpoch abi.ChainEpoch coldPurgeSize int @@ -137,6 +139,9 @@ type SplitStore struct { env MarkSetEnv markSetSize int64 + + ctx context.Context + cancel func() } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -170,6 +175,8 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co coldPurgeSize: defaultColdPurgeSize, } + ss.ctx, ss.cancel = context.WithCancel(context.Background()) + return ss, nil } @@ -239,7 +246,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { return s.cold.Put(blk) } - epoch := s.curTs.Height() + epoch := s.writeEpoch s.mx.Unlock() err := s.tracker.Put(blk.Cid(), epoch) @@ -258,7 +265,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { return s.cold.PutMany(blks) } - epoch := s.curTs.Height() + epoch := s.writeEpoch s.mx.Unlock() batch := make([]cid.Cid, 0, len(blks)) @@ -398,7 +405,11 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading mark set size: %w", err) } - log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, "warmupEpoch", s.warmupEpoch) + s.updateWriteEpoch() + + log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, "warmupEpoch", s.warmupEpoch, "writeEpoch", s.writeEpoch) + + go s.background() // watch the chain chain.SubscribeHeadChanges(s.HeadChange) @@ -416,6 +427,7 @@ func (s *SplitStore) Close() error { } } + s.cancel() return multierr.Combine(s.tracker.Close(), s.env.Close()) } @@ -431,6 +443,8 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { s.curTs = curTs s.mx.Unlock() + s.updateWriteEpoch() + timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) if time.Since(timestamp) > SyncGapTime { err := s.setSyncGapEpoch(epoch) @@ -467,6 +481,44 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } +func (s *SplitStore) updateWriteEpoch() { + s.mx.Lock() + defer s.mx.Unlock() + + curTs := s.curTs + timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) + + dt := time.Since(timestamp) + if dt < 0 { + writeEpoch := curTs.Height() + if writeEpoch > s.writeEpoch { + s.writeEpoch = writeEpoch + } + + return + } + + writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + if writeEpoch > s.writeEpoch { + s.writeEpoch = writeEpoch + } +} + +func (s *SplitStore) background() { + ticker := time.NewTicker(time.Duration(builtin.EpochDurationSeconds) * time.Second) + defer ticker.Stop() + + for { + select { + case <-s.ctx.Done(): + return + + case <-ticker.C: + s.updateWriteEpoch() + } + } +} + func (s *SplitStore) warmup(curTs *types.TipSet) error { err := s.loadGenesisState() if err != nil { From 66f1630f14df2ae0b9d8f889cfa62a74c663b1cc Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 12:35:56 +0300 Subject: [PATCH 022/197] fix lint issue --- blockstore/splitstore/splitstore.go | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index dfb6ddf066f..bccf99c57b9 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -522,7 +522,7 @@ func (s *SplitStore) background() { func (s *SplitStore) warmup(curTs *types.TipSet) error { err := s.loadGenesisState() if err != nil { - return xerrors.Errorf("error loading genesis state: %w") + return xerrors.Errorf("error loading genesis state: %w", err) } if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { From 933c786421ddcfaceb078aef1e7b7ea950e574c4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 13:47:56 +0300 Subject: [PATCH 023/197] update write epoch in the background every second --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index bccf99c57b9..eee3b79964d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -505,7 +505,7 @@ func (s *SplitStore) updateWriteEpoch() { } func (s *SplitStore) background() { - ticker := time.NewTicker(time.Duration(builtin.EpochDurationSeconds) * time.Second) + ticker := time.NewTicker(time.Second) defer ticker.Stop() for { From b7897595eb21859959e79bd2744d4e33514b6a61 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 13:49:14 +0300 Subject: [PATCH 024/197] augment current epoch by +1 to account for off by one conditions --- blockstore/splitstore/splitstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index eee3b79964d..ed63fb89465 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -490,7 +490,7 @@ func (s *SplitStore) updateWriteEpoch() { dt := time.Since(timestamp) if dt < 0 { - writeEpoch := curTs.Height() + writeEpoch := curTs.Height() + 1 if writeEpoch > s.writeEpoch { s.writeEpoch = writeEpoch } @@ -498,7 +498,7 @@ func (s *SplitStore) updateWriteEpoch() { return } - writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + 1 if writeEpoch > s.writeEpoch { s.writeEpoch = writeEpoch } From c4d95de987e54af555485f44c6aa4ab09778053d Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 18:24:03 +0300 Subject: [PATCH 025/197] coalesce back-to-back compactions get rid of the CompactionCold construct, run a single compaction on catch up --- blockstore/splitstore/splitstore.go | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ed63fb89465..b05fa2fc126 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -35,21 +35,16 @@ var ( // // |················· CompactionThreshold ··················| // | | - // =======‖≡≡≡≡≡≡≡‖-----------------------|------------------------» - // | | | chain --> ↑__ current epoch - // |·······| | - // ↑________ CompactionCold ↑________ CompactionBoundary + // =======‖≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡‖|------------------------» + // | | chain --> ↑__ current epoch + // | archived epochs | + // ↑________ CompactionBoundary // // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction // --- :: hot CompactionThreshold = 3 * 
build.Finality - // CompactionCold is the number of epochs that will be archived to the - // cold store on compaction. See diagram on CompactionThreshold for a - // better sense. - CompactionCold = build.Finality - // CompactionBoundary is the number of epochs from the current epoch at which // we will walk the chain for live objects. CompactionBoundary = 2 * build.Finality @@ -741,9 +736,9 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { } func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) error { - coldEpoch := s.baseEpoch + CompactionCold currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary + coldEpoch := boundaryEpoch - 1 log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) From a178c1fb938eb5306c02e3bc49fdb7f029ad5d26 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 18:24:09 +0300 Subject: [PATCH 026/197] fix test --- blockstore/splitstore/splitstore_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index d842fe72a67..a4b8ba766c9 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -21,7 +21,6 @@ import ( func init() { CompactionThreshold = 5 - CompactionCold = 1 CompactionBoundary = 2 logging.SetLogLevel("splitstore", "DEBUG") } @@ -145,12 +144,12 @@ func testSplitStore(t *testing.T, cfg *Config) { coldCnt = countBlocks(cold) hotCnt = countBlocks(hot) - if coldCnt != 7 { - t.Errorf("expected %d cold blocks, but got %d", 7, coldCnt) + if coldCnt != 8 { + t.Errorf("expected %d cold blocks, but got %d", 8, coldCnt) } - if hotCnt != 6 { - t.Errorf("expected %d hot blocks, but got %d", 6, hotCnt) + if hotCnt != 5 { + t.Errorf("expected %d hot blocks, but got %d", 5, hotCnt) } // Make sure we can revert without panicking. From a25ac80777cdcc4d896c708d35a2d4665a59e621 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 20:27:37 +0300 Subject: [PATCH 027/197] reintroduce compaction slack --- blockstore/splitstore/splitstore.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b05fa2fc126..ccf88f5294b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -35,10 +35,11 @@ var ( // // |················· CompactionThreshold ··················| // | | - // =======‖≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡‖|------------------------» - // | | chain --> ↑__ current epoch - // | archived epochs | - // ↑________ CompactionBoundary + // =======‖≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡‖----------|------------------------» + // | | | chain --> ↑__ current epoch + // | archived epochs ___↑ | + // | ↑________ CompactionBoundary + // ↑__ CompactionSlack // // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction @@ -49,6 +50,10 @@ var ( // we will walk the chain for live objects. CompactionBoundary = 2 * build.Finality + // CompactionSlack is the number of epochs from the compaction boundary to the beginning + // of the cold epoch. 
+ CompactionSlack = build.Finality + // SyncGapTime is the time delay from a tipset's min timestamp before we decide // there is a sync gap SyncGapTime = time.Minute @@ -738,7 +743,7 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) error { currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary - coldEpoch := boundaryEpoch - 1 + coldEpoch := boundaryEpoch - CompactionSlack log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) From 79d21489df8c96ed0d31241903a8d002c3534af2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 20:27:43 +0300 Subject: [PATCH 028/197] fix test --- blockstore/splitstore/splitstore_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index a4b8ba766c9..3583146dce6 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -22,6 +22,7 @@ import ( func init() { CompactionThreshold = 5 CompactionBoundary = 2 + CompactionSlack = 2 logging.SetLogLevel("splitstore", "DEBUG") } @@ -144,12 +145,12 @@ func testSplitStore(t *testing.T, cfg *Config) { coldCnt = countBlocks(cold) hotCnt = countBlocks(hot) - if coldCnt != 8 { - t.Errorf("expected %d cold blocks, but got %d", 8, coldCnt) + if coldCnt != 6 { + t.Errorf("expected %d cold blocks, but got %d", 6, coldCnt) } - if hotCnt != 5 { - t.Errorf("expected %d hot blocks, but got %d", 5, hotCnt) + if hotCnt != 7 { + t.Errorf("expected %d hot blocks, but got %d", 7, hotCnt) } // Make sure we can revert without panicking. From a21f55919b4df97b1ecc6745b35fce335e877680 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Jun 2021 22:21:49 +0300 Subject: [PATCH 029/197] CompactionThreshold should be 4 finalities otherwise we'll wear clown shoes with the slack and end up in continuous compaction. --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ccf88f5294b..22adadd28df 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -44,7 +44,7 @@ var ( // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction // --- :: hot - CompactionThreshold = 3 * build.Finality + CompactionThreshold = 4 * build.Finality // CompactionBoundary is the number of epochs from the current epoch at which // we will walk the chain for live objects. From 30dbe4978b97a5d604ddc01db399868ac7e5fa84 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 21 Jun 2021 14:50:03 +0300 Subject: [PATCH 030/197] adjust compaction range --- blockstore/splitstore/splitstore.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 22adadd28df..0d7b5e4a215 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -44,15 +44,15 @@ var ( // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction // --- :: hot - CompactionThreshold = 4 * build.Finality + CompactionThreshold = 7 * build.Finality // CompactionBoundary is the number of epochs from the current epoch at which // we will walk the chain for live objects. 
- CompactionBoundary = 2 * build.Finality + CompactionBoundary = 4 * build.Finality // CompactionSlack is the number of epochs from the compaction boundary to the beginning // of the cold epoch. - CompactionSlack = build.Finality + CompactionSlack = 2 * build.Finality // SyncGapTime is the time delay from a tipset's min timestamp before we decide // there is a sync gap From 0390285c4e35c2c1a8f6947eb42f9c8d6be876f5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 21 Jun 2021 14:53:56 +0300 Subject: [PATCH 031/197] always do full walks, not only when there is a sync gap --- blockstore/splitstore/splitstore.go | 65 ++++------------------------- 1 file changed, 8 insertions(+), 57 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0d7b5e4a215..e4242c14bd2 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -69,11 +69,6 @@ var ( // all active blocks into the hotstore. warmupEpochKey = dstore.NewKey("/splitstore/warmupEpoch") - // syncGapEpochKey stores the last epoch where a sync gap was detected. - // If there is a sync gap after the boundary epoch, compaction will perform - // a slower full walk from the current epoch to the boundary epoch - syncGapEpochKey = dstore.NewKey("/splitstore/syncGapEpoch") - // markSetSizeKey stores the current estimate for the mark set size. // this is first computed at warmup and updated in every compaction markSetSizeKey = dstore.NewKey("/splitstore/markSetSize") @@ -120,10 +115,9 @@ type SplitStore struct { cfg *Config - baseEpoch abi.ChainEpoch - syncGapEpoch abi.ChainEpoch - warmupEpoch abi.ChainEpoch - writeEpoch abi.ChainEpoch + baseEpoch abi.ChainEpoch + warmupEpoch abi.ChainEpoch + writeEpoch abi.ChainEpoch coldPurgeSize int @@ -382,17 +376,6 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading warmup epoch: %w", err) } - // load sync gap epoch from metadata ds - bs, err = s.ds.Get(syncGapEpochKey) - switch err { - case nil: - s.syncGapEpoch = bytesToEpoch(bs) - - case dstore.ErrNotFound: - default: - return xerrors.Errorf("error loading sync gap epoch: %w", err) - } - // load markSetSize from metadata ds // if none, the splitstore will compute it during warmup and update in every compaction bs, err = s.ds.Get(markSetSizeKey) @@ -447,11 +430,6 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) if time.Since(timestamp) > SyncGapTime { - err := s.setSyncGapEpoch(epoch) - if err != nil { - log.Warnf("error saving sync gap epoch: %s", err) - } - // don't attempt compaction before we have caught up syncing return nil } @@ -469,7 +447,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { log.Info("compacting splitstore") start := time.Now() - s.compact(curTs, s.syncGapEpoch) + s.compact(curTs) log.Infow("compaction done", "took", time.Since(start)) }() @@ -697,7 +675,7 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { } // Compaction/GC Algorithm -func (s *SplitStore) compact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) { +func (s *SplitStore) compact(curTs *types.TipSet) { var err error if s.markSetSize == 0 { start := time.Now() @@ -713,7 +691,7 @@ func (s *SplitStore) compact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) { } start := time.Now() - err = s.doCompact(curTs, syncGapEpoch) + err = s.doCompact(curTs) took := time.Since(start).Milliseconds() stats.Record(context.Background(), 
metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) @@ -740,7 +718,7 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { return nil } -func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) error { +func (s *SplitStore) doCompact(curTs *types.TipSet) error { currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary coldEpoch := boundaryEpoch - CompactionSlack @@ -757,30 +735,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet, syncGapEpoch abi.ChainEpoch) log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() - var inclMsgs bool - var markTs *types.TipSet - if syncGapEpoch > boundaryEpoch { - // There is a sync gap that may have caused writes that are logically after the boundary - // epoch to be written before the respective head change notification and hence be tracked - // at the wrong epoch. - // This can happen if the node is offline or falls out of sync for an extended period of time. - // In this case we perform a full walk to avoid pathologies with pushing actually hot - // objects into the coldstore. - markTs = curTs - inclMsgs = true - log.Infof("sync gap detected at epoch %d; marking from current epoch to boundary epoch", syncGapEpoch) - } else { - // There is no pathological sync gap, so we can use the much faster single tipset walk at - // the boundary epoch. - boundaryTs, err := s.chain.GetTipsetByHeight(context.Background(), boundaryEpoch, curTs, true) - if err != nil { - return xerrors.Errorf("error getting tipset at boundary epoch: %w", err) - } - markTs = boundaryTs - } - var count int64 - err = s.walk(markTs, boundaryEpoch, inclMsgs, + err = s.walk(curTs, boundaryEpoch, true, func(cid cid.Cid) error { count++ return markSet.Mark(cid) @@ -1100,11 +1056,6 @@ func (s *SplitStore) setBaseEpoch(epoch abi.ChainEpoch) error { return s.ds.Put(baseEpochKey, epochToBytes(epoch)) } -func (s *SplitStore) setSyncGapEpoch(epoch abi.ChainEpoch) error { - s.syncGapEpoch = epoch - return s.ds.Put(syncGapEpochKey, epochToBytes(epoch)) -} - func epochToBytes(epoch abi.ChainEpoch) []byte { return uint64ToBytes(uint64(epoch)) } From fc247e4223e2daa4b02ce658cd81adc65c2dcdc4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 21 Jun 2021 15:17:00 +0300 Subject: [PATCH 032/197] add debug log skeleton --- blockstore/splitstore/debug.go | 53 +++++++++++++++++++++++++++++ blockstore/splitstore/splitstore.go | 19 +++++++++++ 2 files changed, 72 insertions(+) create mode 100644 blockstore/splitstore/debug.go diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go new file mode 100644 index 00000000000..9b4f4ba8903 --- /dev/null +++ b/blockstore/splitstore/debug.go @@ -0,0 +1,53 @@ +package splitstore + +import ( + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/lotus/chain/types" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" +) + +type debugLog struct { +} + +func (d *debugLog) LogReadMiss(cid cid.Cid) { + if d == nil { + return + } + + // TODO +} + +func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch abi.ChainEpoch) { + if d == nil { + return + } + + // TODO +} + +func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeEpoch abi.ChainEpoch) { + if d == nil { + return + } + + // TODO +} + +func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid, writeEpoch abi.ChainEpoch) { + if d == nil { + return + } + + // TODO +} + 
+func (d *debugLog) Close() error { + if d == nil { + return nil + } + + // TODO + return nil +} diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e4242c14bd2..3ff6b5c145b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -136,6 +136,8 @@ type SplitStore struct { ctx context.Context cancel func() + + debug *debugLog } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -203,9 +205,12 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { return blk, nil case bstore.ErrNotFound: + s.debug.LogReadMiss(cid) + blk, err = s.cold.Get(cid) if err == nil { stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) + } return blk, err @@ -222,6 +227,8 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { return size, nil case bstore.ErrNotFound: + s.debug.LogReadMiss(cid) + size, err = s.cold.GetSize(cid) if err == nil { stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) @@ -240,6 +247,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { return s.cold.Put(blk) } + curTs := s.curTs epoch := s.writeEpoch s.mx.Unlock() @@ -249,6 +257,8 @@ func (s *SplitStore) Put(blk blocks.Block) error { return s.cold.Put(blk) } + s.debug.LogWrite(curTs, blk, epoch) + return s.hot.Put(blk) } @@ -259,6 +269,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { return s.cold.PutMany(blks) } + curTs := s.curTs epoch := s.writeEpoch s.mx.Unlock() @@ -273,6 +284,8 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { return s.cold.PutMany(blks) } + s.debug.LogWriteMany(curTs, blks, epoch) + return s.hot.PutMany(blks) } @@ -319,6 +332,8 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case bstore.ErrNotFound: + s.debug.LogReadMiss(cid) + err = s.cold.View(cid, cb) if err == nil { stats.Record(context.Background(), metrics.SplitstoreMiss.M(1)) @@ -411,6 +426,7 @@ func (s *SplitStore) Close() error { } s.cancel() + s.debug.Close() return multierr.Combine(s.tracker.Close(), s.env.Close()) } @@ -784,6 +800,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // it's cold, mark it for move cold = append(cold, cid) coldCnt++ + + s.debug.LogMove(curTs, cid, writeEpoch) + return nil }) From fce7b8dc9b58bc6fae20c63232f4d024e511d960 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 22 Jun 2021 09:21:19 +0300 Subject: [PATCH 033/197] flush move log when cold collection is done --- blockstore/splitstore/debug.go | 8 ++++++++ blockstore/splitstore/splitstore.go | 2 ++ 2 files changed, 10 insertions(+) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 9b4f4ba8903..a8f6f298130 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -43,6 +43,14 @@ func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid, writeEpoch abi.Chai // TODO } +func (d *debugLog) FlushMove() { + if d == nil { + return + } + + // TODO +} + func (d *debugLog) Close() error { if d == nil { return nil diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 3ff6b5c145b..8fe3bdddb8c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -806,6 +806,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil }) + s.debug.FlushMove() + if err != nil { return xerrors.Errorf("error collecting cold objects: %w", err) } From a53c4e15974a2d7c64dea7f255adf519f397f21f Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 22 Jun 
2021 10:10:24 +0300 Subject: [PATCH 034/197] implement debug log --- blockstore/splitstore/debug.go | 208 ++++++++++++++++++++++++++-- blockstore/splitstore/splitstore.go | 27 +++- 2 files changed, 222 insertions(+), 13 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index a8f6f298130..02d84d34252 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -1,6 +1,17 @@ package splitstore import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime/debug" + "sync" + "time" + + "go.uber.org/multierr" + "golang.org/x/xerrors" + "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/chain/types" @@ -9,14 +20,72 @@ import ( ) type debugLog struct { + basePath string + readPath, writePath, movePath string + readMx, writeMx, moveMx sync.Mutex + readLog, writeLog, moveLog *os.File + readCnt, writeCnt, moveCnt int } -func (d *debugLog) LogReadMiss(cid cid.Cid) { +func openDebugLog(path string) (*debugLog, error) { + basePath := filepath.Join(path, "debug") + err := os.MkdirAll(basePath, 0755) + if err != nil { + return nil, xerrors.Errorf("error creating debug log directory: %w", err) + } + + readPath := filepath.Join(basePath, "read.log") + readFile, err := os.OpenFile(readPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + return nil, xerrors.Errorf("error opening read log: %w", err) + } + + writePath := filepath.Join(basePath, "write.log") + writeFile, err := os.OpenFile(writePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + _ = readFile.Close() + return nil, xerrors.Errorf("error opening write log: %w", err) + } + + movePath := filepath.Join(basePath, "move.log") + moveFile, err := os.OpenFile(movePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + _ = readFile.Close() + _ = writeFile.Close() + return nil, xerrors.Errorf("error opening move log: %w", err) + } + + return &debugLog{ + basePath: basePath, + readPath: readPath, + writePath: writePath, + movePath: movePath, + readLog: readFile, + writeLog: writeFile, + moveLog: moveFile, + }, nil +} + +func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) { if d == nil { return } - // TODO + stack := debug.Stack() + var epoch abi.ChainEpoch + if curTs != nil { + epoch = curTs.Height() + } + + d.readMx.Lock() + defer d.readMx.Unlock() + + d.readCnt++ + + _, err := fmt.Fprintf(d.readLog, "%d %s\n%s\n", epoch, cid, string(stack)) + if err != nil { + log.Warnf("error writing read log: %s", err) + } } func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch abi.ChainEpoch) { @@ -24,7 +93,15 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch ab return } - // TODO + d.writeMx.Lock() + defer d.writeMx.Unlock() + + d.writeCnt++ + + _, err := fmt.Fprintf(d.writeLog, "%d %s %d\n", curTs.Height(), blk.Cid(), writeEpoch) + if err != nil { + log.Warnf("error writing write log: %s", err) + } } func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeEpoch abi.ChainEpoch) { @@ -32,7 +109,18 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE return } - // TODO + d.writeMx.Lock() + defer d.writeMx.Unlock() + + d.writeCnt += len(blks) + + for _, blk := range blks { + _, err := fmt.Fprintf(d.writeLog, "%d %s %d\n", curTs.Height(), blk.Cid(), writeEpoch) + if err != nil { + log.Warnf("error writing write log: %s", err) + break + } + } } func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid, 
writeEpoch abi.ChainEpoch) { @@ -40,15 +128,106 @@ func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid, writeEpoch abi.Chai return } - // TODO + d.moveMx.Lock() + defer d.moveMx.Unlock() + + d.moveCnt++ + + _, err := fmt.Fprintf(d.moveLog, "%d %s %d\n", curTs.Height(), cid, writeEpoch) + if err != nil { + log.Warnf("error writing move log: %s", err) + } } -func (d *debugLog) FlushMove() { +func (d *debugLog) Flush() { if d == nil { return } - // TODO + // rotate non-empty logs + d.rotateReadLog() + d.rotateWriteLog() + d.rotateMoveLog() +} + +func (d *debugLog) rotateReadLog() { + d.readMx.Lock() + defer d.readMx.Unlock() + + if d.readCnt == 0 { + return + } + + err := d.rotate(d.readLog, d.readPath) + if err != nil { + log.Errorf("error rotating read log: %s", err) + } + + d.readLog, err = os.OpenFile(d.readPath, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + log.Errorf("error opening log file: %s", err) + } +} + +func (d *debugLog) rotateWriteLog() { + d.writeMx.Lock() + defer d.writeMx.Unlock() + + if d.writeCnt == 0 { + return + } + + err := d.rotate(d.writeLog, d.writePath) + if err != nil { + log.Errorf("error rotating read log: %s", err) + } + + d.writeLog, err = os.OpenFile(d.writePath, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + log.Errorf("error opening log file: %s", err) + } +} + +func (d *debugLog) rotateMoveLog() { + d.moveMx.Lock() + defer d.moveMx.Unlock() + + if d.moveCnt == 0 { + return + } + + err := d.rotate(d.moveLog, d.movePath) + if err != nil { + log.Errorf("error rotating read log: %s", err) + } + + d.moveLog, err = os.OpenFile(d.movePath, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + log.Errorf("error opening log file: %s", err) + } +} + +func (d *debugLog) rotate(f *os.File, path string) error { + err := f.Close() + if err != nil { + return xerrors.Errorf("error closing file: %w", err) + } + + arxivPath := fmt.Sprintf("%s-%d", path, time.Now().Unix()) + err = os.Rename(path, arxivPath) + if err != nil { + return xerrors.Errorf("error moving file: %w", err) + } + + go func() { + cmd := exec.Command("gzip", arxivPath) + err := cmd.Run() + if err != nil { + log.Errorf("error compressing log: %s", err) + } + }() + + return nil } func (d *debugLog) Close() error { @@ -56,6 +235,17 @@ func (d *debugLog) Close() error { return nil } - // TODO - return nil + d.readMx.Lock() + err1 := d.readLog.Close() + d.readMx.Unlock() + + d.writeMx.Lock() + err2 := d.writeLog.Close() + d.writeMx.Unlock() + + d.moveMx.Lock() + err3 := d.moveLog.Close() + d.moveMx.Unlock() + + return multierr.Combine(err1, err2, err3) } diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 8fe3bdddb8c..475e832822a 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -74,6 +74,9 @@ var ( markSetSizeKey = dstore.NewKey("/splitstore/markSetSize") log = logging.Logger("splitstore") + + // set this to true if you are debugging the splitstore to enable debug logging + enableDebugLog = false ) const ( @@ -173,6 +176,13 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co ss.ctx, ss.cancel = context.WithCancel(context.Background()) + if enableDebugLog { + ss.debug, err = openDebugLog(path) + if err != nil { + return nil, err + } + } + return ss, nil } @@ -205,7 +215,10 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { return blk, nil case bstore.ErrNotFound: - s.debug.LogReadMiss(cid) + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() + s.debug.LogReadMiss(curTs, 
cid) blk, err = s.cold.Get(cid) if err == nil { @@ -227,7 +240,10 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { return size, nil case bstore.ErrNotFound: - s.debug.LogReadMiss(cid) + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() + s.debug.LogReadMiss(curTs, cid) size, err = s.cold.GetSize(cid) if err == nil { @@ -332,7 +348,10 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case bstore.ErrNotFound: - s.debug.LogReadMiss(cid) + s.mx.Lock() + curTs := s.curTs + s.mx.Unlock() + s.debug.LogReadMiss(curTs, cid) err = s.cold.View(cid, cb) if err == nil { @@ -806,7 +825,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil }) - s.debug.FlushMove() + s.debug.Flush() if err != nil { return xerrors.Errorf("error collecting cold objects: %w", err) From b187b5c301e613ea98cbfb407c053023608d6fa9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 22 Jun 2021 10:14:58 +0300 Subject: [PATCH 035/197] fix lint --- blockstore/splitstore/splitstore.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 475e832822a..89eec627290 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -445,8 +445,7 @@ func (s *SplitStore) Close() error { } s.cancel() - s.debug.Close() - return multierr.Combine(s.tracker.Close(), s.env.Close()) + return multierr.Combine(s.tracker.Close(), s.env.Close(), s.debug.Close()) } func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { From 375a1790e793efeba0c486263ea6ada0e9ba97d4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 22 Jun 2021 10:17:26 +0300 Subject: [PATCH 036/197] reset counters after flush --- blockstore/splitstore/debug.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 02d84d34252..9fea63a5f08 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -161,12 +161,16 @@ func (d *debugLog) rotateReadLog() { err := d.rotate(d.readLog, d.readPath) if err != nil { log.Errorf("error rotating read log: %s", err) + return } d.readLog, err = os.OpenFile(d.readPath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { log.Errorf("error opening log file: %s", err) + return } + + d.readCnt = 0 } func (d *debugLog) rotateWriteLog() { @@ -180,12 +184,16 @@ func (d *debugLog) rotateWriteLog() { err := d.rotate(d.writeLog, d.writePath) if err != nil { log.Errorf("error rotating read log: %s", err) + return } d.writeLog, err = os.OpenFile(d.writePath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { log.Errorf("error opening log file: %s", err) + return } + + d.writeCnt = 0 } func (d *debugLog) rotateMoveLog() { @@ -199,12 +207,16 @@ func (d *debugLog) rotateMoveLog() { err := d.rotate(d.moveLog, d.movePath) if err != nil { log.Errorf("error rotating read log: %s", err) + return } d.moveLog, err = os.OpenFile(d.movePath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { log.Errorf("error opening log file: %s", err) + return } + + d.moveCnt = 0 } func (d *debugLog) rotate(f *os.File, path string) error { From 50ebaf25aa9803d6e3bff528d9e9c222ef73f035 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 22 Jun 2021 10:56:35 +0300 Subject: [PATCH 037/197] don't log read misses before warmup --- blockstore/splitstore/splitstore.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go 
b/blockstore/splitstore/splitstore.go index 89eec627290..17ece419fe3 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -216,9 +216,12 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { case bstore.ErrNotFound: s.mx.Lock() + warmup := s.warmupEpoch > 0 curTs := s.curTs s.mx.Unlock() - s.debug.LogReadMiss(curTs, cid) + if warmup { + s.debug.LogReadMiss(curTs, cid) + } blk, err = s.cold.Get(cid) if err == nil { @@ -241,9 +244,12 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { case bstore.ErrNotFound: s.mx.Lock() + warmup := s.warmupEpoch > 0 curTs := s.curTs s.mx.Unlock() - s.debug.LogReadMiss(curTs, cid) + if warmup { + s.debug.LogReadMiss(curTs, cid) + } size, err = s.cold.GetSize(cid) if err == nil { @@ -349,9 +355,12 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { switch err { case bstore.ErrNotFound: s.mx.Lock() + warmup := s.warmupEpoch > 0 curTs := s.curTs s.mx.Unlock() - s.debug.LogReadMiss(curTs, cid) + if warmup { + s.debug.LogReadMiss(curTs, cid) + } err = s.cold.View(cid, cb) if err == nil { From 649b7dd162b38459976a335b39e4f0419e293ff4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 23 Jun 2021 21:21:56 +0300 Subject: [PATCH 038/197] add config option for hot headers --- node/config/def.go | 1 + node/modules/blockstore.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/node/config/def.go b/node/config/def.go index c2754fe4f30..c5770d9e070 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -233,6 +233,7 @@ type Splitstore struct { HotStoreType string TrackingStoreType string MarkSetType string + HotHeaders bool } // // Full Node diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index cc85e517fc2..3eefd4ac890 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -80,7 +80,7 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked cfg := &splitstore.Config{ TrackingStoreType: cfg.Splitstore.TrackingStoreType, MarkSetType: cfg.Splitstore.MarkSetType, - HotHeaders: cfg.Splitstore.ColdStoreType == "noop", + HotHeaders: cfg.Splitstore.HotHeaders || cfg.Splitstore.ColdStoreType == "noop", } ss, err := splitstore.Open(path, ds, hot, cold, cfg) if err != nil { From cb665d07e049e0316a4d36dbb02fa9ebc8d30644 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 25 Jun 2021 10:07:45 +0300 Subject: [PATCH 039/197] fix transactional race during compaction It is possible for an object to be written or recreated (and checked with Has) after the mark completes and during the purge; if this happens we will purge a live block. 
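In outline, the protection added below works like this (a simplified sketch using the
names from the diff, not the literal code):

    // readers and writers hold the shared lock and mark what they touch
    s.txnLk.RLock()
    blk, err := s.hot.Get(c)
    if err == nil && s.txnProtect != nil {
            err = s.txnProtect.Mark(c) // shields c from the in-flight purge
    }
    s.txnLk.RUnlock()

    // the purge holds the exclusive lock and skips anything marked live
    s.txnLk.Lock()
    if live, _ := s.txnProtect.Has(c); !live {
            deadCids = append(deadCids, c)
    }
    s.txnLk.Unlock()

Anything touched between the end of marking and the purge batch that would delete it is
thus treated as live and survives.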
--- blockstore/splitstore/debug.go | 4 +- blockstore/splitstore/markset.go | 4 +- blockstore/splitstore/markset_bloom.go | 23 +++- blockstore/splitstore/splitstore.go | 147 +++++++++++++++++++++---- 4 files changed, 145 insertions(+), 33 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 9fea63a5f08..2310612d542 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -123,7 +123,7 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE } } -func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid, writeEpoch abi.ChainEpoch) { +func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid) { if d == nil { return } @@ -133,7 +133,7 @@ func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid, writeEpoch abi.Chai d.moveCnt++ - _, err := fmt.Fprintf(d.moveLog, "%d %s %d\n", curTs.Height(), cid, writeEpoch) + _, err := fmt.Fprintf(d.moveLog, "%d %s\n", curTs.Height(), cid) if err != nil { log.Warnf("error writing move log: %s", err) } diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index ef14a2fc668..0c057a1df2c 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -29,7 +29,9 @@ type MarkSetEnv interface { func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { switch mtype { case "", "bloom": - return NewBloomMarkSetEnv() + return NewBloomMarkSetEnv(false) + case "bloomts": + return NewBloomMarkSetEnv(true) case "bolt": return NewBoltMarkSetEnv(filepath.Join(path, "markset.bolt")) default: diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index c213436c898..cffd4f23ad7 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -15,19 +15,22 @@ const ( BloomFilterProbability = 0.01 ) -type BloomMarkSetEnv struct{} +type BloomMarkSetEnv struct { + ts bool +} var _ MarkSetEnv = (*BloomMarkSetEnv)(nil) type BloomMarkSet struct { salt []byte bf *bbloom.Bloom + ts bool } var _ MarkSet = (*BloomMarkSet)(nil) -func NewBloomMarkSetEnv() (*BloomMarkSetEnv, error) { - return &BloomMarkSetEnv{}, nil +func NewBloomMarkSetEnv(ts bool) (*BloomMarkSetEnv, error) { + return &BloomMarkSetEnv{ts: ts}, nil } func (e *BloomMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { @@ -47,7 +50,7 @@ func (e *BloomMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { return nil, xerrors.Errorf("error creating bloom filter: %w", err) } - return &BloomMarkSet{salt: salt, bf: bf}, nil + return &BloomMarkSet{salt: salt, bf: bf, ts: e.ts}, nil } func (e *BloomMarkSetEnv) Close() error { @@ -64,12 +67,20 @@ func (s *BloomMarkSet) saltedKey(cid cid.Cid) []byte { } func (s *BloomMarkSet) Mark(cid cid.Cid) error { - s.bf.Add(s.saltedKey(cid)) + if s.ts { + s.bf.AddTS(s.saltedKey(cid)) + } else { + s.bf.Add(s.saltedKey(cid)) + } + return nil } func (s *BloomMarkSet) Has(cid cid.Cid) (bool, error) { - return s.bf.Has(s.saltedKey(cid)), nil + if s.ts { + return s.bf.HasTS(s.saltedKey(cid)), nil + } + return s.bf.HasTS(s.saltedKey(cid)), nil } func (s *BloomMarkSet) Close() error { diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 17ece419fe3..7f84d7a7df2 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -141,6 +141,11 @@ type SplitStore struct { cancel func() debug *debugLog + + // protection for concurrent read/writes during compaction + txnLk sync.RWMutex 
+ txnEnv MarkSetEnv + txnProtect MarkSet } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -162,6 +167,14 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co return nil, err } + // the txn markset env + txnEnv, err := OpenMarkSetEnv(path, "bloomts") + if err != nil { + _ = tracker.Close() + _ = env.Close() + return nil, err + } + // and now we can make a SplitStore ss := &SplitStore{ cfg: cfg, @@ -170,6 +183,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co cold: cold, tracker: tracker, env: env, + txnEnv: txnEnv, coldPurgeSize: defaultColdPurgeSize, } @@ -198,9 +212,16 @@ func (s *SplitStore) DeleteMany(_ []cid.Cid) error { } func (s *SplitStore) Has(cid cid.Cid) (bool, error) { + s.txnLk.RLock() + defer s.txnLk.RUnlock() + has, err := s.hot.Has(cid) if err != nil || has { + if has && s.txnProtect != nil { + err = s.txnProtect.Mark(cid) + } + return has, err } @@ -208,11 +229,18 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { } func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { + s.txnLk.RLock() + defer s.txnLk.RUnlock() + blk, err := s.hot.Get(cid) switch err { case nil: - return blk, nil + if s.txnProtect != nil { + err = s.txnProtect.Mark(cid) + } + + return blk, err case bstore.ErrNotFound: s.mx.Lock() @@ -236,11 +264,18 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { } func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { + s.txnLk.RLock() + defer s.txnLk.RUnlock() + size, err := s.hot.GetSize(cid) switch err { case nil: - return size, nil + if s.txnProtect != nil { + err = s.txnProtect.Mark(cid) + } + + return size, err case bstore.ErrNotFound: s.mx.Lock() @@ -273,6 +308,9 @@ func (s *SplitStore) Put(blk blocks.Block) error { epoch := s.writeEpoch s.mx.Unlock() + s.txnLk.RLock() + defer s.txnLk.RUnlock() + err := s.tracker.Put(blk.Cid(), epoch) if err != nil { log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) @@ -281,7 +319,12 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.debug.LogWrite(curTs, blk, epoch) - return s.hot.Put(blk) + err = s.hot.Put(blk) + if err == nil && s.txnProtect != nil { + err = s.txnProtect.Mark(blk.Cid()) + } + + return err } func (s *SplitStore) PutMany(blks []blocks.Block) error { @@ -300,6 +343,9 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { batch = append(batch, blk.Cid()) } + s.txnLk.RLock() + defer s.txnLk.RUnlock() + err := s.tracker.PutBatch(batch, epoch) if err != nil { log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) @@ -308,7 +354,17 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { s.debug.LogWriteMany(curTs, blks, epoch) - return s.hot.PutMany(blks) + err = s.hot.PutMany(blks) + if err == nil && s.txnProtect != nil { + for _, cid := range batch { + err2 := s.txnProtect.Mark(cid) + if err2 != nil { + err = multierr.Combine(err, err2) + } + } + } + + return err } func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { @@ -351,8 +407,18 @@ func (s *SplitStore) HashOnRead(enabled bool) { } func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { + s.txnLk.RLock() + defer s.txnLk.RUnlock() + err := s.hot.View(cid, cb) switch err { + case nil: + if s.txnProtect != nil { + err = s.txnProtect.Mark(cid) + } + + return err + case bstore.ErrNotFound: s.mx.Lock() warmup := s.warmupEpoch > 0 @@ -774,6 +840,24 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } defer markSet.Close() //nolint:errcheck + // 
create the purge protect filter
+ s.txnLk.Lock()
+ s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize)
+ if err != nil {
+ s.txnLk.Unlock()
+ return xerrors.Errorf("error creating transactional mark set: %w", err)
+ }
+ s.txnLk.Unlock()
+
+ defer func() {
+ s.txnLk.Lock()
+ _ = s.txnProtect.Close()
+ s.txnProtect = nil
+ s.txnLk.Unlock()
+ }()
+
+ defer s.debug.Flush()
+
 // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() @@ -828,13 +912,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { cold = append(cold, cid) coldCnt++ - s.debug.LogMove(curTs, cid, writeEpoch) - return nil }) - s.debug.Flush() - if err != nil { return xerrors.Errorf("error collecting cold objects: %w", err) } @@ -867,24 +947,15 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } log.Infow("moving done", "took", time.Since(startMove)) - // 2.3 delete cold objects from the hotstore + // 2.3 purge cold objects from the hotstore log.Info("purging cold objects from the hotstore") startPurge := time.Now() - err = s.purgeBlocks(cold) + err = s.purge(curTs, cold) if err != nil { return xerrors.Errorf("error purging cold blocks: %w", err) } log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) - // 2.4 remove the tracker tracking for cold objects - startPurge = time.Now() - log.Info("purging cold objects from tracker") - err = s.purgeTracking(cold) - if err != nil { - return xerrors.Errorf("error purging tracking for cold blocks: %w", err) - } - log.Infow("purging cold from tracker done", "took", time.Since(startPurge)) - // we are done; do some housekeeping err = s.tracker.Sync() if err != nil { @@ -1067,12 +1138,40 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro return nil } -func (s *SplitStore) purgeBlocks(cids []cid.Cid) error { - return s.purgeBatch(cids, s.hot.DeleteMany) -} +func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { + return s.purgeBatch(cids, + func(cids []cid.Cid) error { + deadCids := make([]cid.Cid, 0, len(cids)) + + s.txnLk.Lock() + defer s.txnLk.Unlock() -func (s *SplitStore) purgeTracking(cids []cid.Cid) error { - return s.purgeBatch(cids, s.tracker.DeleteBatch) + for _, c := range cids { + live, err := s.txnProtect.Has(c) + if err != nil { + return xerrors.Errorf("error checking for liveness: %w", err) + } + + if live { + continue + } + + deadCids = append(deadCids, c) + s.debug.LogMove(curTs, c) + } + + err := s.tracker.DeleteBatch(deadCids) + if err != nil { + return xerrors.Errorf("error purging tracking: %w", err) + } + + err = s.hot.DeleteMany(deadCids) + if err != nil { + return xerrors.Errorf("error purging cold objects: %w", err) + } + + return nil + }) } From 65ccc99e796e2e36a6bc4c95b7e55373f3f6bdd4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 25 Jun 2021 11:39:24 +0300 Subject: [PATCH 040/197] minor tweaks in purge - allocate once - log purge count --- blockstore/splitstore/splitstore.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 7f84d7a7df2..f3d232877e1 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1139,9 +1139,13 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro } func (s *SplitStore)
purge(curTs *types.TipSet, cids []cid.Cid) error { + deadCids := make([]cid.Cid, 0, batchSize) + purgeCnt := 0 + defer log.Infof("purged %d objects", purgeCnt) + return s.purgeBatch(cids, func(cids []cid.Cid) error { - deadCids := make([]cid.Cid, 0, len(cids)) + deadCids := deadCids[:0] s.txnLk.Lock() defer s.txnLk.Unlock() @@ -1170,6 +1174,8 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { return xerrors.Errorf("error purging cold objects: %w", err) } + purgeCnt += len(deadCids) + return nil }) } From 6af3a23dd4bc0756adb221d5fc5498c040ce2e26 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 25 Jun 2021 19:41:31 +0300 Subject: [PATCH 041/197] use a map for txn protection mark set --- blockstore/splitstore/markset.go | 59 +++++++++++++++++++++++++++++ blockstore/splitstore/splitstore.go | 2 +- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index 0c057a1df2c..d1620527fb6 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -2,6 +2,7 @@ package splitstore import ( "path/filepath" + "sync" "golang.org/x/xerrors" @@ -32,9 +33,67 @@ func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { return NewBloomMarkSetEnv(false) case "bloomts": return NewBloomMarkSetEnv(true) + case "map": + return NewMapMarkSetEnv(false) + case "mapts": + return NewMapMarkSetEnv(true) case "bolt": return NewBoltMarkSetEnv(filepath.Join(path, "markset.bolt")) default: return nil, xerrors.Errorf("unknown mark set type %s", mtype) } } + +type MapMarkSetEnv struct { + ts bool +} + +var _ MarkSetEnv = (*MapMarkSetEnv)(nil) + +type MapMarkSet struct { + mx sync.Mutex + cids map[cid.Cid]struct{} + + ts bool +} + +var _ MarkSet = (*MapMarkSet)(nil) + +func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) { + return &MapMarkSetEnv{ts: ts}, nil +} + +func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { + return &MapMarkSet{ + cids: make(map[cid.Cid]struct{}), + ts: e.ts, + }, nil +} + +func (e *MapMarkSetEnv) Close() error { + return nil +} + +func (s *MapMarkSet) Mark(cid cid.Cid) error { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + } + + s.cids[cid] = struct{}{} + return nil +} + +func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + } + + _, ok := s.cids[cid] + return ok, nil +} + +func (s *MapMarkSet) Close() error { + return nil +} diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index f3d232877e1..074b85360b8 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -168,7 +168,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co } // the txn markset env - txnEnv, err := OpenMarkSetEnv(path, "bloomts") + txnEnv, err := OpenMarkSetEnv(path, "mapts") if err != nil { _ = tracker.Close() _ = env.Close() From 31497f4bd3087f0592938ca31241e5ce809ccb15 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 27 Jun 2021 15:46:51 +0300 Subject: [PATCH 042/197] use internal get during walk to avoid blowing the compaction txn otherwise the walk itself precludes purge... duh! 
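
To make the failure mode concrete, here is a toy model -- a self-contained Go
sketch with illustrative names, not lotus code: if the walk used the public
Get, which marks every accessed object live in the compaction transaction,
then everything the walk visits would be protected and the purge would find
nothing dead.

    package main

    import (
        "errors"
        "fmt"
    )

    // toy stand-in for the splitstore: Get records a live reference
    // for the compaction txn, get performs the same lookup without
    // any txn side effects
    type store struct {
        hot     map[string]string
        protect map[string]struct{} // stands in for the txn protect markset
    }

    var errNotFound = errors.New("not found")

    // public Get: marks the accessed object live for the compaction txn
    func (s *store) Get(k string) (string, error) {
        v, ok := s.hot[k]
        if !ok {
            return "", errNotFound
        }
        s.protect[k] = struct{}{}
        return v, nil
    }

    // internal get: same lookup, no marking -- what the walk must use
    func (s *store) get(k string) (string, error) {
        v, ok := s.hot[k]
        if !ok {
            return "", errNotFound
        }
        return v, nil
    }

    func main() {
        s := &store{
            hot:     map[string]string{"a": "1", "b": "2"},
            protect: make(map[string]struct{}),
        }
        _, _ = s.get("a")           // walk step: protects nothing
        _, _ = s.Get("b")           // client read: marks "b" live
        fmt.Println(len(s.protect)) // prints 1
    }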
--- blockstore/splitstore/splitstore.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 074b85360b8..8b5fd8f53a4 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -263,6 +263,19 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { } } +// internal version used by walk so that we don't blow the txn +func (s *SplitStore) get(cid cid.Cid) (blocks.Block, error) { + blk, err := s.hot.Get(cid) + + switch err { + case bstore.ErrNotFound: + return s.cold.Get(cid) + + default: + return blk, err + } +} + func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { s.txnLk.RLock() defer s.txnLk.RUnlock() @@ -986,7 +999,7 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo return nil } - blk, err := s.Get(c) + blk, err := s.get(c) if err != nil { return xerrors.Errorf("error retrieving block (cid: %s): %w", c, err) } @@ -1053,7 +1066,7 @@ func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error return nil } - blk, err := s.Get(c) + blk, err := s.get(c) if err != nil { return xerrors.Errorf("error retrieving linked block (cid: %s): %w", c, err) } From 4a71c68e0652072d5b001b8b271a31188a35beee Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 27 Jun 2021 17:04:26 +0300 Subject: [PATCH 043/197] move code around for better readability --- blockstore/splitstore/splitstore.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 8b5fd8f53a4..b44c70d5240 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -263,19 +263,6 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { } } -// internal version used by walk so that we don't blow the txn -func (s *SplitStore) get(cid cid.Cid) (blocks.Block, error) { - blk, err := s.hot.Get(cid) - - switch err { - case bstore.ErrNotFound: - return s.cold.Get(cid) - - default: - return blk, err - } -} - func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { s.txnLk.RLock() defer s.txnLk.RUnlock() @@ -1087,6 +1074,19 @@ func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error return rerr } +// internal version used by walk so that we don't blow the txn +func (s *SplitStore) get(cid cid.Cid) (blocks.Block, error) { + blk, err := s.hot.Get(cid) + + switch err { + case bstore.ErrNotFound: + return s.cold.Get(cid) + + default: + return blk, err + } +} + func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) From 9fda61abec408744445cf7d04c87195744b8cbef Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 28 Jun 2021 13:35:06 +0300 Subject: [PATCH 044/197] fix error check for unreachable cids --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b44c70d5240..67ea136dd45 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1093,7 +1093,7 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { for _, cid := range cold { blk, err := s.hot.Get(cid) if err != nil { - if err == dstore.ErrNotFound { + if err == bstore.ErrNotFound { // this can happen if the node is killed after we have deleted the block from the hotstore // but before we have deleted it 
from the tracker; just delete the tracker. err = s.tracker.Delete(cid) From 40ff5bf164d29f1b7bd1a6336fc7ceed9ff20954 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 28 Jun 2021 15:21:51 +0300 Subject: [PATCH 045/197] log put errors in splitstore log --- blockstore/splitstore/splitstore.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 67ea136dd45..19bd8ae106b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -324,6 +324,10 @@ func (s *SplitStore) Put(blk blocks.Block) error { err = s.txnProtect.Mark(blk.Cid()) } + if err != nil { + log.Errorf("error putting cid %s in hotstore: %s", blk.Cid(), err) + } + return err } @@ -364,6 +368,10 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { } } + if err != nil { + log.Errorf("error putting batch in hotstore: %s", err) + } + return err } From 7ebef6d8381374e9bcf7126d6105b7e5aabf5970 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 28 Jun 2021 15:31:32 +0300 Subject: [PATCH 046/197] better log message --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 19bd8ae106b..57a0e40a763 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -325,7 +325,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { } if err != nil { - log.Errorf("error putting cid %s in hotstore: %s", blk.Cid(), err) + log.Errorf("error putting block %s in hotstore: %s", blk.Cid(), err) } return err From dec61fa333fc6afc595a6385eed52e3ce7b249d4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 29 Jun 2021 16:15:45 +0300 Subject: [PATCH 047/197] deduplicate stack logs and optionally trace write stacks --- blockstore/splitstore/debug.go | 130 ++++++++++++++++++++++++---- blockstore/splitstore/splitstore.go | 2 + 2 files changed, 114 insertions(+), 18 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 2310612d542..7182321dbeb 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -1,11 +1,14 @@ package splitstore import ( + "crypto/sha256" + "encoding/hex" "fmt" "os" "os/exec" "path/filepath" "runtime/debug" + "strings" "sync" "time" @@ -20,11 +23,11 @@ import ( ) type debugLog struct { - basePath string - readPath, writePath, movePath string - readMx, writeMx, moveMx sync.Mutex - readLog, writeLog, moveLog *os.File - readCnt, writeCnt, moveCnt int + readPath, writePath, movePath, stackPath string + readMx, writeMx, moveMx, stackMx sync.Mutex + readLog, writeLog, moveLog, stackLog *os.File + readCnt, writeCnt, moveCnt, stackCnt int + stackMap map[string]struct{} } func openDebugLog(path string) (*debugLog, error) { @@ -55,14 +58,25 @@ func openDebugLog(path string) (*debugLog, error) { return nil, xerrors.Errorf("error opening move log: %w", err) } + stackPath := filepath.Join(basePath, "stack.log") + stackFile, err := os.OpenFile(stackPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + _ = readFile.Close() + _ = writeFile.Close() + _ = moveFile.Close() + return nil, xerrors.Errorf("error opening stack log: %w", err) + } + return &debugLog{ - basePath: basePath, readPath: readPath, writePath: writePath, movePath: movePath, + stackPath: stackPath, readLog: readFile, writeLog: writeFile, moveLog: moveFile, + stackLog: stackFile, + stackMap: make(map[string]struct{}), }, nil } @@ -71,7 +85,8 
@@ func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) { return } - stack := debug.Stack() + stack := d.getStack() + var epoch abi.ChainEpoch if curTs != nil { epoch = curTs.Height() @@ -82,7 +97,7 @@ func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) { d.readCnt++ - _, err := fmt.Fprintf(d.readLog, "%d %s\n%s\n", epoch, cid, string(stack)) + _, err := fmt.Fprintf(d.readLog, "%s %d %s %s\n", time.Now(), epoch, cid, stack) if err != nil { log.Warnf("error writing read log: %s", err) } @@ -93,12 +108,17 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch ab return } + var stack string + if enableDebugLogWriteTraces { + stack = " " + d.getStack() + } + d.writeMx.Lock() defer d.writeMx.Unlock() d.writeCnt++ - _, err := fmt.Fprintf(d.writeLog, "%d %s %d\n", curTs.Height(), blk.Cid(), writeEpoch) + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", time.Now(), curTs.Height(), blk.Cid(), writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) } @@ -109,13 +129,19 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE return } + var stack string + if enableDebugLogWriteTraces { + stack = " " + d.getStack() + } + d.writeMx.Lock() defer d.writeMx.Unlock() d.writeCnt += len(blks) + now := time.Now() for _, blk := range blks { - _, err := fmt.Fprintf(d.writeLog, "%d %s %d\n", curTs.Height(), blk.Cid(), writeEpoch) + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curTs.Height(), blk.Cid(), writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) break @@ -148,6 +174,7 @@ func (d *debugLog) Flush() { d.rotateReadLog() d.rotateWriteLog() d.rotateMoveLog() + d.rotateStackLog() } func (d *debugLog) rotateReadLog() { @@ -160,13 +187,13 @@ func (d *debugLog) rotateReadLog() { err := d.rotate(d.readLog, d.readPath) if err != nil { - log.Errorf("error rotating read log: %s", err) + log.Warnf("error rotating read log: %s", err) return } d.readLog, err = os.OpenFile(d.readPath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { - log.Errorf("error opening log file: %s", err) + log.Warnf("error opening log file: %s", err) return } @@ -183,13 +210,13 @@ func (d *debugLog) rotateWriteLog() { err := d.rotate(d.writeLog, d.writePath) if err != nil { - log.Errorf("error rotating read log: %s", err) + log.Warnf("error rotating write log: %s", err) return } d.writeLog, err = os.OpenFile(d.writePath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { - log.Errorf("error opening log file: %s", err) + log.Warnf("error opening write log file: %s", err) return } @@ -206,19 +233,42 @@ func (d *debugLog) rotateMoveLog() { err := d.rotate(d.moveLog, d.movePath) if err != nil { - log.Errorf("error rotating read log: %s", err) + log.Warnf("error rotating move log: %s", err) return } d.moveLog, err = os.OpenFile(d.movePath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { - log.Errorf("error opening log file: %s", err) + log.Warnf("error opening move log file: %s", err) return } d.moveCnt = 0 } +func (d *debugLog) rotateStackLog() { + d.stackMx.Lock() + defer d.stackMx.Unlock() + + if d.stackCnt == 0 { + return + } + + err := d.rotate(d.stackLog, d.stackPath) + if err != nil { + log.Warnf("error rotating stack log: %s", err) + return + } + + d.stackLog, err = os.OpenFile(d.stackPath, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + log.Warnf("error opening stack log file: %s", err) + return + } + + d.stackCnt = 0 +} + func (d *debugLog) rotate(f *os.File, path string) error { err := f.Close() 
if err != nil {
@@ -235,7 +285,7 @@ func (d *debugLog) rotate(f *os.File, path string) error {
 cmd := exec.Command("gzip", arxivPath)
 err := cmd.Run()
 if err != nil {
- log.Errorf("error compressing log: %s", err)
+ log.Warnf("error compressing log: %s", err)
 }
 }()
@@ -259,5 +309,49 @@ func (d *debugLog) Close() error {
 err3 := d.moveLog.Close()
 d.moveMx.Unlock()
- return multierr.Combine(err1, err2, err3)
+ d.stackMx.Lock()
+ err4 := d.stackLog.Close()
+ d.stackMx.Unlock()
+
+ return multierr.Combine(err1, err2, err3, err4)
+}
+
+func (d *debugLog) getStack() string {
+ sk := d.getNormalizedStackTrace()
+ hash := sha256.Sum256([]byte(sk))
+ key := string(hash[:])
+ repr := hex.EncodeToString(hash[:])
+
+ d.stackMx.Lock()
+ _, ok := d.stackMap[key]
+
+ if !ok {
+ _, err := fmt.Fprintf(d.stackLog, "%s\n%s\n", repr, sk)
+ if err != nil {
+ log.Warnf("error writing stack trace: %s", err)
+ }
+ }
+
+ d.stackMap[key] = struct{}{}
+ d.stackCnt++
+ d.stackMx.Unlock()
+
+ return repr
+}
+
+func (d *debugLog) getNormalizedStackTrace() string {
+ sk := string(debug.Stack())
+
+ // Normalization for deduplication
+ // skip first line -- it's the goroutine
+ // for each line that ends in a ), remove the call args -- these are the registers
+ lines := strings.Split(sk, "\n")[1:]
+ for i, line := range lines {
+ if line[len(line)-1] == ')' {
+ idx := strings.LastIndex(line, "(")
+ lines[i] = line[:idx]
+ }
+ }
+
+ return strings.Join(lines, "\n")
+}
diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index 57a0e40a763..67c623a964e 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -77,6 +77,8 @@ var (
 // set this to true if you are debugging the splitstore to enable debug logging
 enableDebugLog = false
+ // set this to true if you want to track origin stack traces in the write log
+ enableDebugLogWriteTraces = false
 )

 const (

From 0b315e97c8ca3a49bed59e884db1300918741781 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Tue, 29 Jun 2021 16:18:25 +0300
Subject: [PATCH 048/197] fix index out of range
---
 blockstore/splitstore/debug.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go
index 7182321dbeb..893a05c837c 100644
--- a/blockstore/splitstore/debug.go
+++ b/blockstore/splitstore/debug.go
@@ -349,6 +349,9 @@ func (d *debugLog) getNormalizedStackTrace() string {
 for i, line := range lines {
 if line[len(line)-1] == ')' {
 idx := strings.LastIndex(line, "(")
+ if idx < 0 {
+ continue
+ }
 lines[i] = line[:idx]
 }
 }

From b2b13bbe891b98c9778fc4a53b36b21fb471ac46 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Tue, 29 Jun 2021 16:21:20 +0300
Subject: [PATCH 049/197] fix debug panic
---
 blockstore/splitstore/debug.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go
index 893a05c837c..6be250a0924 100644
--- a/blockstore/splitstore/debug.go
+++ b/blockstore/splitstore/debug.go
@@ -347,7 +347,7 @@ func (d *debugLog) getNormalizedStackTrace() string {
 lines := strings.Split(sk, "\n")[1:]
 for i, line := range lines {
- if line[len(line)-1] == ')' {
+ if len(line) > 0 && line[len(line)-1] == ')' {
 idx := strings.LastIndex(line, "(")
 if idx < 0 {
 continue
 }

From 57e25ae1cdddc2977d0ac19ccda6ad39b2371aa3 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Tue, 29 Jun 2021 16:29:03 +0300
Subject: [PATCH 050/197] use succinct timestamp in debug
logs --- blockstore/splitstore/debug.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 6be250a0924..6620b362d66 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -97,7 +97,7 @@ func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) { d.readCnt++ - _, err := fmt.Fprintf(d.readLog, "%s %d %s %s\n", time.Now(), epoch, cid, stack) + _, err := fmt.Fprintf(d.readLog, "%s %d %s %s\n", d.timestamp(), epoch, cid, stack) if err != nil { log.Warnf("error writing read log: %s", err) } @@ -118,7 +118,7 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch ab d.writeCnt++ - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", time.Now(), curTs.Height(), blk.Cid(), writeEpoch, stack) + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curTs.Height(), blk.Cid(), writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) } @@ -139,7 +139,7 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE d.writeCnt += len(blks) - now := time.Now() + now := d.timestamp() for _, blk := range blks { _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curTs.Height(), blk.Cid(), writeEpoch, stack) if err != nil { @@ -358,3 +358,8 @@ func (d *debugLog) getNormalizedStackTrace() string { return strings.Join(lines, "\n") } + +func (d *debugLog) timestamp() string { + ts, _ := time.Now().MarshalText() + return string(ts) +} From 7307eb54dc6ef1f86a52f48638fe0115521f9aa7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 29 Jun 2021 16:50:54 +0300 Subject: [PATCH 051/197] cache stack repr computation --- blockstore/splitstore/debug.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 6620b362d66..39d68ce2767 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -27,7 +27,7 @@ type debugLog struct { readMx, writeMx, moveMx, stackMx sync.Mutex readLog, writeLog, moveLog, stackLog *os.File readCnt, writeCnt, moveCnt, stackCnt int - stackMap map[string]struct{} + stackMap map[string]string } func openDebugLog(path string) (*debugLog, error) { @@ -76,7 +76,7 @@ func openDebugLog(path string) (*debugLog, error) { writeLog: writeFile, moveLog: moveFile, stackLog: stackFile, - stackMap: make(map[string]struct{}), + stackMap: make(map[string]string), }, nil } @@ -320,20 +320,19 @@ func (d *debugLog) getStack() string { sk := d.getNormalizedStackTrace() hash := sha256.Sum256([]byte(sk)) key := string(hash[:]) - repr := hex.EncodeToString(hash[:]) d.stackMx.Lock() - _, ok := d.stackMap[key] - + repr, ok := d.stackMap[key] if !ok { + repr = hex.EncodeToString(hash[:]) + d.stackMap[key] = repr + d.stackCnt++ + _, err := fmt.Fprintf(d.stackLog, "%s\n%s\n", repr, sk) if err != nil { - log.Warnf("error writing stack trace: %s", err) + log.Warnf("error writing stack trace for %s: %s", repr, err) } } - - d.stackMap[key] = struct{}{} - d.stackCnt++ d.stackMx.Unlock() return repr From 4bed3161f09627bcf72f924e30f2ee65a4c24c77 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 30 Jun 2021 20:35:00 +0300 Subject: [PATCH 052/197] fix broken purge count log --- blockstore/splitstore/splitstore.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 67c623a964e..2a820192d02 100644 --- 
a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1164,7 +1164,9 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { deadCids := make([]cid.Cid, 0, batchSize) purgeCnt := 0 - defer log.Infof("purged %d objects", purgeCnt) + defer func() { + log.Infof("purged %d objects", purgeCnt) + }() return s.purgeBatch(cids, func(cids []cid.Cid) error { From e29b64c5de91bd1abd4473e52267146dcaf1bd5e Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 30 Jun 2021 21:37:31 +0300 Subject: [PATCH 053/197] check both markset and txn liveset before declaring an object cold --- blockstore/splitstore/splitstore.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 2a820192d02..dbc8e7aecd6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -913,7 +913,12 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error checkiing mark set for %s: %w", cid, err) } - if mark { + live, err := s.txnProtect.Has(cid) + if err != nil { + return xerrors.Errorf("error checking liveness for %s: %w", cid, err) + } + + if mark || live { hotCnt++ return nil } From 7de0771883a7ee5102251d50766db72420d52500 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 1 Jul 2021 08:38:35 +0300 Subject: [PATCH 054/197] count txn live objects explicitly for logging --- blockstore/splitstore/splitstore.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index dbc8e7aecd6..d1410d556d3 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -896,7 +896,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { cold := make([]cid.Cid, 0, s.coldPurgeSize) // some stats for logging - var hotCnt, coldCnt int + var hotCnt, coldCnt, liveCnt int // 2.1 iterate through the tracking store and collect unreachable cold objects err = s.tracker.ForEach(func(cid cid.Cid, writeEpoch abi.ChainEpoch) error { @@ -913,13 +913,18 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error checkiing mark set for %s: %w", cid, err) } + if mark { + hotCnt++ + return nil + } + live, err := s.txnProtect.Has(cid) if err != nil { return xerrors.Errorf("error checking liveness for %s: %w", cid, err) } - if mark || live { - hotCnt++ + if live { + liveCnt++ return nil } @@ -939,7 +944,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } log.Infow("collection done", "took", time.Since(startCollect)) - log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt) + log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "live", liveCnt) stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) @@ -1168,9 +1173,9 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { deadCids := make([]cid.Cid, 0, batchSize) - purgeCnt := 0 + var purgeCnt, liveCnt int defer func() { - log.Infof("purged %d objects", purgeCnt) + log.Infow("purged objects", "purged", purgeCnt, "live", liveCnt) }() return s.purgeBatch(cids, @@ -1187,6 +1192,7 @@ func (s *SplitStore) purge(curTs 
*types.TipSet, cids []cid.Cid) error { } if live { + liveCnt++ continue } From 09efed50fd73fea8be6fce52b657ec8a4aebc695 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 1 Jul 2021 10:29:30 +0300 Subject: [PATCH 055/197] check for lookback references to block headers in walk --- blockstore/splitstore/splitstore.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d1410d556d3..8fef0ee0afe 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -998,14 +998,23 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool, f func(cid.Cid) error) error { + visited := cid.NewSet() walked := cid.NewSet() toWalk := ts.Cids() walkBlock := func(c cid.Cid) error { - if !walked.Visit(c) { + if !visited.Visit(c) { return nil } + // check if it has been referenced by some later state root via lookback to avoid duplicate + // dispatches to the visitor + if !walked.Has(c) { + if err := f(c); err != nil { + return err + } + } + blk, err := s.get(c) if err != nil { return xerrors.Errorf("error retrieving block (cid: %s): %w", c, err) @@ -1021,10 +1030,6 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo return nil } - if err := f(c); err != nil { - return err - } - if hdr.Height >= boundary { if inclMsgs { if err := s.walkLinks(hdr.Messages, walked, f); err != nil { From 40f42db7fa38f011962079cb5e9c36d16e1db4fb Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 1 Jul 2021 14:06:28 +0300 Subject: [PATCH 056/197] walk tweaks --- blockstore/splitstore/splitstore.go | 33 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 8fef0ee0afe..93a6e7797c7 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -712,7 +712,7 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { count := int64(0) xcount := int64(0) missing := int64(0) - err := s.walk(curTs, epoch, false, + err := s.walk(curTs, epoch, false, s.cfg.HotHeaders, func(cid cid.Cid) error { count++ @@ -823,7 +823,7 @@ func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { epoch := curTs.Height() var count int64 - err := s.walk(curTs, epoch, false, + err := s.walk(curTs, epoch, false, s.cfg.HotHeaders, func(cid cid.Cid) error { count++ return nil @@ -873,7 +873,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { startMark := time.Now() var count int64 - err = s.walk(curTs, boundaryEpoch, true, + err = s.walk(curTs, boundaryEpoch, true, s.cfg.HotHeaders, func(cid cid.Cid) error { count++ return markSet.Mark(cid) @@ -997,22 +997,22 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool, f func(cid.Cid) error) error { +func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, fullChain bool, + f func(cid.Cid) error) error { visited := cid.NewSet() walked := cid.NewSet() toWalk := ts.Cids() + blkCnt := 0 walkBlock := func(c cid.Cid) error { if !visited.Visit(c) { return nil } - // check if it has been referenced by some later state root via lookback to avoid duplicate - // dispatches to the visitor - if !walked.Has(c) { - if err := f(c); err != nil { - return err - } + blkCnt++ + + if err := 
f(c); err != nil { + return err } blk, err := s.get(c) @@ -1025,11 +1025,12 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo return xerrors.Errorf("error unmarshaling block header (cid: %s): %w", c, err) } - // don't walk under the boundary, unless we are keeping the headers hot - if hdr.Height < boundary && !s.cfg.HotHeaders { + // don't walk under the boundary, unless we are walking the full chain + if hdr.Height < boundary && !fullChain { return nil } + // we only scan the block if it is above the boundary if hdr.Height >= boundary { if inclMsgs { if err := s.walkLinks(hdr.Messages, walked, f); err != nil { @@ -1049,6 +1050,7 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo if hdr.Height > 0 { toWalk = append(toWalk, hdr.Parents...) } + return nil } @@ -1062,6 +1064,8 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bo } } + log.Infof("walked %d blocks", blkCnt) + return nil } @@ -1089,7 +1093,10 @@ func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error return } - rerr = s.walkLinks(c, walked, f) + err := s.walkLinks(c, walked, f) + if err != nil { + rerr = err + } }) if err != nil { From 90dc274113b886fd545ad6cb23487f10a53978d7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 1 Jul 2021 14:10:57 +0300 Subject: [PATCH 057/197] better logging for chain walk --- blockstore/splitstore/splitstore.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 93a6e7797c7..d19bd80d467 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1002,14 +1002,15 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, f visited := cid.NewSet() walked := cid.NewSet() toWalk := ts.Cids() - blkCnt := 0 + walkCnt := 0 + scanCnt := 0 walkBlock := func(c cid.Cid) error { if !visited.Visit(c) { return nil } - blkCnt++ + walkCnt++ if err := f(c); err != nil { return err @@ -1032,6 +1033,7 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, f // we only scan the block if it is above the boundary if hdr.Height >= boundary { + scanCnt++ if inclMsgs { if err := s.walkLinks(hdr.Messages, walked, f); err != nil { return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) @@ -1064,7 +1066,7 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, f } } - log.Infof("walked %d blocks", blkCnt) + log.Infow("chain walk done", "walked", walkCnt, "scanned", scanCnt) return nil } From f97535d87e4927238a10adf224d4dea8dda842f2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 08:03:54 +0300 Subject: [PATCH 058/197] store the hash in map markset --- blockstore/splitstore/markset.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index d1620527fb6..2eb46f6d856 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -51,8 +51,8 @@ type MapMarkSetEnv struct { var _ MarkSetEnv = (*MapMarkSetEnv)(nil) type MapMarkSet struct { - mx sync.Mutex - cids map[cid.Cid]struct{} + mx sync.Mutex + set map[string]struct{} ts bool } @@ -65,8 +65,8 @@ func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) { func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { return &MapMarkSet{ - cids: make(map[cid.Cid]struct{}), - ts: e.ts, 
+ set: make(map[string]struct{}),
+ ts: e.ts,
 }, nil
 }
@@ -80,7 +80,7 @@ func (s *MapMarkSet) Mark(cid cid.Cid) error {
 defer s.mx.Unlock()
 }
- s.cids[cid] = struct{}{}
+ s.set[string(cid.Hash())] = struct{}{}
 return nil
 }
@@ -90,7 +90,7 @@ func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) {
 defer s.mx.Unlock()
 }
- _, ok := s.cids[cid]
+ _, ok := s.set[string(cid.Hash())]
 return ok, nil
 }

From 6a3cbea790b44b9986020688df20906ec962d212 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Fri, 2 Jul 2021 09:36:15 +0300
Subject: [PATCH 059/197] treat Has as an implicit Write

Rationale: the VM uses the Has check to avoid issuing a duplicate Write in
the blockstore. This means that live objects that would otherwise be written
again are not actually rewritten, so the tracker keeps the first write epoch
instead of the most recent one and live objects can look colder than they
really are.
---
 blockstore/splitstore/debug.go | 13 ++++----
 blockstore/splitstore/splitstore.go | 48 +++++++++++++++++++++++++----
 2 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go
index 39d68ce2767..afaf53db036 100644
--- a/blockstore/splitstore/debug.go
+++ b/blockstore/splitstore/debug.go
@@ -18,7 +18,6 @@ import (
 "github.com/filecoin-project/go-state-types/abi"
 "github.com/filecoin-project/lotus/chain/types"
- blocks "github.com/ipfs/go-block-format"
 cid "github.com/ipfs/go-cid"
 )
@@ -103,7 +102,7 @@ func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) {
 }
 }
-func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch abi.ChainEpoch) {
+func (d *debugLog) LogWrite(curTs *types.TipSet, c cid.Cid, writeEpoch abi.ChainEpoch) {
 if d == nil {
 return
 }
@@ -118,13 +117,13 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch ab
 d.writeCnt++
- _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curTs.Height(), blk.Cid(), writeEpoch, stack)
+ _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curTs.Height(), c, writeEpoch, stack)
 if err != nil {
 log.Warnf("error writing write log: %s", err)
 }
 }
-func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeEpoch abi.ChainEpoch) {
+func (d *debugLog) LogWriteMany(curTs *types.TipSet, cids []cid.Cid, writeEpoch abi.ChainEpoch) {
 if d == nil {
 return
 }
@@ -137,11 +136,11 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE
 d.writeMx.Lock()
 defer d.writeMx.Unlock()
- d.writeCnt += len(blks)
+ d.writeCnt += len(cids)
 now := d.timestamp()
- for _, blk := range blks {
- _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curTs.Height(), blk.Cid(), writeEpoch, stack)
+ for _, c := range cids {
+ _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curTs.Height(), c, writeEpoch, stack)
 if err != nil {
 log.Warnf("error writing write log: %s", err)
 break
diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index d19bd80d467..366341d34ef 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -219,12 +219,35 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) {
 has, err := s.hot.Has(cid)
- if err != nil || has {
- if has && s.txnProtect != nil {
- err = s.txnProtect.Mark(cid)
+ if err != nil {
+ return has, err
+ }
+
+ if has {
+ // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate
+ // writes on Flush. When we have options in the API, the vm can explicitly signal that this is
+ // an implicit Write.
+ s.mx.Lock() + curTs := s.curTs + epoch := s.writeEpoch + s.mx.Unlock() + + err = s.tracker.Put(cid, epoch) + if err != nil { + log.Errorf("error tracking implicit write in hotstore: %s", err) + return true, err } - return has, err + s.debug.LogWrite(curTs, cid, epoch) + + // also make sure the object is considered live during compaction + if s.txnProtect != nil { + err = s.txnProtect.Mark(cid) + if err != nil { + log.Errorf("error protecting object in compaction transaction: %s", err) + } + return true, err + } } return s.cold.Has(cid) @@ -240,6 +263,9 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { case nil: if s.txnProtect != nil { err = s.txnProtect.Mark(cid) + if err != nil { + log.Errorf("error protecting object in compaction transaction: %s", err) + } } return blk, err @@ -275,6 +301,9 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { case nil: if s.txnProtect != nil { err = s.txnProtect.Mark(cid) + if err != nil { + log.Errorf("error protecting object in compaction transaction: %s", err) + } } return size, err @@ -319,11 +348,14 @@ func (s *SplitStore) Put(blk blocks.Block) error { return s.cold.Put(blk) } - s.debug.LogWrite(curTs, blk, epoch) + s.debug.LogWrite(curTs, blk.Cid(), epoch) err = s.hot.Put(blk) if err == nil && s.txnProtect != nil { err = s.txnProtect.Mark(blk.Cid()) + if err != nil { + log.Errorf("error protecting object in compaction transaction: %s", err) + } } if err != nil { @@ -358,13 +390,14 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { return s.cold.PutMany(blks) } - s.debug.LogWriteMany(curTs, blks, epoch) + s.debug.LogWriteMany(curTs, batch, epoch) err = s.hot.PutMany(blks) if err == nil && s.txnProtect != nil { for _, cid := range batch { err2 := s.txnProtect.Mark(cid) if err2 != nil { + log.Errorf("error protecting object in compaction transaction: %s", err) err = multierr.Combine(err, err2) } } @@ -425,6 +458,9 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { case nil: if s.txnProtect != nil { err = s.txnProtect.Mark(cid) + if err != nil { + log.Errorf("error protecting object in compaction transaction: %s", err) + } } return err From e472cacb3ea8f6736bd0acdaf5818effd0c7ae7d Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 10:09:12 +0300 Subject: [PATCH 060/197] add missing return --- blockstore/splitstore/splitstore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 366341d34ef..0509a91d2be 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -246,8 +246,9 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { if err != nil { log.Errorf("error protecting object in compaction transaction: %s", err) } - return true, err } + + return true, err } return s.cold.Has(cid) From be6cc2c3e6c8ecb2dbd6ef125b94ad4e0ac04f06 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 11:02:02 +0300 Subject: [PATCH 061/197] batch implicit write tracking bolt performance leaves something to be desired; doing a single Put takes 10ms, about the same time as batching thousands of them. 
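
To make the batching idea concrete, a minimal self-contained Go sketch
(hypothetical names, not the real tracker API): writes are recorded in an
in-memory set under a mutex and handed to the store as a single batch, so
the sync cost is paid once per flush instead of once per Put; duplicate
writes also collapse for free in the map.

    package main

    import (
        "fmt"
        "sync"
    )

    // batchingTracker is an illustrative sketch of the pattern in
    // this patch, not the splitstore tracker itself
    type batchingTracker struct {
        mx      sync.Mutex
        pending map[string]struct{}
    }

    func newBatchingTracker() *batchingTracker {
        return &batchingTracker{pending: make(map[string]struct{})}
    }

    // track is the hot path: a cheap in-memory record, no disk sync
    func (t *batchingTracker) track(key string) {
        t.mx.Lock()
        defer t.mx.Unlock()
        t.pending[key] = struct{}{}
    }

    // flush hands all pending keys to one batch put, amortizing the
    // per-Put sync cost over the whole batch
    func (t *batchingTracker) flush(putBatch func([]string) error) error {
        t.mx.Lock()
        keys := make([]string, 0, len(t.pending))
        for k := range t.pending {
            keys = append(keys, k)
        }
        t.pending = make(map[string]struct{})
        t.mx.Unlock()

        if len(keys) == 0 {
            return nil
        }
        return putBatch(keys)
    }

    func main() {
        t := newBatchingTracker()
        t.track("cid1")
        t.track("cid2")
        t.track("cid1") // duplicate collapses in the map
        _ = t.flush(func(keys []string) error {
            fmt.Printf("one batch put covers %d tracked writes\n", len(keys))
            return nil
        })
    }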
--- blockstore/splitstore/splitstore.go | 56 ++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0509a91d2be..9b2ae8e2083 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -148,6 +148,9 @@ type SplitStore struct { txnLk sync.RWMutex txnEnv MarkSetEnv txnProtect MarkSet + + // implicit write set + implicitWrites map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -188,6 +191,8 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co txnEnv: txnEnv, coldPurgeSize: defaultColdPurgeSize, + + implicitWrites: make(map[cid.Cid]struct{}), } ss.ctx, ss.cancel = context.WithCancel(context.Background()) @@ -227,18 +232,9 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. - s.mx.Lock() - curTs := s.curTs - epoch := s.writeEpoch - s.mx.Unlock() - - err = s.tracker.Put(cid, epoch) - if err != nil { - log.Errorf("error tracking implicit write in hotstore: %s", err) - return true, err - } - - s.debug.LogWrite(curTs, cid, epoch) + // Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with + // bolot because of syncing, so we batch them + s.putImplicitWrite(cid) // also make sure the object is considered live during compaction if s.txnProtect != nil { @@ -566,6 +562,7 @@ func (s *SplitStore) Close() error { } } + s.flushImplicitWrites(false) s.cancel() return multierr.Combine(s.tracker.Close(), s.env.Close(), s.debug.Close()) } @@ -634,10 +631,45 @@ func (s *SplitStore) updateWriteEpoch() { writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + 1 if writeEpoch > s.writeEpoch { + s.flushImplicitWrites(true) s.writeEpoch = writeEpoch } } +func (s *SplitStore) putImplicitWrite(c cid.Cid) { + s.mx.Lock() + defer s.mx.Unlock() + + s.implicitWrites[c] = struct{}{} +} + +func (s *SplitStore) flushImplicitWrites(locked bool) { + if !locked { + s.mx.Lock() + defer s.mx.Unlock() + } + + if len(s.implicitWrites) == 0 { + return + } + + cids := make([]cid.Cid, 0, len(s.implicitWrites)) + for c := range s.implicitWrites { + cids = append(cids, c) + } + s.implicitWrites = make(map[cid.Cid]struct{}) + + epoch := s.writeEpoch + curTs := s.curTs + + err := s.tracker.PutBatch(cids, epoch) + if err != nil { + log.Errorf("error putting implicit write batch to tracker: %s", err) + } + + s.debug.LogWriteMany(curTs, cids, epoch) +} + func (s *SplitStore) background() { ticker := time.NewTicker(time.Second) defer ticker.Stop() From a29947d47c6912ed55585536a42d1b4e93a6104b Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 11:09:36 +0300 Subject: [PATCH 062/197] flush implicit writes in all paths in updateWriteEpoch --- blockstore/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 9b2ae8e2083..b41a90ecbc9 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -623,6 +623,7 @@ func (s *SplitStore) updateWriteEpoch() { if dt < 0 { writeEpoch := curTs.Height() + 1 if writeEpoch > s.writeEpoch { + s.flushImplicitWrites(true) s.writeEpoch = writeEpoch } From 
7f473f56ebe0c6b28bb5da544947ff210ec50962 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 11:17:31 +0300 Subject: [PATCH 063/197] flush implicit writes before starting compaction --- blockstore/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b41a90ecbc9..1ff6195d9ef 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -879,6 +879,7 @@ func (s *SplitStore) compact(curTs *types.TipSet) { log.Infow("current mark set size estimate", "size", s.markSetSize) } + s.flushImplicitWrites(false) start := time.Now() err = s.doCompact(curTs) took := time.Since(start).Milliseconds() From d0bfe421b57fbba3d5d13c6236a37eef3173b1ab Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 11:18:51 +0300 Subject: [PATCH 064/197] flush implicit writes at the right time before starting compaction to avoid races --- blockstore/splitstore/splitstore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 1ff6195d9ef..ea7f21ea0ce 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -879,7 +879,6 @@ func (s *SplitStore) compact(curTs *types.TipSet) { log.Infow("current mark set size estimate", "size", s.markSetSize) } - s.flushImplicitWrites(false) start := time.Now() err = s.doCompact(curTs) took := time.Since(start).Milliseconds() @@ -939,6 +938,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { defer s.debug.Flush() + s.flushImplicitWrites(false) + // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() From 3e8e9273cacb2c2a36f337f93e5cd3a6a2dcb458 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 11:37:35 +0300 Subject: [PATCH 065/197] track all writes using async batching, not just implicit ones --- blockstore/splitstore/splitstore.go | 80 +++++++++++------------------ 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ea7f21ea0ce..912a808d6e8 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -149,8 +149,8 @@ type SplitStore struct { txnEnv MarkSetEnv txnProtect MarkSet - // implicit write set - implicitWrites map[cid.Cid]struct{} + // pending write set + pendingWrites map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -192,7 +192,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co coldPurgeSize: defaultColdPurgeSize, - implicitWrites: make(map[cid.Cid]struct{}), + pendingWrites: make(map[cid.Cid]struct{}), } ss.ctx, ss.cancel = context.WithCancel(context.Background()) @@ -234,7 +234,7 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { // an implicit Write. 
// Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with // bolot because of syncing, so we batch them - s.putImplicitWrite(cid) + s.trackWrite(cid) // also make sure the object is considered live during compaction if s.txnProtect != nil { @@ -326,28 +326,12 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { } func (s *SplitStore) Put(blk blocks.Block) error { - s.mx.Lock() - if s.curTs == nil { - s.mx.Unlock() - return s.cold.Put(blk) - } - - curTs := s.curTs - epoch := s.writeEpoch - s.mx.Unlock() - s.txnLk.RLock() defer s.txnLk.RUnlock() - err := s.tracker.Put(blk.Cid(), epoch) - if err != nil { - log.Errorf("error tracking CID in hotstore: %s; falling back to coldstore", err) - return s.cold.Put(blk) - } - - s.debug.LogWrite(curTs, blk.Cid(), epoch) + s.trackWrite(blk.Cid()) - err = s.hot.Put(blk) + err := s.hot.Put(blk) if err == nil && s.txnProtect != nil { err = s.txnProtect.Mark(blk.Cid()) if err != nil { @@ -363,16 +347,6 @@ func (s *SplitStore) Put(blk blocks.Block) error { } func (s *SplitStore) PutMany(blks []blocks.Block) error { - s.mx.Lock() - if s.curTs == nil { - s.mx.Unlock() - return s.cold.PutMany(blks) - } - - curTs := s.curTs - epoch := s.writeEpoch - s.mx.Unlock() - batch := make([]cid.Cid, 0, len(blks)) for _, blk := range blks { batch = append(batch, blk.Cid()) @@ -381,15 +355,9 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { s.txnLk.RLock() defer s.txnLk.RUnlock() - err := s.tracker.PutBatch(batch, epoch) - if err != nil { - log.Errorf("error tracking CIDs in hotstore: %s; falling back to coldstore", err) - return s.cold.PutMany(blks) - } - - s.debug.LogWriteMany(curTs, batch, epoch) + s.trackWrites(batch) - err = s.hot.PutMany(blks) + err := s.hot.PutMany(blks) if err == nil && s.txnProtect != nil { for _, cid := range batch { err2 := s.txnProtect.Mark(cid) @@ -562,7 +530,7 @@ func (s *SplitStore) Close() error { } } - s.flushImplicitWrites(false) + s.flushPendingWrites(false) s.cancel() return multierr.Combine(s.tracker.Close(), s.env.Close(), s.debug.Close()) } @@ -623,7 +591,7 @@ func (s *SplitStore) updateWriteEpoch() { if dt < 0 { writeEpoch := curTs.Height() + 1 if writeEpoch > s.writeEpoch { - s.flushImplicitWrites(true) + s.flushPendingWrites(true) s.writeEpoch = writeEpoch } @@ -632,33 +600,42 @@ func (s *SplitStore) updateWriteEpoch() { writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + 1 if writeEpoch > s.writeEpoch { - s.flushImplicitWrites(true) + s.flushPendingWrites(true) s.writeEpoch = writeEpoch } } -func (s *SplitStore) putImplicitWrite(c cid.Cid) { +func (s *SplitStore) trackWrite(c cid.Cid) { s.mx.Lock() defer s.mx.Unlock() - s.implicitWrites[c] = struct{}{} + s.pendingWrites[c] = struct{}{} +} + +func (s *SplitStore) trackWrites(cids []cid.Cid) { + s.mx.Lock() + defer s.mx.Unlock() + + for _, c := range cids { + s.pendingWrites[c] = struct{}{} + } } -func (s *SplitStore) flushImplicitWrites(locked bool) { +func (s *SplitStore) flushPendingWrites(locked bool) { if !locked { s.mx.Lock() defer s.mx.Unlock() } - if len(s.implicitWrites) == 0 { + if len(s.pendingWrites) == 0 { return } - cids := make([]cid.Cid, 0, len(s.implicitWrites)) - for c := range s.implicitWrites { + cids := make([]cid.Cid, 0, len(s.pendingWrites)) + for c := range s.pendingWrites { cids = append(cids, c) } - s.implicitWrites = make(map[cid.Cid]struct{}) + s.pendingWrites = make(map[cid.Cid]struct{}) epoch := s.writeEpoch curTs := s.curTs @@ -938,7 +915,8 @@ func (s 
*SplitStore) doCompact(curTs *types.TipSet) error { defer s.debug.Flush() - s.flushImplicitWrites(false) + // flush pending writes to update the tracker + s.flushPendingWrites(false) // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) From aeaa59d4b55124b30c40c959de2f6eb90f7058e3 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 11:41:12 +0300 Subject: [PATCH 066/197] move comments about tracking perf issues into a more pertinent place --- blockstore/splitstore/splitstore.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 912a808d6e8..5677c4770d4 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -232,8 +232,6 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. - // Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with - // bolot because of syncing, so we batch them s.trackWrite(cid) // also make sure the object is considered live during compaction @@ -605,6 +603,8 @@ func (s *SplitStore) updateWriteEpoch() { } } +// Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with +// bbolt because of syncing (order of 10ms), so we batch them. func (s *SplitStore) trackWrite(c cid.Cid) { s.mx.Lock() defer s.mx.Unlock() @@ -612,6 +612,7 @@ func (s *SplitStore) trackWrite(c cid.Cid) { s.pendingWrites[c] = struct{}{} } +// and also combine batch writes into them func (s *SplitStore) trackWrites(cids []cid.Cid) { s.mx.Lock() defer s.mx.Unlock() From 2faa4aa993de902f30de3af96f6c6ffef86ed45f Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 12:25:58 +0300 Subject: [PATCH 067/197] debug log writes at track so that we get correct stack traces --- blockstore/splitstore/splitstore.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 5677c4770d4..77edc879078 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -610,6 +610,8 @@ func (s *SplitStore) trackWrite(c cid.Cid) { defer s.mx.Unlock() s.pendingWrites[c] = struct{}{} + + s.debug.LogWrite(s.curTs, c, s.writeEpoch) } // and also combine batch writes into them @@ -620,6 +622,8 @@ func (s *SplitStore) trackWrites(cids []cid.Cid) { for _, c := range cids { s.pendingWrites[c] = struct{}{} } + + s.debug.LogWriteMany(s.curTs, cids, s.writeEpoch) } func (s *SplitStore) flushPendingWrites(locked bool) { @@ -639,14 +643,10 @@ func (s *SplitStore) flushPendingWrites(locked bool) { s.pendingWrites = make(map[cid.Cid]struct{}) epoch := s.writeEpoch - curTs := s.curTs - err := s.tracker.PutBatch(cids, epoch) if err != nil { log.Errorf("error putting implicit write batch to tracker: %s", err) } - - s.debug.LogWriteMany(curTs, cids, epoch) } func (s *SplitStore) background() { From b3ddaa5f02c18708939d0508d58ce9dfd51d5972 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 12:30:43 +0300 Subject: [PATCH 068/197] fix panic at startup genesis is written (!) 
before starting the splitstore, so curTs is nil --- blockstore/splitstore/debug.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index afaf53db036..47d61816f3d 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -112,12 +112,17 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, c cid.Cid, writeEpoch abi.Chain stack = " " + d.getStack() } + var curEpoch abi.ChainEpoch + if curTs != nil { + curEpoch = curTs.Height() + } + d.writeMx.Lock() defer d.writeMx.Unlock() d.writeCnt++ - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curTs.Height(), c, writeEpoch, stack) + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curEpoch, c, writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) } @@ -133,6 +138,11 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, cids []cid.Cid, writeEpoch stack = " " + d.getStack() } + var curEpoch abi.ChainEpoch + if curTs != nil { + curEpoch = curTs.Height() + } + d.writeMx.Lock() defer d.writeMx.Unlock() @@ -140,7 +150,7 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, cids []cid.Cid, writeEpoch now := d.timestamp() for _, c := range cids { - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curTs.Height(), c, writeEpoch, stack) + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curEpoch, c, writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) break From 4de0cd9fcbdd4e43a7dce7dc2aaf09fe42c2877a Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 12:38:27 +0300 Subject: [PATCH 069/197] move write log back to flush so that we don't crawl to a halt --- blockstore/splitstore/splitstore.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 77edc879078..cf3c07a62b6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -610,8 +610,6 @@ func (s *SplitStore) trackWrite(c cid.Cid) { defer s.mx.Unlock() s.pendingWrites[c] = struct{}{} - - s.debug.LogWrite(s.curTs, c, s.writeEpoch) } // and also combine batch writes into them @@ -622,8 +620,6 @@ func (s *SplitStore) trackWrites(cids []cid.Cid) { for _, c := range cids { s.pendingWrites[c] = struct{}{} } - - s.debug.LogWriteMany(s.curTs, cids, s.writeEpoch) } func (s *SplitStore) flushPendingWrites(locked bool) { @@ -647,6 +643,8 @@ func (s *SplitStore) flushPendingWrites(locked bool) { if err != nil { log.Errorf("error putting implicit write batch to tracker: %s", err) } + + s.debug.LogWriteMany(s.curTs, cids, epoch) } func (s *SplitStore) background() { From 982867317eb0139028b5f915a351fa1c4cbd3405 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 13:27:56 +0300 Subject: [PATCH 070/197] transitively track dags from implicit writes in Has --- blockstore/splitstore/splitstore.go | 34 ++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index cf3c07a62b6..bfd5dc34382 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -218,11 +218,11 @@ func (s *SplitStore) DeleteMany(_ []cid.Cid) error { return errors.New("DeleteMany not implemented on SplitStore; don't do this Luke!") //nolint } -func (s *SplitStore) Has(cid cid.Cid) (bool, error) { +func (s *SplitStore) Has(c cid.Cid) (bool, 
error) { s.txnLk.RLock() defer s.txnLk.RUnlock() - has, err := s.hot.Has(cid) + has, err := s.hot.Has(c) if err != nil { return has, err @@ -232,20 +232,38 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. - s.trackWrite(cid) + // we also walk dags for links so that the reference applies transitively to children. + if c.Prefix().Codec != cid.DagCBOR { + s.trackWrite(c) + } else { + err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + s.trackWrite(c) + return nil + }) + if err != nil { + log.Errorf("error transitively tracking cid %s: %s", c, err) + } + } // also make sure the object is considered live during compaction if s.txnProtect != nil { - err = s.txnProtect.Mark(cid) + if c.Prefix().Codec != cid.DagCBOR { + err = s.txnProtect.Mark(c) + } else { + err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + return s.txnProtect.Mark(c) + }) + } + if err != nil { - log.Errorf("error protecting object in compaction transaction: %s", err) + log.Errorf("error protecting object (cid: %s) in compaction transaction: %s", c, err) } } return true, err } - return s.cold.Has(cid) + return s.cold.Has(c) } func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { @@ -353,7 +371,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { s.txnLk.RLock() defer s.txnLk.RUnlock() - s.trackWrites(batch) + s.trackWriteMany(batch) err := s.hot.PutMany(blks) if err == nil && s.txnProtect != nil { @@ -613,7 +631,7 @@ func (s *SplitStore) trackWrite(c cid.Cid) { } // and also combine batch writes into them -func (s *SplitStore) trackWrites(cids []cid.Cid) { +func (s *SplitStore) trackWriteMany(cids []cid.Cid) { s.mx.Lock() defer s.mx.Unlock() From 13a674330f6abad249f029e7f83003138b99e2aa Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 13:36:11 +0300 Subject: [PATCH 071/197] add pending write check before tracking the object in Has --- blockstore/splitstore/splitstore.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index bfd5dc34382..69988a77377 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -233,6 +233,11 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. // we also walk dags for links so that the reference applies transitively to children. 
+ // but first check if it is already a pending write to avoid unnecessary work + if s.isPendingWrite(c) { + return true, nil + } + if c.Prefix().Codec != cid.DagCBOR { s.trackWrite(c) } else { @@ -245,7 +250,8 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { } } - // also make sure the object is considered live during compaction + // also make sure the object is considered live during compaction in case we have already + // flushed pending writes and started compaction if s.txnProtect != nil { if c.Prefix().Codec != cid.DagCBOR { err = s.txnProtect.Mark(c) @@ -640,6 +646,14 @@ func (s *SplitStore) trackWriteMany(cids []cid.Cid) { } } +func (s *SplitStore) isPendingWrite(c cid.Cid) bool { + s.mx.Lock() + defer s.mx.Unlock() + + _, ok := s.pendingWrites[c] + return ok +} + func (s *SplitStore) flushPendingWrites(locked bool) { if !locked { s.mx.Lock() From a98a062347db0f3c46dbd0c23c1bd5f59631c18a Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 14:01:10 +0300 Subject: [PATCH 072/197] do the dag walk for deep write tracking during flush avoid crawling everything to a halt --- blockstore/splitstore/splitstore.go | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 69988a77377..32fdb2a93ab 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -232,34 +232,16 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. - // we also walk dags for links so that the reference applies transitively to children. 
- // but first check if it is already a pending write to avoid unnecessary work if s.isPendingWrite(c) { return true, nil } - if c.Prefix().Codec != cid.DagCBOR { - s.trackWrite(c) - } else { - err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { - s.trackWrite(c) - return nil - }) - if err != nil { - log.Errorf("error transitively tracking cid %s: %s", c, err) - } - } + s.trackWrite(c) // also make sure the object is considered live during compaction in case we have already // flushed pending writes and started compaction if s.txnProtect != nil { - if c.Prefix().Codec != cid.DagCBOR { - err = s.txnProtect.Mark(c) - } else { - err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { - return s.txnProtect.Mark(c) - }) - } + err = s.txnProtect.Mark(c) if err != nil { log.Errorf("error protecting object (cid: %s) in compaction transaction: %s", c, err) @@ -667,6 +649,24 @@ func (s *SplitStore) flushPendingWrites(locked bool) { cids := make([]cid.Cid, 0, len(s.pendingWrites)) for c := range s.pendingWrites { cids = append(cids, c) + + // recursively walk dags to propagate dependent references + if c.Prefix().Codec != cid.DagCBOR { + continue + } + + err := s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + _, has := s.pendingWrites[c] + if !has { + cids = append(cids, c) + } + + return nil + }) + + if err != nil { + log.Errorf("error tracking dependent writes for cid %s: %s", c, err) + } } s.pendingWrites = make(map[cid.Cid]struct{}) From bd92c230dad39d7487e94494ed162bac47ab6b40 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 14:17:10 +0300 Subject: [PATCH 073/197] refactor txn reference tracking, do deep marking of DAGs --- blockstore/splitstore/splitstore.go | 114 +++++++++++++--------------- 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 32fdb2a93ab..fd3ddb63971 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -232,21 +232,11 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. 
- if s.isPendingWrite(c) { - return true, nil - } - s.trackWrite(c) // also make sure the object is considered live during compaction in case we have already // flushed pending writes and started compaction - if s.txnProtect != nil { - err = s.txnProtect.Mark(c) - - if err != nil { - log.Errorf("error protecting object (cid: %s) in compaction transaction: %s", c, err) - } - } + s.trackTxnRef(c) return true, err } @@ -262,13 +252,7 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { switch err { case nil: - if s.txnProtect != nil { - err = s.txnProtect.Mark(cid) - if err != nil { - log.Errorf("error protecting object in compaction transaction: %s", err) - } - } - + s.trackTxnRef(cid) return blk, err case bstore.ErrNotFound: @@ -300,13 +284,7 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { switch err { case nil: - if s.txnProtect != nil { - err = s.txnProtect.Mark(cid) - if err != nil { - log.Errorf("error protecting object in compaction transaction: %s", err) - } - } - + s.trackTxnRef(cid) return size, err case bstore.ErrNotFound: @@ -336,15 +314,8 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.trackWrite(blk.Cid()) err := s.hot.Put(blk) - if err == nil && s.txnProtect != nil { - err = s.txnProtect.Mark(blk.Cid()) - if err != nil { - log.Errorf("error protecting object in compaction transaction: %s", err) - } - } - - if err != nil { - log.Errorf("error putting block %s in hotstore: %s", blk.Cid(), err) + if err == nil { + s.trackTxnRef(blk.Cid()) } return err @@ -362,18 +333,8 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { s.trackWriteMany(batch) err := s.hot.PutMany(blks) - if err == nil && s.txnProtect != nil { - for _, cid := range batch { - err2 := s.txnProtect.Mark(cid) - if err2 != nil { - log.Errorf("error protecting object in compaction transaction: %s", err) - err = multierr.Combine(err, err2) - } - } - } - - if err != nil { - log.Errorf("error putting batch in hotstore: %s", err) + if err == nil { + s.trackTxnRefMany(batch) } return err @@ -425,14 +386,8 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case nil: - if s.txnProtect != nil { - err = s.txnProtect.Mark(cid) - if err != nil { - log.Errorf("error protecting object in compaction transaction: %s", err) - } - } - - return err + s.trackTxnRef(cid) + return nil case bstore.ErrNotFound: s.mx.Lock() @@ -628,14 +583,6 @@ func (s *SplitStore) trackWriteMany(cids []cid.Cid) { } } -func (s *SplitStore) isPendingWrite(c cid.Cid) bool { - s.mx.Lock() - defer s.mx.Unlock() - - _, ok := s.pendingWrites[c] - return ok -} - func (s *SplitStore) flushPendingWrites(locked bool) { if !locked { s.mx.Lock() @@ -679,6 +626,49 @@ func (s *SplitStore) flushPendingWrites(locked bool) { s.debug.LogWriteMany(s.curTs, cids, epoch) } +func (s *SplitStore) trackTxnRef(c cid.Cid) { + if s.txnProtect == nil { + // not compacting + return + } + + // NOTE: this occurs check assumes a markset without false positives, which is currently the case + // with the map + has, err := s.txnProtect.Has(c) + if err != nil { + log.Errorf("error occur checking object (cid: %s) for compaction transaction: %s", c, err) + return + } + + if has { + return + } + + if c.Prefix().Codec != cid.DagCBOR { + err = s.txnProtect.Mark(c) + } else { + err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + return s.txnProtect.Mark(c) + }) + } + + if err != nil { + log.Errorf("error protecting object (cid: %s) from compaction transaction: %s", c, err) + return + } +} + 
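// An illustrative sketch, not part of the patch: the protection above is a
// transitive mark over a dag -- mark the root, and recurse into its links so
// the whole subgraph survives compaction, skipping subgraphs that are already
// marked. Assuming a hypothetical links(c) helper that returns the children
// of c, the core pattern looks like this:
func markTransitively(c cid.Cid, marked *cid.Set, links func(cid.Cid) ([]cid.Cid, error)) error {
	if !marked.Visit(c) {
		// already marked; skip the entire subgraph
		return nil
	}
	children, err := links(c)
	if err != nil {
		return err
	}
	for _, child := range children {
		if err := markTransitively(child, marked, links); err != nil {
			return err
		}
	}
	return nil
}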
+func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) { + if s.txnProtect == nil { + // not compacting + return + } + + for _, c := range cids { + s.trackTxnRef(c) + } +} + func (s *SplitStore) background() { ticker := time.NewTicker(time.Second) defer ticker.Stop() From 4071488ef25aadd6091a02b972c0c61c96b37dc0 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 14:25:02 +0300 Subject: [PATCH 074/197] first write, then track --- blockstore/splitstore/splitstore.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index fd3ddb63971..e258d0b994b 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -311,10 +311,9 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.txnLk.RLock() defer s.txnLk.RUnlock() - s.trackWrite(blk.Cid()) - err := s.hot.Put(blk) if err == nil { + s.trackWrite(blk.Cid()) s.trackTxnRef(blk.Cid()) } @@ -330,10 +329,9 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { s.txnLk.RLock() defer s.txnLk.RUnlock() - s.trackWriteMany(batch) - err := s.hot.PutMany(blks) if err == nil { + s.trackWriteMany(batch) s.trackTxnRefMany(batch) } From da00fc66eef2985ad2967be5988db22084009665 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 14:31:31 +0300 Subject: [PATCH 075/197] downgrade a couple of logs to warnings --- blockstore/splitstore/splitstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e258d0b994b..10cf22e0e4a 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -610,7 +610,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) { }) if err != nil { - log.Errorf("error tracking dependent writes for cid %s: %s", c, err) + log.Warnf("error tracking dependent writes for cid %s: %s", c, err) } } s.pendingWrites = make(map[cid.Cid]struct{}) @@ -651,7 +651,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid) { } if err != nil { - log.Errorf("error protecting object (cid: %s) from compaction transaction: %s", c, err) + log.Warnf("error protecting object (cid: %s) from compaction transaction: %s", c, err) return } } From 1d41e1544a3f909c697341cb3339da30f92fa095 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 14:46:02 +0300 Subject: [PATCH 076/197] optimize transitive write tracking a bit --- blockstore/splitstore/splitstore.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 10cf22e0e4a..82e98e60f1c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -592,8 +592,15 @@ func (s *SplitStore) flushPendingWrites(locked bool) { } cids := make([]cid.Cid, 0, len(s.pendingWrites)) + seen := make(map[cid.Cid]struct{}) for c := range s.pendingWrites { + _, ok := seen[c] + if ok { + continue + } + cids = append(cids, c) + seen[c] = struct{}{} // recursively walk dags to propagate dependent references if c.Prefix().Codec != cid.DagCBOR { @@ -601,9 +608,10 @@ func (s *SplitStore) flushPendingWrites(locked bool) { } err := s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { - _, has := s.pendingWrites[c] - if !has { + _, ok := seen[c] + if !ok { cids = append(cids, c) + seen[c] = struct{}{} } return nil From 484dfaebceb8e6dd89f6b9b92e9a0eff0a08bab2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 15:32:55 +0300 Subject: [PATCH 
077/197] reused cidset across all walks when flushing pending writes --- blockstore/splitstore/splitstore.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 82e98e60f1c..a51a48e2015 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -593,6 +593,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) { cids := make([]cid.Cid, 0, len(s.pendingWrites)) seen := make(map[cid.Cid]struct{}) + walked := cid.NewSet() for c := range s.pendingWrites { _, ok := seen[c] if ok { @@ -607,7 +608,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) { continue } - err := s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + err := s.walkLinks(c, walked, func(c cid.Cid) error { _, ok := seen[c] if !ok { cids = append(cids, c) From 9d6bcd770522fd615f47e03cbf0353517f5d7c69 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 15:41:10 +0300 Subject: [PATCH 078/197] avoid clown shoes: only walk links for tracking in implicit writes/refs --- blockstore/splitstore/splitstore.go | 34 +++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index a51a48e2015..65eb7396236 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -150,7 +150,8 @@ type SplitStore struct { txnProtect MarkSet // pending write set - pendingWrites map[cid.Cid]struct{} + pendingWrites map[cid.Cid]struct{} + pendingWritesImplicit map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -232,11 +233,11 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate // writes on Flush. When we have options in the API, the vm can explicitly signal that this is // an implicit Write. - s.trackWrite(c) + s.trackWrite(c, true) // also make sure the object is considered live during compaction in case we have already // flushed pending writes and started compaction - s.trackTxnRef(c) + s.trackTxnRef(c, true) return true, err } @@ -252,7 +253,7 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { switch err { case nil: - s.trackTxnRef(cid) + s.trackTxnRef(cid, false) return blk, err case bstore.ErrNotFound: @@ -284,7 +285,7 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { switch err { case nil: - s.trackTxnRef(cid) + s.trackTxnRef(cid, false) return size, err case bstore.ErrNotFound: @@ -313,8 +314,8 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.hot.Put(blk) if err == nil { - s.trackWrite(blk.Cid()) - s.trackTxnRef(blk.Cid()) + s.trackWrite(blk.Cid(), false) + s.trackTxnRef(blk.Cid(), false) } return err @@ -384,7 +385,7 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case nil: - s.trackTxnRef(cid) + s.trackTxnRef(cid, false) return nil case bstore.ErrNotFound: @@ -564,7 +565,7 @@ func (s *SplitStore) updateWriteEpoch() { // Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with // bbolt because of syncing (order of 10ms), so we batch them. 
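// An illustrative sketch, not part of the patch: the batching described above
// amortizes bbolt's ~10ms per-transaction sync across a whole set of writes.
// With put standing in for a hypothetical batch primitive in the spirit of
// tracker.PutBatch, the shape of the pattern is:
type pendingWriteBatch struct {
	mx   sync.Mutex
	pend map[cid.Cid]struct{}
}

func (b *pendingWriteBatch) add(c cid.Cid) {
	b.mx.Lock()
	defer b.mx.Unlock()
	b.pend[c] = struct{}{} // the map also dedups repeated writes of the same cid
}

func (b *pendingWriteBatch) flush(put func([]cid.Cid) error) error {
	b.mx.Lock()
	defer b.mx.Unlock()
	cids := make([]cid.Cid, 0, len(b.pend))
	for c := range b.pend {
		cids = append(cids, c)
	}
	b.pend = make(map[cid.Cid]struct{})
	return put(cids) // one transaction for the whole batch
}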
-func (s *SplitStore) trackWrite(c cid.Cid) {
+func (s *SplitStore) trackWrite(c cid.Cid, implicit bool) {
 	s.mx.Lock()
 	defer s.mx.Unlock()
 
@@ -603,6 +604,11 @@ func (s *SplitStore) flushPendingWrites(locked bool) {
 		cids = append(cids, c)
 		seen[c] = struct{}{}
 
+		_, implicit := s.pendingWritesImplicit[c]
+		if !implicit {
+			continue
+		}
+
 		// recursively walk dags to propagate dependent references
 		if c.Prefix().Codec != cid.DagCBOR {
 			continue
@@ -622,7 +628,11 @@ func (s *SplitStore) flushPendingWrites(locked bool) {
 			log.Warnf("error tracking dependent writes for cid %s: %s", c, err)
 		}
 	}
+
 	s.pendingWrites = make(map[cid.Cid]struct{})
+	if len(s.pendingWritesImplicit) > 0 {
+		s.pendingWritesImplicit = make(map[cid.Cid]struct{})
+	}
 
 	epoch := s.writeEpoch
 	err := s.tracker.PutBatch(cids, epoch)
@@ -633,7 +643,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) {
 	s.debug.LogWriteMany(s.curTs, cids, epoch)
 }
 
-func (s *SplitStore) trackTxnRef(c cid.Cid) {
+func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) {
 	if s.txnProtect == nil {
 		// not compacting
 		return
@@ -653,7 +663,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid) {
 
 	if c.Prefix().Codec != cid.DagCBOR {
 		err = s.txnProtect.Mark(c)
-	} else {
+	} else if implicit {
 		err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error {
 			return s.txnProtect.Mark(c)
 		})
@@ -672,7 +682,7 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) {
 	}
 
 	for _, c := range cids {
-		s.trackTxnRef(c)
+		s.trackTxnRef(c, false)
 	}
 }
 
From 637fbf6c5b0805de95ab50a02e658a1ccd44351c Mon Sep 17 00:00:00 2001
From: vyzo
Date: Fri, 2 Jul 2021 15:49:33 +0300
Subject: [PATCH 079/197] fix faulty if/else logic for implicit txn protection

---
 blockstore/splitstore/splitstore.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index 65eb7396236..29ad3a94b6e 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -661,9 +661,9 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) {
 		return
 	}
 
-	if c.Prefix().Codec != cid.DagCBOR {
+	if c.Prefix().Codec != cid.DagCBOR || !implicit {
 		err = s.txnProtect.Mark(c)
-	} else if implicit {
+	} else {
 		err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error {
 			return s.txnProtect.Mark(c)
 		})
From b87295db93494bbed6698eff3ce4b02ceac2a51b Mon Sep 17 00:00:00 2001
From: vyzo
Date: Fri, 2 Jul 2021 20:57:46 +0300
Subject: [PATCH 080/197] bubble up dependent txn ref errors

This causes Has to return false if it fails to traverse/protect all
links, which would cause the vm to recompute.
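A minimal sketch of the contract described above (illustrative only; the
names hasWithProtection and protect are not from this series): an object only
counts as present if it and all its dependent links could be protected,
otherwise it is reported absent so the vm recreates it.

func hasWithProtection(found bool, protect func() error) (bool, error) {
	if !found {
		return false, nil
	}
	if err := protect(); err != nil {
		// protection failed part way, e.g. a link was already purged;
		// report the object as absent so the vm recomputes it rather
		// than assuming the whole dag is present
		return false, nil
	}
	return true, nil
}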
--- blockstore/splitstore/splitstore.go | 41 ++++++++++++++++++----------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 29ad3a94b6e..0eadcf3c079 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -237,9 +237,11 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // also make sure the object is considered live during compaction in case we have already // flushed pending writes and started compaction - s.trackTxnRef(c, true) + trackErr := s.trackTxnRef(c, true) - return true, err + // if we failed to track the object and all its dependencies, then return false so as + // to cause the vm to recompute + return trackErr == nil, nil } return s.cold.Has(c) @@ -253,7 +255,7 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { switch err { case nil: - s.trackTxnRef(cid, false) + err = s.trackTxnRef(cid, false) return blk, err case bstore.ErrNotFound: @@ -285,7 +287,7 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { switch err { case nil: - s.trackTxnRef(cid, false) + err = s.trackTxnRef(cid, false) return size, err case bstore.ErrNotFound: @@ -315,7 +317,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.hot.Put(blk) if err == nil { s.trackWrite(blk.Cid(), false) - s.trackTxnRef(blk.Cid(), false) + err = s.trackTxnRef(blk.Cid(), false) } return err @@ -333,7 +335,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { err := s.hot.PutMany(blks) if err == nil { s.trackWriteMany(batch) - s.trackTxnRefMany(batch) + err = s.trackTxnRefMany(batch) } return err @@ -385,8 +387,8 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case nil: - s.trackTxnRef(cid, false) - return nil + err = s.trackTxnRef(cid, false) + return err case bstore.ErrNotFound: s.mx.Lock() @@ -643,10 +645,10 @@ func (s *SplitStore) flushPendingWrites(locked bool) { s.debug.LogWriteMany(s.curTs, cids, epoch) } -func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) { +func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error { if s.txnProtect == nil { // not compacting - return + return nil } // NOTE: this occurs check assumes a markset without false positives, which is currently the case @@ -654,11 +656,11 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) { has, err := s.txnProtect.Has(c) if err != nil { log.Errorf("error occur checking object (cid: %s) for compaction transaction: %s", c, err) - return + return err } if has { - return + return nil } if c.Prefix().Codec != cid.DagCBOR || !implicit { @@ -671,19 +673,26 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) { if err != nil { log.Warnf("error protecting object (cid: %s) from compaction transaction: %s", c, err) - return } + + return err } -func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) { +func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { if s.txnProtect == nil { // not compacting - return + return nil } + var err error for _, c := range cids { - s.trackTxnRef(c, false) + err2 := s.trackTxnRef(c, false) + if err2 != nil { + err = multierr.Combine(err, err2) + } } + + return err } func (s *SplitStore) background() { From 68bc5d22916908e4e7f4010b670e630e9c4cf271 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 2 Jul 2021 22:34:00 +0300 Subject: [PATCH 081/197] skip moving cold blocks when running with a noop coldstore it is a noop but it still takes (a lot of) time because it has to 
read all the cold blocks. --- blockstore/splitstore/splitstore.go | 23 +++++++++++++++-------- node/modules/blockstore.go | 7 ++++--- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0eadcf3c079..e9fe92f2f02 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -102,6 +102,12 @@ type Config struct { // This is necessary, and automatically set by DI in lotus node construction, if // you are running with a noop coldstore. HotHeaders bool + + // SkipMoveColdBlocks indicates whether to skip moving cold blocks to the coldstore. + // If the splitstore is running with a noop coldstore then this option is set to true + // which skips moving (as it is a noop, but still takes time to read all the cold objects) + // and directly purges cold blocks. + SkipMoveColdBlocks bool } // ChainAccessor allows the Splitstore to access the chain. It will most likely @@ -1055,15 +1061,16 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("compaction aborted") } - // 2.2 copy the cold objects to the coldstore - log.Info("moving cold blocks to the coldstore") - startMove := time.Now() - err = s.moveColdBlocks(cold) - if err != nil { - return xerrors.Errorf("error moving cold blocks: %w", err) + // 2.2 copy the cold objects to the coldstore -- if we have one + if !s.cfg.SkipMoveColdBlocks { + log.Info("moving cold blocks to the coldstore") + startMove := time.Now() + err = s.moveColdBlocks(cold) + if err != nil { + return xerrors.Errorf("error moving cold blocks: %w", err) + } + log.Infow("moving done", "took", time.Since(startMove)) } - log.Infow("moving done", "took", time.Since(startMove)) - // 2.3 purge cold objects from the hotstore log.Info("purging cold objects from the hotstore") startPurge := time.Now() diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 3eefd4ac890..f4945f15c36 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -78,9 +78,10 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked } cfg := &splitstore.Config{ - TrackingStoreType: cfg.Splitstore.TrackingStoreType, - MarkSetType: cfg.Splitstore.MarkSetType, - HotHeaders: cfg.Splitstore.HotHeaders || cfg.Splitstore.ColdStoreType == "noop", + TrackingStoreType: cfg.Splitstore.TrackingStoreType, + MarkSetType: cfg.Splitstore.MarkSetType, + HotHeaders: cfg.Splitstore.HotHeaders || cfg.Splitstore.ColdStoreType == "noop", + SkipMoveColdBlocks: cfg.Splitstore.ColdStoreType == "noop", } ss, err := splitstore.Open(path, ds, hot, cold, cfg) if err != nil { From e4bb4be855600e2ccd1db2331eb927aeeaaf66b9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 08:13:26 +0300 Subject: [PATCH 082/197] fix some residual purge races --- blockstore/splitstore/splitstore.go | 67 ++++++++++++++++++----------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e9fe92f2f02..b710c4ee795 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -236,17 +236,19 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { } if has { - // treat it as an implicit Write, absence options -- the vm uses this check to avoid duplicate - // writes on Flush. When we have options in the API, the vm can explicitly signal that this is - // an implicit Write. 
+ // treat it as an implicit (recursive) Write, absence options -- the vm uses this check to avoid + // duplicate writes on Copy. + // When we have options in the API, the vm can explicitly signal that this is an implicit Write. s.trackWrite(c, true) // also make sure the object is considered live during compaction in case we have already - // flushed pending writes and started compaction + // flushed pending writes and started compaction. + // in case of a race with purge, this will return a track error, which we can use to + // signal to the vm that the object is not fully present. trackErr := s.trackTxnRef(c, true) // if we failed to track the object and all its dependencies, then return false so as - // to cause the vm to recompute + // to cause the vm to copy return trackErr == nil, nil } @@ -673,6 +675,16 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error { err = s.txnProtect.Mark(c) } else { err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + // this check is necessary to avoid races because objects are purged in random order + has, err := s.hot.Has(c) + if err != nil { + return err + } + + if !has { + return xerrors.Errorf("object (cid: %s) has been purged", c) + } + return s.txnProtect.Mark(c) }) } @@ -949,28 +961,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error creating mark set: %w", err) } defer markSet.Close() //nolint:errcheck - - // create the pruge protect filter - s.txnLk.Lock() - s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) - if err != nil { - s.txnLk.Unlock() - return xerrors.Errorf("error creating transactional mark set: %w", err) - } - s.txnLk.Unlock() - - defer func() { - s.txnLk.Lock() - _ = s.txnProtect.Close() - s.txnProtect = nil - s.txnLk.Unlock() - }() - defer s.debug.Flush() - // flush pending writes to update the tracker - s.flushPendingWrites(false) - // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() @@ -992,6 +984,28 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) + // create the transaction protect filter + s.txnLk.Lock() + s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) + if err != nil { + s.txnLk.Unlock() + return xerrors.Errorf("error creating transactional mark set: %w", err) + } + s.txnLk.Unlock() + + defer func() { + s.txnLk.Lock() + _ = s.txnProtect.Close() + s.txnProtect = nil + s.txnLk.Unlock() + }() + + // flush pending writes to update the tracker + log.Info("flushing pending writes") + startFlush := time.Now() + s.flushPendingWrites(false) + log.Infow("flushing done", "took", time.Since(startFlush)) + // 2. 
move cold unreachable objects to the coldstore log.Info("collecting cold objects") startCollect := time.Now() @@ -1071,6 +1085,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } log.Infow("moving done", "took", time.Since(startMove)) } + // 2.3 purge cold objects from the hotstore log.Info("purging cold objects from the hotstore") startPurge := time.Now() @@ -1293,7 +1308,7 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { deadCids := make([]cid.Cid, 0, batchSize) var purgeCnt, liveCnt int defer func() { - log.Infow("purged objects", "purged", purgeCnt, "live", liveCnt) + log.Infow("purged cold objects", "purged", purgeCnt, "live", liveCnt) }() return s.purgeBatch(cids, From 5834231e585bdfebee4354dc9f094984dfe16dc7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 08:33:16 +0300 Subject: [PATCH 083/197] create the transactional protect filter before walking --- blockstore/splitstore/splitstore.go | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b710c4ee795..b71b389272d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -963,6 +963,22 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { defer markSet.Close() //nolint:errcheck defer s.debug.Flush() + // create the transaction protect filter + s.txnLk.Lock() + s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) + if err != nil { + s.txnLk.Unlock() + return xerrors.Errorf("error creating transactional mark set: %w", err) + } + s.txnLk.Unlock() + + defer func() { + s.txnLk.Lock() + _ = s.txnProtect.Close() + s.txnProtect = nil + s.txnLk.Unlock() + }() + // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() @@ -984,22 +1000,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) - // create the transaction protect filter - s.txnLk.Lock() - s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) - if err != nil { - s.txnLk.Unlock() - return xerrors.Errorf("error creating transactional mark set: %w", err) - } - s.txnLk.Unlock() - - defer func() { - s.txnLk.Lock() - _ = s.txnProtect.Close() - s.txnProtect = nil - s.txnLk.Unlock() - }() - // flush pending writes to update the tracker log.Info("flushing pending writes") startFlush := time.Now() From 39723bbe609e5f91d0a40378116a313cc56ae1fd Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 10:27:03 +0300 Subject: [PATCH 084/197] use a single map for tracking pending writes, properly track implicits --- blockstore/splitstore/splitstore.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b71b389272d..6cff953b868 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -156,8 +156,7 @@ type SplitStore struct { txnProtect MarkSet // pending write set - pendingWrites map[cid.Cid]struct{} - pendingWritesImplicit map[cid.Cid]struct{} + pendingWrites map[cid.Cid]bool } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -199,7 +198,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co coldPurgeSize: defaultColdPurgeSize, - 
pendingWrites: make(map[cid.Cid]struct{}), + pendingWrites: make(map[cid.Cid]bool), } ss.ctx, ss.cancel = context.WithCancel(context.Background()) @@ -579,7 +578,7 @@ func (s *SplitStore) trackWrite(c cid.Cid, implicit bool) { s.mx.Lock() defer s.mx.Unlock() - s.pendingWrites[c] = struct{}{} + s.pendingWrites[c] = implicit } // and also combine batch writes into them @@ -588,7 +587,7 @@ func (s *SplitStore) trackWriteMany(cids []cid.Cid) { defer s.mx.Unlock() for _, c := range cids { - s.pendingWrites[c] = struct{}{} + s.pendingWrites[c] = false } } @@ -605,7 +604,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) { cids := make([]cid.Cid, 0, len(s.pendingWrites)) seen := make(map[cid.Cid]struct{}) walked := cid.NewSet() - for c := range s.pendingWrites { + for c, implicit := range s.pendingWrites { _, ok := seen[c] if ok { continue @@ -614,7 +613,6 @@ func (s *SplitStore) flushPendingWrites(locked bool) { cids = append(cids, c) seen[c] = struct{}{} - _, implicit := s.pendingWritesImplicit[c] if !implicit { continue } @@ -639,10 +637,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) { } } - s.pendingWrites = make(map[cid.Cid]struct{}) - if len(s.pendingWritesImplicit) > 0 { - s.pendingWritesImplicit = make(map[cid.Cid]struct{}) - } + s.pendingWrites = make(map[cid.Cid]bool) epoch := s.writeEpoch err := s.tracker.PutBatch(cids, epoch) From 736d6a3c19825f08c35cfcb2f22905fd6b48be28 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 11:02:10 +0300 Subject: [PATCH 085/197] only treat Has as an implicit write within vm.Copy context --- blockstore/splitstore/splitstore.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 6cff953b868..421bc9ba022 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -5,6 +5,8 @@ import ( "context" "encoding/binary" "errors" + "runtime/debug" + "strings" "sync" "sync/atomic" "time" @@ -235,16 +237,21 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { } if has { - // treat it as an implicit (recursive) Write, absence options -- the vm uses this check to avoid - // duplicate writes on Copy. - // When we have options in the API, the vm can explicitly signal that this is an implicit Write. - s.trackWrite(c, true) + // treat it as an implicit (recursive) Write, when it is within vm.Copy context. + // -- the vm uses this check to avoid duplicate writes on Copy. + // When we have options in the API (or something better), the vm can explicitly signal + // that this is an implicit Write. + vmCtx := s.isVMCopyContext() + if vmCtx { + s.trackWrite(c, true) + } // also make sure the object is considered live during compaction in case we have already // flushed pending writes and started compaction. + // when within vm copy context, dags will be recursively referenced. // in case of a race with purge, this will return a track error, which we can use to // signal to the vm that the object is not fully present. 
- trackErr := s.trackTxnRef(c, true) + trackErr := s.trackTxnRef(c, vmCtx) // if we failed to track the object and all its dependencies, then return false so as // to cause the vm to copy @@ -648,6 +655,11 @@ func (s *SplitStore) flushPendingWrites(locked bool) { s.debug.LogWriteMany(s.curTs, cids, epoch) } +func (s *SplitStore) isVMCopyContext() bool { + sk := string(debug.Stack()) + return strings.Contains(sk, "filecoin-project/lotus/chain/vm.Copy") +} + func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error { if s.txnProtect == nil { // not compacting From 8157f889ce31dbc4dc9a868b88fabaa88a1486b4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 12:02:36 +0300 Subject: [PATCH 086/197] short-circuit marking walks when encountering a block and more efficient walking --- blockstore/splitstore/splitstore.go | 92 +++++++++++++++++++---------- 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 421bc9ba022..cb4aae62178 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -77,6 +77,9 @@ var ( log = logging.Logger("splitstore") + // used to signal end of walk + errStopWalk = errors.New("stop walk") + // set this to true if you are debugging the splitstore to enable debug logging enableDebugLog = false // set this to true if you want to track origin stack traces in the write log @@ -636,6 +639,16 @@ func (s *SplitStore) flushPendingWrites(locked bool) { seen[c] = struct{}{} } + // if it is a block reference, short-circuit or else we'll end up walking the entire chain + isBlock, err := s.isBlockHeader(c) + if err != nil { + return xerrors.Errorf("error determining if cid %s is a block header: %w", c, err) + } + + if isBlock { + return errStopWalk + } + return nil }) @@ -660,6 +673,16 @@ func (s *SplitStore) isVMCopyContext() bool { return strings.Contains(sk, "filecoin-project/lotus/chain/vm.Copy") } +func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { + err = s.view(c, func(data []byte) error { + var hdr types.BlockHeader + isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil + return nil + }) + + return isBlock, err +} + func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error { if s.txnProtect == nil { // not compacting @@ -682,17 +705,25 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error { err = s.txnProtect.Mark(c) } else { err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { - // this check is necessary to avoid races because objects are purged in random order - has, err := s.hot.Has(c) + // check if it is a block; implicitly checks if the object exists --if it doesn't because + // it has been purged, it will be an error + isBlock, err := s.isBlockHeader(c) + if err != nil { + return xerrors.Errorf("error determining if cid %s is a block header: %w", c, err) + } + + // mark the object + err = s.txnProtect.Mark(c) if err != nil { return err } - if !has { - return xerrors.Errorf("object (cid: %s) has been purged", c) + // if it is a block reference, short-circuit or else we'll end up walking the entire chain + if isBlock { + return errStopWalk } - return s.txnProtect.Mark(c) + return nil }) } @@ -1142,13 +1173,12 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, f return err } - blk, err := s.get(c) - if err != nil { - return xerrors.Errorf("error retrieving block (cid: %s): %w", c, err) - } - var hdr types.BlockHeader - if err := 
hdr.UnmarshalCBOR(bytes.NewBuffer(blk.RawData())); err != nil { + err := s.view(c, func(data []byte) error { + return hdr.UnmarshalCBOR(bytes.NewBuffer(data)) + }) + + if err != nil { return xerrors.Errorf("error unmarshaling block header (cid: %s): %w", c, err) } @@ -1203,6 +1233,10 @@ func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error } if err := f(c); err != nil { + if err == errStopWalk { + return nil + } + return err } @@ -1210,40 +1244,36 @@ func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error return nil } - blk, err := s.get(c) + var links []cid.Cid + err := s.view(c, func(data []byte) error { + return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { + links = append(links, c) + }) + }) + if err != nil { - return xerrors.Errorf("error retrieving linked block (cid: %s): %w", c, err) + return xerrors.Errorf("error scanning linked block (cid: %s): %w", c, err) } - var rerr error - err = cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) { - if rerr != nil { - return - } - + for _, c := range links { err := s.walkLinks(c, walked, f) if err != nil { - rerr = err + return xerrors.Errorf("error walking link (cid: %s): %w", c, err) } - }) - - if err != nil { - return xerrors.Errorf("error scanning links (cid: %s): %w", c, err) } - return rerr + return nil } -// internal version used by walk so that we don't blow the txn -func (s *SplitStore) get(cid cid.Cid) (blocks.Block, error) { - blk, err := s.hot.Get(cid) - +// internal version used by walk +func (s *SplitStore) view(cid cid.Cid, cb func([]byte) error) error { + err := s.hot.View(cid, cb) switch err { case bstore.ErrNotFound: - return s.cold.Get(cid) + return s.cold.View(cid, cb) default: - return blk, err + return err } } From 9d6cabd18ab032b34fcb2863b578015f2e2a4415 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 12:08:40 +0300 Subject: [PATCH 087/197] if it's not a dag, it's not a block --- blockstore/splitstore/splitstore.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index cb4aae62178..f04d18ec0a6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -674,6 +674,10 @@ func (s *SplitStore) isVMCopyContext() bool { } func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { + if c.Prefix().Codec != cid.DagCBOR { + return false, nil + } + err = s.view(c, func(data []byte) error { var hdr types.BlockHeader isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil From 228a435ba7ab4a28af2703cbf25a8cef9c72b22e Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 16:10:37 +0300 Subject: [PATCH 088/197] rework tracking logic; do it lazily and far more efficiently --- blockstore/splitstore/markset.go | 2 +- blockstore/splitstore/splitstore.go | 316 +++++++++++++++------------- 2 files changed, 169 insertions(+), 149 deletions(-) diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index 2eb46f6d856..491020c6ecf 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -65,7 +65,7 @@ func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) { func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { return &MapMarkSet{ - set: make(map[string]struct{}), + set: make(map[string]struct{}, sizeHint), ts: e.ts, }, nil } diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index f04d18ec0a6..9aca63ec920 
100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -79,6 +79,8 @@ var ( // used to signal end of walk errStopWalk = errors.New("stop walk") + // used to signal a missing object when protecting recursive references + errMissingObject = errors.New("missing object") // set this to true if you are debugging the splitstore to enable debug logging enableDebugLog = false @@ -146,8 +148,7 @@ type SplitStore struct { cold bstore.Blockstore tracker TrackingStore - env MarkSetEnv - + markSetEnv MarkSetEnv markSetSize int64 ctx context.Context @@ -159,9 +160,10 @@ type SplitStore struct { txnLk sync.RWMutex txnEnv MarkSetEnv txnProtect MarkSet + txnMarkSet MarkSet // pending write set - pendingWrites map[cid.Cid]bool + pendingWrites map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -177,7 +179,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co } // the markset env - env, err := OpenMarkSetEnv(path, cfg.MarkSetType) + markSetEnv, err := OpenMarkSetEnv(path, "mapts") if err != nil { _ = tracker.Close() return nil, err @@ -187,23 +189,23 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co txnEnv, err := OpenMarkSetEnv(path, "mapts") if err != nil { _ = tracker.Close() - _ = env.Close() + _ = markSetEnv.Close() return nil, err } // and now we can make a SplitStore ss := &SplitStore{ - cfg: cfg, - ds: ds, - hot: hot, - cold: cold, - tracker: tracker, - env: env, - txnEnv: txnEnv, + cfg: cfg, + ds: ds, + hot: hot, + cold: cold, + tracker: tracker, + markSetEnv: markSetEnv, + txnEnv: txnEnv, coldPurgeSize: defaultColdPurgeSize, - pendingWrites: make(map[cid.Cid]bool), + pendingWrites: make(map[cid.Cid]struct{}), } ss.ctx, ss.cancel = context.WithCancel(context.Background()) @@ -246,7 +248,7 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // that this is an implicit Write. vmCtx := s.isVMCopyContext() if vmCtx { - s.trackWrite(c, true) + s.trackWrite(c) } // also make sure the object is considered live during compaction in case we have already @@ -254,11 +256,14 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // when within vm copy context, dags will be recursively referenced. // in case of a race with purge, this will return a track error, which we can use to // signal to the vm that the object is not fully present. - trackErr := s.trackTxnRef(c, vmCtx) + err = s.trackTxnRef(c, vmCtx) + if xerrors.Is(err, errMissingObject) { + // we failed to recursively protect the object because some inner object has been purged; + // signal to the VM to copy. 
+ return false, nil + } - // if we failed to track the object and all its dependencies, then return false so as - // to cause the vm to copy - return trackErr == nil, nil + return true, err } return s.cold.Has(c) @@ -333,7 +338,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.hot.Put(blk) if err == nil { - s.trackWrite(blk.Cid(), false) + s.trackWrite(blk.Cid()) err = s.trackTxnRef(blk.Cid(), false) } @@ -509,7 +514,7 @@ func (s *SplitStore) Close() error { s.flushPendingWrites(false) s.cancel() - return multierr.Combine(s.tracker.Close(), s.env.Close(), s.debug.Close()) + return multierr.Combine(s.tracker.Close(), s.markSetEnv.Close(), s.debug.Close()) } func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { @@ -584,11 +589,11 @@ func (s *SplitStore) updateWriteEpoch() { // Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with // bbolt because of syncing (order of 10ms), so we batch them. -func (s *SplitStore) trackWrite(c cid.Cid, implicit bool) { +func (s *SplitStore) trackWrite(c cid.Cid) { s.mx.Lock() defer s.mx.Unlock() - s.pendingWrites[c] = implicit + s.pendingWrites[c] = struct{}{} } // and also combine batch writes into them @@ -597,7 +602,7 @@ func (s *SplitStore) trackWriteMany(cids []cid.Cid) { defer s.mx.Unlock() for _, c := range cids { - s.pendingWrites[c] = false + s.pendingWrites[c] = struct{}{} } } @@ -612,52 +617,10 @@ func (s *SplitStore) flushPendingWrites(locked bool) { } cids := make([]cid.Cid, 0, len(s.pendingWrites)) - seen := make(map[cid.Cid]struct{}) - walked := cid.NewSet() - for c, implicit := range s.pendingWrites { - _, ok := seen[c] - if ok { - continue - } - + for c := range s.pendingWrites { cids = append(cids, c) - seen[c] = struct{}{} - - if !implicit { - continue - } - - // recursively walk dags to propagate dependent references - if c.Prefix().Codec != cid.DagCBOR { - continue - } - - err := s.walkLinks(c, walked, func(c cid.Cid) error { - _, ok := seen[c] - if !ok { - cids = append(cids, c) - seen[c] = struct{}{} - } - - // if it is a block reference, short-circuit or else we'll end up walking the entire chain - isBlock, err := s.isBlockHeader(c) - if err != nil { - return xerrors.Errorf("error determining if cid %s is a block header: %w", c, err) - } - - if isBlock { - return errStopWalk - } - - return nil - }) - - if err != nil { - log.Warnf("error tracking dependent writes for cid %s: %s", c, err) - } } - - s.pendingWrites = make(map[cid.Cid]bool) + s.pendingWrites = make(map[cid.Cid]struct{}) epoch := s.writeEpoch err := s.tracker.PutBatch(cids, epoch) @@ -687,55 +650,52 @@ func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { return isBlock, err } -func (s *SplitStore) trackTxnRef(c cid.Cid, implicit bool) error { +func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { if s.txnProtect == nil { // not compacting return nil } - // NOTE: this occurs check assumes a markset without false positives, which is currently the case - // with the map - has, err := s.txnProtect.Has(c) - if err != nil { - log.Errorf("error occur checking object (cid: %s) for compaction transaction: %s", c, err) - return err + if !recursive { + return s.txnProtect.Mark(c) } - if has { - return nil - } + // it's a recursive reference in vm context, protect links if they are not in the markset already + return s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + mark, err := s.txnMarkSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, 
err) + } - if c.Prefix().Codec != cid.DagCBOR || !implicit { - err = s.txnProtect.Mark(c) - } else { - err = s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { - // check if it is a block; implicitly checks if the object exists --if it doesn't because - // it has been purged, it will be an error - isBlock, err := s.isBlockHeader(c) - if err != nil { - return xerrors.Errorf("error determining if cid %s is a block header: %w", c, err) - } + // it's marked, nothing to do + if mark { + return errStopWalk + } - // mark the object - err = s.txnProtect.Mark(c) - if err != nil { - return err - } + live, err := s.txnProtect.Has(c) + if err != nil { + return xerrors.Errorf("error checking portected set for %s: %w", c, err) + } - // if it is a block reference, short-circuit or else we'll end up walking the entire chain - if isBlock { - return errStopWalk - } + if live { + return errStopWalk + } - return nil - }) - } + // this occurs check is necessary because cold objects are purged in arbitrary order + has, err := s.hot.Has(c) + if err != nil { + return xerrors.Errorf("error checking hotstore for %s: %w", c, err) + } - if err != nil { - log.Warnf("error protecting object (cid: %s) from compaction transaction: %s", c, err) - } + // it has been deleted, signal to the vm to copy + if !has { + log.Warnf("missing object for recursive reference to %s: %s", c, err) + return errMissingObject + } - return err + // mark it + return s.txnProtect.Mark(c) + }) } func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { @@ -998,38 +958,32 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) - markSet, err := s.env.Create("live", s.markSetSize) + markSet, err := s.markSetEnv.Create("live", s.markSetSize) if err != nil { return xerrors.Errorf("error creating mark set: %w", err) } defer markSet.Close() //nolint:errcheck defer s.debug.Flush() - // create the transaction protect filter - s.txnLk.Lock() - s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) - if err != nil { - s.txnLk.Unlock() - return xerrors.Errorf("error creating transactional mark set: %w", err) - } - s.txnLk.Unlock() - - defer func() { - s.txnLk.Lock() - _ = s.txnProtect.Close() - s.txnProtect = nil - s.txnLk.Unlock() - }() - // 1. 
mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) startMark := time.Now() var count int64 err = s.walk(curTs, boundaryEpoch, true, s.cfg.HotHeaders, - func(cid cid.Cid) error { + func(c cid.Cid) error { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } + + if mark { + // already marked, don't recurse its links + return errStopWalk + } + count++ - return markSet.Mark(cid) + return markSet.Mark(c) }) if err != nil { @@ -1042,34 +996,45 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) + // create the transaction protect filter + s.txnLk.Lock() + s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) + if err != nil { + s.txnLk.Unlock() + return xerrors.Errorf("error creating transactional mark set: %w", err) + } + s.txnMarkSet = markSet + s.txnLk.Unlock() + + defer func() { + s.txnLk.Lock() + _ = s.txnProtect.Close() + s.txnProtect = nil + s.txnMarkSet = nil + s.txnLk.Unlock() + }() + // flush pending writes to update the tracker - log.Info("flushing pending writes") - startFlush := time.Now() s.flushPendingWrites(false) - log.Infow("flushing done", "took", time.Since(startFlush)) // 2. move cold unreachable objects to the coldstore - log.Info("collecting cold objects") + log.Info("collecting candidate cold objects") startCollect := time.Now() - cold := make([]cid.Cid, 0, s.coldPurgeSize) + candidates := make(map[cid.Cid]struct{}, s.coldPurgeSize) + towalk := make([]cid.Cid, 0, count) // some stats for logging var hotCnt, coldCnt, liveCnt int // 2.1 iterate through the tracking store and collect unreachable cold objects - err = s.tracker.ForEach(func(cid cid.Cid, writeEpoch abi.ChainEpoch) error { - // is the object still hot? - if writeEpoch > coldEpoch { - // yes, stay in the hotstore - hotCnt++ - return nil - } - - // check whether it is reachable in the cold boundary - mark, err := markSet.Has(cid) + // for every hot object that is a dag and not in the markset, walk for links and + // and mark reachable objects + err = s.tracker.ForEach(func(c cid.Cid, writeEpoch abi.ChainEpoch) error { + // was it marked? + mark, err := markSet.Has(c) if err != nil { - return xerrors.Errorf("error checkiing mark set for %s: %w", cid, err) + return xerrors.Errorf("error checkiing mark set for %s: %w", c, err) } if mark { @@ -1077,37 +1042,92 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } - live, err := s.txnProtect.Has(cid) - if err != nil { - return xerrors.Errorf("error checking liveness for %s: %w", cid, err) - } + // is the object still hot? 
+ if writeEpoch > coldEpoch { + // yes, stay in the hotstore + hotCnt++ - if live { - liveCnt++ + // if it is a DAG, add it to the walk list to recursively update the markset + if c.Prefix().Codec != cid.DagCBOR { + return nil + } + + towalk = append(towalk, c) return nil } - // it's cold, mark it for move - cold = append(cold, cid) + // it's cold, mark it as candidate for move + candidates[c] = struct{}{} coldCnt++ return nil }) if err != nil { - return xerrors.Errorf("error collecting cold objects: %w", err) + return xerrors.Errorf("error collecting candidate cold objects: %w", err) } + log.Infow("candidate collection done", "took", time.Since(startCollect)) + if coldCnt > 0 { s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit } - log.Infow("collection done", "took", time.Since(startCollect)) + // walk hot dags that were not marked and recursively update the mark set + log.Info("updating mark set for hot dags") + startMark = time.Now() + + walked := cid.NewSet() + for _, c := range towalk { + err = s.walkLinks(c, walked, func(c cid.Cid) error { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } + + if mark { + // already marked, don't recurse its links + return errStopWalk + } + + liveCnt++ + return markSet.Mark(c) + }) + + if err != nil { + return xerrors.Errorf("error walking %s: %w", c, err) + } + } + + log.Infow("updating mark set done", "took", time.Since(startMark)) + + // filter the candidate set for objects newly marked as hot + if liveCnt > 0 { + for c := range candidates { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } + + if mark { + delete(candidates, c) + } + } + } + + // create the cold object list + coldCnt -= liveCnt + cold := make([]cid.Cid, 0, coldCnt) + for c := range candidates { + cold = append(cold, c) + } + log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "live", liveCnt) stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) // Enter critical section + log.Info("entering critical section") atomic.StoreInt32(&s.critsection, 1) defer atomic.StoreInt32(&s.critsection, 0) From 184d3802b6c8db3e1d7c6b2fb7331a81a6eddfed Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 16:13:25 +0300 Subject: [PATCH 089/197] remove dead code --- blockstore/splitstore/splitstore.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 9aca63ec920..523a11442f6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -636,20 +636,6 @@ func (s *SplitStore) isVMCopyContext() bool { return strings.Contains(sk, "filecoin-project/lotus/chain/vm.Copy") } -func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { - if c.Prefix().Codec != cid.DagCBOR { - return false, nil - } - - err = s.view(c, func(data []byte) error { - var hdr types.BlockHeader - isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil - return nil - }) - - return isBlock, err -} - func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { if s.txnProtect == nil { // not compacting From 2b03316cd91f972bdcde2b2d5c2949bd61ad8b21 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 16:15:02 +0300 Subject: [PATCH 090/197] fix log message --- blockstore/splitstore/splitstore.go | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 523a11442f6..9850a939b6c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -625,7 +625,7 @@ func (s *SplitStore) flushPendingWrites(locked bool) { epoch := s.writeEpoch err := s.tracker.PutBatch(cids, epoch) if err != nil { - log.Errorf("error putting implicit write batch to tracker: %s", err) + log.Errorf("error putting write batch to tracker: %s", err) } s.debug.LogWriteMany(s.curTs, cids, epoch) From 6f58fdcb22a734cfef38f8a321e581901ba9ea97 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 19:06:08 +0300 Subject: [PATCH 091/197] remove vm copy context detection hack stack tracing is slow. --- blockstore/splitstore/splitstore.go | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 9850a939b6c..264f1f3af77 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -5,8 +5,6 @@ import ( "context" "encoding/binary" "errors" - "runtime/debug" - "strings" "sync" "sync/atomic" "time" @@ -246,17 +244,14 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // -- the vm uses this check to avoid duplicate writes on Copy. // When we have options in the API (or something better), the vm can explicitly signal // that this is an implicit Write. - vmCtx := s.isVMCopyContext() - if vmCtx { - s.trackWrite(c) - } + s.trackWrite(c) // also make sure the object is considered live during compaction in case we have already // flushed pending writes and started compaction. // when within vm copy context, dags will be recursively referenced. // in case of a race with purge, this will return a track error, which we can use to // signal to the vm that the object is not fully present. - err = s.trackTxnRef(c, vmCtx) + err = s.trackTxnRef(c, true) if xerrors.Is(err, errMissingObject) { // we failed to recursively protect the object because some inner object has been purged; // signal to the VM to copy. 
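// An illustrative aside, not part of the patch: the hack removed below
// detected vm.Copy callers by capturing and scanning the goroutine stack.
// debug.Stack formats the entire stack on every call, which is far too
// expensive for the blockstore hot path. A rough micro-benchmark of that
// cost (assumes imports of "runtime/debug", "strings" and "testing"):
func BenchmarkVMCopyDetection(b *testing.B) {
	for i := 0; i < b.N; i++ {
		_ = strings.Contains(string(debug.Stack()), "filecoin-project/lotus/chain/vm.Copy")
	}
}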
@@ -631,11 +626,6 @@ func (s *SplitStore) flushPendingWrites(locked bool) { s.debug.LogWriteMany(s.curTs, cids, epoch) } -func (s *SplitStore) isVMCopyContext() bool { - sk := string(debug.Stack()) - return strings.Contains(sk, "filecoin-project/lotus/chain/vm.Copy") -} - func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { if s.txnProtect == nil { // not compacting From d79e4da7aa283b17e27d0e1c2d278230d10940a7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 3 Jul 2021 21:56:46 +0300 Subject: [PATCH 092/197] more accurate stats about mark set updates --- blockstore/splitstore/splitstore.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 264f1f3af77..658b915ddf9 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1053,6 +1053,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Info("updating mark set for hot dags") startMark = time.Now() + count = 0 walked := cid.NewSet() for _, c := range towalk { err = s.walkLinks(c, walked, func(c cid.Cid) error { @@ -1066,7 +1067,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return errStopWalk } - liveCnt++ + count++ return markSet.Mark(c) }) @@ -1075,7 +1076,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } } - log.Infow("updating mark set done", "took", time.Since(startMark)) + log.Infow("updating mark set done", "took", time.Since(startMark), "marked", count) // filter the candidate set for objects newly marked as hot if liveCnt > 0 { @@ -1087,13 +1088,13 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { if mark { delete(candidates, c) + liveCnt++ } } } // create the cold object list - coldCnt -= liveCnt - cold := make([]cid.Cid, 0, coldCnt) + cold := make([]cid.Cid, 0, len(candidates)) for c := range candidates { cold = append(cold, c) } From c5cf8e226b110118f74e42ebe57c24a59c4d4757 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 00:32:41 +0300 Subject: [PATCH 093/197] remove unnecessary code --- blockstore/splitstore/splitstore.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 658b915ddf9..c4a55490372 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -948,16 +948,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { var count int64 err = s.walk(curTs, boundaryEpoch, true, s.cfg.HotHeaders, func(c cid.Cid) error { - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) - } - - if mark { - // already marked, don't recurse its links - return errStopWalk - } - count++ return markSet.Mark(c) }) From 642f0e47407d91230845ac56273d9542626b9b0d Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 06:21:04 +0300 Subject: [PATCH 094/197] deal with memory pressure, don't walk under the boundary --- blockstore/splitstore/splitstore.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c4a55490372..4b170f91f28 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -177,7 +177,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co } // the markset env - markSetEnv, err := OpenMarkSetEnv(path, "mapts") + markSetEnv, err := 
OpenMarkSetEnv(path, "bolt") if err != nil { _ = tracker.Close() return nil, err @@ -964,7 +964,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // create the transaction protect filter s.txnLk.Lock() - s.txnProtect, err = s.txnEnv.Create("protected", s.markSetSize) + s.txnProtect, err = s.txnEnv.Create("protected", 0) if err != nil { s.txnLk.Unlock() return xerrors.Errorf("error creating transactional mark set: %w", err) @@ -988,10 +988,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { startCollect := time.Now() candidates := make(map[cid.Cid]struct{}, s.coldPurgeSize) - towalk := make([]cid.Cid, 0, count) + var towalk []cid.Cid // some stats for logging - var hotCnt, coldCnt, liveCnt int + var hotCnt, coldCnt, slackCnt, liveCnt int // 2.1 iterate through the tracking store and collect unreachable cold objects // for every hot object that is a dag and not in the markset, walk for links and @@ -1009,7 +1009,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } // is the object still hot? - if writeEpoch > coldEpoch { + if writeEpoch >= boundaryEpoch { // yes, stay in the hotstore hotCnt++ @@ -1022,6 +1022,14 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } + // is the object in slack region? + if writeEpoch > coldEpoch { + // yes stay in the hotstore, but we wont walk you + slackCnt++ + + return nil + } + // it's cold, mark it as candidate for move candidates[c] = struct{}{} coldCnt++ @@ -1089,7 +1097,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { cold = append(cold, c) } - log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "live", liveCnt) + log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "live", liveCnt, "slack", slackCnt) stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) From 00fcf6dd72d04f741f82b37b3cf93db2c7f162d7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 06:34:26 +0300 Subject: [PATCH 095/197] add staging cache to bolt tracking store --- blockstore/splitstore/markset_bolt.go | 51 ++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/markset_bolt.go b/blockstore/splitstore/markset_bolt.go index cab0dd74af9..bac7673b881 100644 --- a/blockstore/splitstore/markset_bolt.go +++ b/blockstore/splitstore/markset_bolt.go @@ -1,6 +1,7 @@ package splitstore import ( + "sync" "time" "golang.org/x/xerrors" @@ -9,6 +10,8 @@ import ( bolt "go.etcd.io/bbolt" ) +const boltMarkSetStaging = 16384 + type BoltMarkSetEnv struct { db *bolt.DB } @@ -18,6 +21,10 @@ var _ MarkSetEnv = (*BoltMarkSetEnv)(nil) type BoltMarkSet struct { db *bolt.DB bucketId []byte + + // cache for batching + mx sync.RWMutex + pend map[string]struct{} } var _ MarkSet = (*BoltMarkSet)(nil) @@ -49,7 +56,11 @@ func (e *BoltMarkSetEnv) Create(name string, hint int64) (MarkSet, error) { return nil, err } - return &BoltMarkSet{db: e.db, bucketId: bucketId}, nil + return &BoltMarkSet{ + db: e.db, + bucketId: bucketId, + pend: make(map[string]struct{}), + }, nil } func (e *BoltMarkSetEnv) Close() error { @@ -57,16 +68,48 @@ func (e *BoltMarkSetEnv) Close() error { } func (s *BoltMarkSet) Mark(cid cid.Cid) error { - return s.db.Update(func(tx *bolt.Tx) error { + s.mx.Lock() + defer s.mx.Unlock() + + key := cid.Hash() + s.pend[string(key)] = struct{}{} + + if len(s.pend) < boltMarkSetStaging { + return nil + } + + err := 
s.db.Batch(func(tx *bolt.Tx) error { b := tx.Bucket(s.bucketId) - return b.Put(cid.Hash(), markBytes) + for key := range s.pend { + err := b.Put([]byte(key), markBytes) + if err != nil { + return err + } + } + return nil }) + + if err != nil { + return err + } + + s.pend = make(map[string]struct{}) + return nil } func (s *BoltMarkSet) Has(cid cid.Cid) (result bool, err error) { + s.mx.RLock() + defer s.mx.RUnlock() + + key := cid.Hash() + _, result = s.pend[string(key)] + if result { + return result, nil + } + err = s.db.View(func(tx *bolt.Tx) error { b := tx.Bucket(s.bucketId) - v := b.Get(cid.Hash()) + v := b.Get(key) result = v != nil return nil }) From 68a83500bc278b061b83c2e10ad7defca1225d9d Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 07:00:37 +0300 Subject: [PATCH 096/197] fix bug that turned candidate filtering to dead code --- blockstore/splitstore/splitstore.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 4b170f91f28..2353181fb31 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1077,17 +1077,15 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("updating mark set done", "took", time.Since(startMark), "marked", count) // filter the candidate set for objects newly marked as hot - if liveCnt > 0 { - for c := range candidates { - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) - } + for c := range candidates { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } - if mark { - delete(candidates, c) - liveCnt++ - } + if mark { + delete(candidates, c) + liveCnt++ } } From d476a3db2c1761a5ebd12737fe34719f9bb15c74 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 08:43:23 +0300 Subject: [PATCH 097/197] BlockstoreIterator trait with implementation for badger --- blockstore/badger/blockstore.go | 47 +++++++++++++++++++++++++++++++++ blockstore/blockstore.go | 5 ++++ 2 files changed, 52 insertions(+) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index e03266ab7f9..1b67048a6c4 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -97,6 +97,7 @@ type Blockstore struct { var _ blockstore.Blockstore = (*Blockstore)(nil) var _ blockstore.Viewer = (*Blockstore)(nil) +var _ blockstore.BlockstoreIterator = (*Blockstore)(nil) var _ io.Closer = (*Blockstore)(nil) // Open creates a new badger-backed blockstore, with the supplied options. 
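// Design note: AllKeysChan costs a goroutine plus a channel hop per key,
// while the callback-based ForEachKey added in the next hunk walks the badger
// iterator directly. A sketch of how the splitstore consumes it (mirroring
// the type assertion used later in this series):
it, ok := hot.(blockstore.BlockstoreIterator)
if !ok {
	return xerrors.Errorf("hot blockstore does not support efficient iteration: %T", hot)
}
count := 0
if err := it.ForEachKey(func(c cid.Cid) error {
	count++ // visit every key without materializing block data
	return nil
}); err != nil {
	return err
}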
@@ -442,6 +443,52 @@ func (b *Blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { return ch, nil } +// Implementation of BlockstoreIterator interface +func (b *Blockstore) ForEachKey(f func(cid.Cid) error) error { + if atomic.LoadInt64(&b.state) != stateOpen { + return ErrBlockstoreClosed + } + + txn := b.DB.NewTransaction(false) + defer txn.Discard() + + opts := badger.IteratorOptions{PrefetchSize: 100} + if b.prefixing { + opts.Prefix = b.prefix + } + + iter := txn.NewIterator(opts) + defer iter.Close() + + for iter.Rewind(); iter.Valid(); iter.Next() { + if atomic.LoadInt64(&b.state) != stateOpen { + return ErrBlockstoreClosed + } + + k := iter.Item().Key() + if b.prefixing { + k = k[b.prefixLen:] + } + + klen := base32.RawStdEncoding.DecodedLen(len(k)) + buf := make([]byte, klen) + + n, err := base32.RawStdEncoding.Decode(buf, k) + if err != nil { + return err + } + + c := cid.NewCidV1(cid.Raw, buf[:n]) + + err = f(c) + if err != nil { + return err + } + } + + return nil +} + // HashOnRead implements Blockstore.HashOnRead. It is not supported by this // blockstore. func (b *Blockstore) HashOnRead(_ bool) { diff --git a/blockstore/blockstore.go b/blockstore/blockstore.go index 23f0bd7546c..084bbaecc57 100644 --- a/blockstore/blockstore.go +++ b/blockstore/blockstore.go @@ -30,6 +30,11 @@ type BatchDeleter interface { DeleteMany(cids []cid.Cid) error } +// BlockstoreIterator is a trait for efficient iteration +type BlockstoreIterator interface { + ForEachKey(func(cid.Cid) error) error +} + // WrapIDStore wraps the underlying blockstore in an "identity" blockstore. // The ID store filters out all puts for blocks with CIDs using the "identity" // hash function. It also extracts inlined blocks from CIDs using the identity From 1f2b604c070e3172405139ca43d2d3babac01b74 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 09:53:58 +0300 Subject: [PATCH 098/197] RIP tracking store --- blockstore/splitstore/debug.go | 13 +- blockstore/splitstore/splitstore.go | 509 ++++++++-------------------- 2 files changed, 156 insertions(+), 366 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 47d61816f3d..18ea436daab 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -18,6 +18,7 @@ import ( "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/chain/types" + blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" ) @@ -102,7 +103,7 @@ func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) { } } -func (d *debugLog) LogWrite(curTs *types.TipSet, c cid.Cid, writeEpoch abi.ChainEpoch) { +func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch abi.ChainEpoch) { if d == nil { return } @@ -122,13 +123,13 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, c cid.Cid, writeEpoch abi.Chain d.writeCnt++ - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curEpoch, c, writeEpoch, stack) + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curEpoch, blk.Cid(), writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) } } -func (d *debugLog) LogWriteMany(curTs *types.TipSet, cids []cid.Cid, writeEpoch abi.ChainEpoch) { +func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeEpoch abi.ChainEpoch) { if d == nil { return } @@ -146,11 +147,11 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, cids []cid.Cid, writeEpoch d.writeMx.Lock() defer d.writeMx.Unlock() - 
d.writeCnt += len(cids) + d.writeCnt += len(blks) now := d.timestamp() - for _, c := range cids { - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curEpoch, c, writeEpoch, stack) + for _, blk := range blks { + _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curEpoch, blk.Cid(), writeEpoch, stack) if err != nil { log.Warnf("error writing write log: %s", err) break diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 2353181fb31..8a32f359d47 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -103,16 +103,11 @@ type Config struct { // Supported values are: "bloom" (default if omitted), "bolt". MarkSetType string - // HotHeaders indicates whether to keep chain block headers in hotstore or not. - // This is necessary, and automatically set by DI in lotus node construction, if - // you are running with a noop coldstore. - HotHeaders bool - // SkipMoveColdBlocks indicates whether to skip moving cold blocks to the coldstore. // If the splitstore is running with a noop coldstore then this option is set to true // which skips moving (as it is a noop, but still takes time to read all the cold objects) // and directly purges cold blocks. - SkipMoveColdBlocks bool + DiscardColdBlocks bool } // ChainAccessor allows the Splitstore to access the chain. It will most likely @@ -140,11 +135,10 @@ type SplitStore struct { mx sync.Mutex curTs *types.TipSet - chain ChainAccessor - ds dstore.Datastore - hot bstore.Blockstore - cold bstore.Blockstore - tracker TrackingStore + chain ChainAccessor + ds dstore.Datastore + hot bstore.Blockstore + cold bstore.Blockstore markSetEnv MarkSetEnv markSetSize int64 @@ -159,9 +153,8 @@ type SplitStore struct { txnEnv MarkSetEnv txnProtect MarkSet txnMarkSet MarkSet - - // pending write set - pendingWrites map[cid.Cid]struct{} + txnRefs map[cid.Cid]struct{} + txnActive bool } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -170,23 +163,20 @@ var _ bstore.Blockstore = (*SplitStore)(nil) // is backed by the provided hot and cold stores. The returned SplitStore MUST be // attached to the ChainStore with Start in order to trigger compaction. func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Config) (*SplitStore, error) { - // the tracking store - tracker, err := OpenTrackingStore(path, cfg.TrackingStoreType) - if err != nil { - return nil, err + // hot blockstore must support BlockstoreIterator + if _, ok := hot.(bstore.BlockstoreIterator); !ok { + return nil, xerrors.Errorf("hot blockstore does not support efficient iteration: %T", hot) } // the markset env - markSetEnv, err := OpenMarkSetEnv(path, "bolt") + markSetEnv, err := OpenMarkSetEnv(path, "mapts") if err != nil { - _ = tracker.Close() return nil, err } // the txn markset env txnEnv, err := OpenMarkSetEnv(path, "mapts") if err != nil { - _ = tracker.Close() _ = markSetEnv.Close() return nil, err } @@ -197,13 +187,10 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co ds: ds, hot: hot, cold: cold, - tracker: tracker, markSetEnv: markSetEnv, txnEnv: txnEnv, coldPurgeSize: defaultColdPurgeSize, - - pendingWrites: make(map[cid.Cid]struct{}), } ss.ctx, ss.cancel = context.WithCancel(context.Background()) @@ -244,13 +231,6 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { // -- the vm uses this check to avoid duplicate writes on Copy. // When we have options in the API (or something better), the vm can explicitly signal // that this is an implicit Write. 
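// A sketch of the caller-side contract described above (an assumed helper,
// not code from this series): the vm can treat a successful Has() both as a
// duplicate-write check and as an implicit protection of the object for the
// duration of the compaction transaction.
func copyIfMissing(dst, src bstore.Blockstore, c cid.Cid) error {
	if has, err := dst.Has(c); err == nil && has {
		return nil // already present; Has() protects it from concurrent purge
	}
	blk, err := src.Get(c)
	if err != nil {
		return err
	}
	return dst.Put(blk)
}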
- s.trackWrite(c) - - // also make sure the object is considered live during compaction in case we have already - // flushed pending writes and started compaction. - // when within vm copy context, dags will be recursively referenced. - // in case of a race with purge, this will return a track error, which we can use to - // signal to the vm that the object is not fully present. err = s.trackTxnRef(c, true) if xerrors.Is(err, errMissingObject) { // we failed to recursively protect the object because some inner object has been purged; @@ -276,12 +256,14 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { return blk, err case bstore.ErrNotFound: - s.mx.Lock() - warmup := s.warmupEpoch > 0 - curTs := s.curTs - s.mx.Unlock() - if warmup { - s.debug.LogReadMiss(curTs, cid) + if s.debug != nil { + s.mx.Lock() + warm := s.warmupEpoch > 0 + curTs := s.curTs + s.mx.Unlock() + if warm { + s.debug.LogReadMiss(curTs, cid) + } } blk, err = s.cold.Get(cid) @@ -308,12 +290,14 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { return size, err case bstore.ErrNotFound: - s.mx.Lock() - warmup := s.warmupEpoch > 0 - curTs := s.curTs - s.mx.Unlock() - if warmup { - s.debug.LogReadMiss(curTs, cid) + if s.debug != nil { + s.mx.Lock() + warm := s.warmupEpoch > 0 + curTs := s.curTs + s.mx.Unlock() + if warm { + s.debug.LogReadMiss(curTs, cid) + } } size, err = s.cold.GetSize(cid) @@ -333,7 +317,13 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.hot.Put(blk) if err == nil { - s.trackWrite(blk.Cid()) + if s.debug != nil { + s.mx.Lock() + curTs := s.curTs + writeEpoch := s.writeEpoch + s.mx.Unlock() + s.debug.LogWrite(curTs, blk, writeEpoch) + } err = s.trackTxnRef(blk.Cid(), false) } @@ -351,7 +341,14 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { err := s.hot.PutMany(blks) if err == nil { - s.trackWriteMany(batch) + if s.debug != nil { + s.mx.Lock() + curTs := s.curTs + writeEpoch := s.writeEpoch + s.mx.Unlock() + s.debug.LogWriteMany(curTs, blks, writeEpoch) + } + err = s.trackTxnRefMany(batch) } @@ -408,12 +405,14 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { return err case bstore.ErrNotFound: - s.mx.Lock() - warmup := s.warmupEpoch > 0 - curTs := s.curTs - s.mx.Unlock() - if warmup { - s.debug.LogReadMiss(curTs, cid) + if s.debug != nil { + s.mx.Lock() + warm := s.warmupEpoch > 0 + curTs := s.curTs + s.mx.Unlock() + if warm { + s.debug.LogReadMiss(curTs, cid) + } } err = s.cold.View(cid, cb) @@ -485,11 +484,11 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading mark set size: %w", err) } - s.updateWriteEpoch() + log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, "warmupEpoch", s.warmupEpoch) - log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, "warmupEpoch", s.warmupEpoch, "writeEpoch", s.writeEpoch) - - go s.background() + if s.debug != nil { + go s.background() + } // watch the chain chain.SubscribeHeadChanges(s.HeadChange) @@ -507,9 +506,8 @@ func (s *SplitStore) Close() error { } } - s.flushPendingWrites(false) s.cancel() - return multierr.Combine(s.tracker.Close(), s.markSetEnv.Close(), s.debug.Close()) + return multierr.Combine(s.markSetEnv.Close(), s.debug.Close()) } func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { @@ -524,8 +522,6 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { s.curTs = curTs s.mx.Unlock() - s.updateWriteEpoch() - timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) if time.Since(timestamp) > SyncGapTime { // 
don't attempt compaction before we have caught up syncing @@ -557,6 +553,21 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } +func (s *SplitStore) background() { + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + + for { + select { + case <-s.ctx.Done(): + return + + case <-ticker.C: + s.updateWriteEpoch() + } + } +} + func (s *SplitStore) updateWriteEpoch() { s.mx.Lock() defer s.mx.Unlock() @@ -568,7 +579,6 @@ func (s *SplitStore) updateWriteEpoch() { if dt < 0 { writeEpoch := curTs.Height() + 1 if writeEpoch > s.writeEpoch { - s.flushPendingWrites(true) s.writeEpoch = writeEpoch } @@ -577,67 +587,29 @@ func (s *SplitStore) updateWriteEpoch() { writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + 1 if writeEpoch > s.writeEpoch { - s.flushPendingWrites(true) s.writeEpoch = writeEpoch } } -// Unfortunately we can't just directly tracker.Put one by one, as it is ridiculously slow with -// bbolt because of syncing (order of 10ms), so we batch them. -func (s *SplitStore) trackWrite(c cid.Cid) { - s.mx.Lock() - defer s.mx.Unlock() - - s.pendingWrites[c] = struct{}{} -} - -// and also combine batch writes into them -func (s *SplitStore) trackWriteMany(cids []cid.Cid) { - s.mx.Lock() - defer s.mx.Unlock() - - for _, c := range cids { - s.pendingWrites[c] = struct{}{} - } -} - -func (s *SplitStore) flushPendingWrites(locked bool) { - if !locked { - s.mx.Lock() - defer s.mx.Unlock() - } - - if len(s.pendingWrites) == 0 { - return - } - - cids := make([]cid.Cid, 0, len(s.pendingWrites)) - for c := range s.pendingWrites { - cids = append(cids, c) - } - s.pendingWrites = make(map[cid.Cid]struct{}) - - epoch := s.writeEpoch - err := s.tracker.PutBatch(cids, epoch) - if err != nil { - log.Errorf("error putting write batch to tracker: %s", err) - } - - s.debug.LogWriteMany(s.curTs, cids, epoch) -} - func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { - if s.txnProtect == nil { + if !s.txnActive { // not compacting return nil } + if s.txnRefs != nil { + // we haven't finished marking yet, so track the reference + s.txnRefs[c] = struct{}{} + return nil + } + + // we have finished marking, protect the reference if !recursive { return s.txnProtect.Mark(c) } // it's a recursive reference in vm context, protect links if they are not in the markset already - return s.walkLinks(c, cid.NewSet(), func(c cid.Cid) error { + return s.walkObject(c, cid.NewSet(), func(c cid.Cid) error { mark, err := s.txnMarkSet.Has(c) if err != nil { return xerrors.Errorf("error checking mark set for %s: %w", c, err) @@ -675,7 +647,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { } func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { - if s.txnProtect == nil { + if !s.txnActive { // not compacting return nil } @@ -691,27 +663,7 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { return err } -func (s *SplitStore) background() { - ticker := time.NewTicker(time.Second) - defer ticker.Stop() - - for { - select { - case <-s.ctx.Done(): - return - - case <-ticker.C: - s.updateWriteEpoch() - } - } -} - func (s *SplitStore) warmup(curTs *types.TipSet) error { - err := s.loadGenesisState() - if err != nil { - return xerrors.Errorf("error loading genesis state: %w", err) - } - if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { return xerrors.Errorf("error locking compaction") } @@ -722,7 +674,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { log.Info("warming up hotstore") start := 
time.Now() - err = s.doWarmup(curTs) + err := s.doWarmup(curTs) if err != nil { log.Errorf("error warming up hotstore: %s", err) return @@ -734,75 +686,13 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { return nil } -func (s *SplitStore) loadGenesisState() error { - // makes sure the genesis and its state root are hot - gb, err := s.chain.GetGenesis() - if err != nil { - return xerrors.Errorf("error getting genesis: %w", err) - } - - genesis := gb.Cid() - genesisStateRoot := gb.ParentStateRoot - - has, err := s.hot.Has(genesis) - if err != nil { - return xerrors.Errorf("error checking hotstore for genesis: %w", err) - } - - if !has { - blk, err := gb.ToStorageBlock() - if err != nil { - return xerrors.Errorf("error converting genesis block to storage block: %w", err) - } - - err = s.hot.Put(blk) - if err != nil { - return xerrors.Errorf("error putting genesis block to hotstore: %w", err) - } - } - - err = s.walkLinks(genesisStateRoot, cid.NewSet(), func(c cid.Cid) error { - has, err = s.hot.Has(c) - if err != nil { - return xerrors.Errorf("error checking hotstore for genesis state root: %w", err) - } - - if !has { - blk, err := s.cold.Get(c) - if err != nil { - if err == bstore.ErrNotFound { - return nil - } - - return xerrors.Errorf("error retrieving genesis state linked object from coldstore: %w", err) - } - - err = s.hot.Put(blk) - if err != nil { - return xerrors.Errorf("error putting genesis state linked object to hotstore: %w", err) - } - } - - return nil - }) - - if err != nil { - return xerrors.Errorf("error walking genesis state root links: %w", err) - } - - return nil -} - func (s *SplitStore) doWarmup(curTs *types.TipSet) error { epoch := curTs.Height() - batchHot := make([]blocks.Block, 0, batchSize) - batchSnoop := make([]cid.Cid, 0, batchSize) - count := int64(0) xcount := int64(0) missing := int64(0) - err := s.walk(curTs, epoch, false, s.cfg.HotHeaders, + err := s.walkChain(curTs, epoch, false, func(cid cid.Cid) error { count++ @@ -827,15 +717,7 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { xcount++ batchHot = append(batchHot, blk) - batchSnoop = append(batchSnoop, cid) - if len(batchHot) == batchSize { - err = s.tracker.PutBatch(batchSnoop, epoch) - if err != nil { - return err - } - batchSnoop = batchSnoop[:0] - err = s.hot.PutMany(batchHot) if err != nil { return err @@ -851,11 +733,6 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { } if len(batchHot) > 0 { - err = s.tracker.PutBatch(batchSnoop, epoch) - if err != nil { - return err - } - err = s.hot.PutMany(batchHot) if err != nil { return err @@ -885,22 +762,8 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { // Compaction/GC Algorithm func (s *SplitStore) compact(curTs *types.TipSet) { - var err error - if s.markSetSize == 0 { - start := time.Now() - log.Info("estimating mark set size") - err = s.estimateMarkSetSize(curTs) - if err != nil { - log.Errorf("error estimating mark set size: %s; aborting compaction", err) - return - } - log.Infow("estimating mark set size done", "took", time.Since(start), "size", s.markSetSize) - } else { - log.Infow("current mark set size estimate", "size", s.markSetSize) - } - start := time.Now() - err = s.doCompact(curTs) + err := s.doCompact(curTs) took := time.Since(start).Milliseconds() stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) @@ -909,24 +772,6 @@ func (s *SplitStore) compact(curTs *types.TipSet) { } } -func (s *SplitStore) estimateMarkSetSize(curTs *types.TipSet) error { - 
epoch := curTs.Height()
-
- var count int64
- err := s.walk(curTs, epoch, false, s.cfg.HotHeaders,
- func(cid cid.Cid) error {
- count++
- return nil
- })
-
- if err != nil {
- return err
- }
-
- s.markSetSize = count + count>>2 // overestimate a bit
- return nil
-}
-
 func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 currentEpoch := curTs.Height()
 boundaryEpoch := currentEpoch - CompactionBoundary
@@ -941,12 +786,18 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 defer markSet.Close() //nolint:errcheck
 defer s.debug.Flush()
 
+ // 0. Prepare the transaction
+ s.txnLk.Lock()
+ s.txnRefs = make(map[cid.Cid]struct{})
+ s.txnActive = true
+ s.txnLk.Unlock()
+
 // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch
 log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch)
 startMark := time.Now()
 
 var count int64
- err = s.walk(curTs, boundaryEpoch, true, s.cfg.HotHeaders,
+ err = s.walkChain(curTs, boundaryEpoch, true,
 func(c cid.Cid) error {
 count++
 return markSet.Mark(c)
@@ -962,8 +813,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 
 log.Infow("marking done", "took", time.Since(startMark), "marked", count)
 
- // create the transaction protect filter
+ // fetch references taken during marking and create the transaction protect filter
 s.txnLk.Lock()
+ txnRefs := s.txnRefs
+ s.txnRefs = nil
 s.txnProtect, err = s.txnEnv.Create("protected", 0)
 if err != nil {
 s.txnLk.Unlock()
@@ -975,93 +828,34 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 defer func() {
 s.txnLk.Lock()
 _ = s.txnProtect.Close()
+ s.txnActive = false
 s.txnProtect = nil
 s.txnMarkSet = nil
 s.txnLk.Unlock()
 }()
 
- // flush pending writes to update the tracker
- s.flushPendingWrites(false)
-
- // 2. move cold unreachable objects to the coldstore
- log.Info("collecting candidate cold objects")
- startCollect := time.Now()
-
- candidates := make(map[cid.Cid]struct{}, s.coldPurgeSize)
- var towalk []cid.Cid
-
- // some stats for logging
- var hotCnt, coldCnt, slackCnt, liveCnt int
-
- // 2.1 iterate through the tracking store and collect unreachable cold objects
- // for every hot object that is a dag and not in the markset, walk for links and
- // and mark reachable objects
- err = s.tracker.ForEach(func(c cid.Cid, writeEpoch abi.ChainEpoch) error {
- // was it marked?
+ // 1.1 Update markset for references created during marking
+ log.Info("updating mark set for live references")
+ startMark = time.Now()
+ walked := cid.NewSet()
+ count = 0
+ for c := range txnRefs {
 mark, err := markSet.Has(c)
 if err != nil {
- return xerrors.Errorf("error checkiing mark set for %s: %w", c, err)
+ return xerrors.Errorf("error checking markset for %s: %w", c, err)
 }
 
 if mark {
- hotCnt++
- return nil
- }
-
- // is the object still hot?
- if writeEpoch >= boundaryEpoch {
- // yes, stay in the hotstore
- hotCnt++
-
- // if it is a DAG, add it to the walk list to recursively update the markset
- if c.Prefix().Codec != cid.DagCBOR {
- return nil
- }
-
- towalk = append(towalk, c)
- return nil
- }
-
- // is the object in slack region?
- if writeEpoch > coldEpoch {
- // yes stay in the hotstore, but we won't walk you
- slackCnt++
-
- return nil
+ continue
 }
 
- // it's cold, mark it as candidate for move
- candidates[c] = struct{}{}
- coldCnt++
-
- return nil
- })
-
- if err != nil {
- return xerrors.Errorf("error collecting candidate cold objects: %w", err)
- }
-
- log.Infow("candidate collection done", "took", time.Since(startCollect))
-
- if coldCnt > 0 {
- s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit
- }
-
- // walk hot dags that were not marked and recursively update the mark set
- log.Info("updating mark set for hot dags")
- startMark = time.Now()
-
- count = 0
- walked := cid.NewSet()
- for _, c := range towalk {
- err = s.walkLinks(c, walked, func(c cid.Cid) error {
+ err = s.walkObject(c, walked, func(c cid.Cid) error {
 mark, err := markSet.Has(c)
 if err != nil {
- return xerrors.Errorf("error checking mark set for %s: %w", c, err)
+ return xerrors.Errorf("error checking markset for %s: %w", c, err)
 }
 
 if mark {
- // already marked, don't recurse its links
 return errStopWalk
 }
 
@@ -1070,32 +864,49 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 })
 
 if err != nil {
- return xerrors.Errorf("error walking %s: %w", c, err)
+ return xerrors.Errorf("error walking %s for marking: %w", c, err)
 }
 }
+ log.Infow("update marking set done", "took", time.Since(startMark), "marked", count)
 
- log.Infow("updating mark set done", "took", time.Since(startMark), "marked", count)
+ // 2. iterate through the hotstore to collect cold objects
+ log.Info("collecting cold objects")
+ startCollect := time.Now()
+
+ // some stats for logging
+ var hotCnt, coldCnt int
 
- // filter the candidate set for objects newly marked as hot
- for c := range candidates {
+ cold := make([]cid.Cid, 0, s.coldPurgeSize)
+ err = s.hot.(bstore.BlockstoreIterator).ForEachKey(func(c cid.Cid) error {
+ // was it marked?
 mark, err := markSet.Has(c)
 if err != nil {
- return xerrors.Errorf("error checking mark set for %s: %w", c, err)
+ return xerrors.Errorf("error checkiing mark set for %s: %w", c, err)
 }
 
 if mark {
- delete(candidates, c)
- liveCnt++
+ hotCnt++
+ return nil
 }
- }
 
- // create the cold object list
- cold := make([]cid.Cid, 0, len(candidates))
- for c := range candidates {
+ // it's cold, mark it as candidate for move
 cold = append(cold, c)
+ coldCnt++
+
+ return nil
+ })
+
+ if err != nil {
+ return xerrors.Errorf("error collecting candidate cold objects: %w", err)
 }
 
- log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt, "live", liveCnt, "slack", slackCnt)
+ log.Infow("candidate collection done", "took", time.Since(startCollect))
+
+ if coldCnt > 0 {
+ s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit
+ }
+
+ log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt)
 
 stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
 stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
@@ -1110,8 +921,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 return xerrors.Errorf("compaction aborted")
 }
 
- // 2.2 copy the cold objects to the coldstore -- if we have one
- if !s.cfg.SkipMoveColdBlocks {
+ // 3. 
copy the cold objects to the coldstore -- if we have one + if !s.cfg.DiscardColdBlocks { log.Info("moving cold blocks to the coldstore") startMove := time.Now() err = s.moveColdBlocks(cold) @@ -1121,7 +932,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("moving done", "took", time.Since(startMove)) } - // 2.3 purge cold objects from the hotstore + // 4. purge cold objects from the hotstore, taking protected references into account log.Info("purging cold objects from the hotstore") startPurge := time.Now() err = s.purge(curTs, cold) @@ -1131,11 +942,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) // we are done; do some housekeeping - err = s.tracker.Sync() - if err != nil { - return xerrors.Errorf("error syncing tracker: %w", err) - } - s.gcHotstore() err = s.setBaseEpoch(coldEpoch) @@ -1151,7 +957,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, fullChain bool, +func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool, f func(cid.Cid) error) error { visited := cid.NewSet() walked := cid.NewSet() @@ -1179,25 +985,20 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, f return xerrors.Errorf("error unmarshaling block header (cid: %s): %w", c, err) } - // don't walk under the boundary, unless we are walking the full chain - if hdr.Height < boundary && !fullChain { - return nil - } - - // we only scan the block if it is above the boundary + // we only scan the block if it is at or above the boundary if hdr.Height >= boundary { scanCnt++ if inclMsgs { - if err := s.walkLinks(hdr.Messages, walked, f); err != nil { + if err := s.walkObject(hdr.Messages, walked, f); err != nil { return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) } - if err := s.walkLinks(hdr.ParentMessageReceipts, walked, f); err != nil { + if err := s.walkObject(hdr.ParentMessageReceipts, walked, f); err != nil { return xerrors.Errorf("error walking message receipts (cid: %s): %w", hdr.ParentMessageReceipts, err) } } - if err := s.walkLinks(hdr.ParentStateRoot, walked, f); err != nil { + if err := s.walkObject(hdr.ParentStateRoot, walked, f); err != nil { return xerrors.Errorf("error walking state root (cid: %s): %w", hdr.ParentStateRoot, err) } } @@ -1224,7 +1025,7 @@ func (s *SplitStore) walk(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs, f return nil } -func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { +func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { if !walked.Visit(c) { return nil } @@ -1253,7 +1054,7 @@ func (s *SplitStore) walkLinks(c cid.Cid, walked *cid.Set, f func(cid.Cid) error } for _, c := range links { - err := s.walkLinks(c, walked, f) + err := s.walkObject(c, walked, f) if err != nil { return xerrors.Errorf("error walking link (cid: %s): %w", c, err) } @@ -1277,21 +1078,15 @@ func (s *SplitStore) view(cid cid.Cid, cb func([]byte) error) error { func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) - for _, cid := range cold { - blk, err := s.hot.Get(cid) + for _, c := range cold { + blk, err := s.hot.Get(c) if err != nil { if err == bstore.ErrNotFound { - // this can happen if the node is killed after we have deleted the block from the hotstore - // but before 
we have deleted it from the tracker; just delete the tracker.
- err = s.tracker.Delete(cid)
- if err != nil {
- return xerrors.Errorf("error deleting unreachable cid %s from tracker: %w", cid, err)
- }
- } else {
- return xerrors.Errorf("error retrieving tracked block %s from hotstore: %w", cid, err)
+ log.Warnf("hotstore missing block %s", c)
+ continue
 }
 
- continue
+ return xerrors.Errorf("error retrieving block %s from hotstore: %w", c, err)
 }
 
 batch = append(batch, blk)
@@ -1367,18 +1162,12 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error {
 s.debug.LogMove(curTs, c)
 }
 
- err := s.tracker.DeleteBatch(deadCids)
- if err != nil {
- return xerrors.Errorf("error purging tracking: %w", err)
- }
-
- err = s.hot.DeleteMany(deadCids)
+ err := s.hot.DeleteMany(deadCids)
 if err != nil {
 return xerrors.Errorf("error purging cold objects: %w", err)
 }
 
 purgeCnt += len(deadCids)
-
 return nil
 })
 }

From 5f7ae1f4891038fa38e7ad1779314d7e71dfaa6a Mon Sep 17 00:00:00 2001
From: vyzo
Date: Sun, 4 Jul 2021 09:55:12 +0300
Subject: [PATCH 099/197] update splitstore DI constructor

---
 node/modules/blockstore.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go
index f4945f15c36..3354053ba12 100644
--- a/node/modules/blockstore.go
+++ b/node/modules/blockstore.go
@@ -78,10 +78,9 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked
 }
 
 cfg := &splitstore.Config{
- TrackingStoreType: cfg.Splitstore.TrackingStoreType,
- MarkSetType: cfg.Splitstore.MarkSetType,
- HotHeaders: cfg.Splitstore.HotHeaders || cfg.Splitstore.ColdStoreType == "noop",
- SkipMoveColdBlocks: cfg.Splitstore.ColdStoreType == "noop",
+ TrackingStoreType: cfg.Splitstore.TrackingStoreType,
+ MarkSetType: cfg.Splitstore.MarkSetType,
+ DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "noop",
 }
 ss, err := splitstore.Open(path, ds, hot, cold, cfg)
 if err != nil {

From 6fa2cd232d2f95b2cde4d8e983fb157016f151be Mon Sep 17 00:00:00 2001
From: vyzo
Date: Sun, 4 Jul 2021 10:06:55 +0300
Subject: [PATCH 100/197] simplify compaction model

---
 blockstore/splitstore/splitstore.go | 24 +++++++++--------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index 8a32f359d47..669277abef9 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -34,26 +34,21 @@ var (
 // CompactionThreshold is the number of epochs that need to have elapsed
 // from the previously compacted epoch to trigger a new compaction.
 //
 // |················· CompactionThreshold ··················|
- // | |
- // =======‖≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡‖----------|------------------------»
- // | | | chain --> ↑__ current epoch
- // | archived epochs ___↑ |
- // | ↑________ CompactionBoundary
- // ↑__ CompactionSlack
+ // | |
+ // =======‖≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡‖------------------------»
+ // | | chain --> ↑__ current epoch
+ // | archived epochs ___↑
+ // ↑________ CompactionBoundary
 //
 // === :: cold (already archived)
 // ≡≡≡ :: to be archived in this compaction
 // --- :: hot
- CompactionThreshold = 7 * build.Finality
+ CompactionThreshold = 6 * build.Finality
 
 // CompactionBoundary is the number of epochs from the current epoch at which
 // we will walk the chain for live objects.
 CompactionBoundary = 4 * build.Finality
 
- // CompactionSlack is the number of epochs from the compaction boundary to the beginning
- // of the cold epoch. 
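// A worked example of the simplified model (assuming mainnet parameters,
// where build.Finality is 900 epochs of ~30s each): compaction now triggers
// once curTs.Height()-baseEpoch exceeds 6*900 = 5400 epochs (~45 hours), and
// marking walks the chain down to a single boundary instead of a boundary
// plus slack region:
currentEpoch := curTs.Height()
boundaryEpoch := currentEpoch - CompactionBoundary // 4*900 = 3600 epochs back
// anything in the hotstore not reachable from the chain above boundaryEpoch
// is a candidate for the coldstore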
- CompactionSlack = 2 * build.Finality - // SyncGapTime is the time delay from a tipset's min timestamp before we decide // there is a sync gap SyncGapTime = time.Minute @@ -775,9 +770,8 @@ func (s *SplitStore) compact(curTs *types.TipSet) { func (s *SplitStore) doCompact(curTs *types.TipSet) error { currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary - coldEpoch := boundaryEpoch - CompactionSlack - log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "coldEpoch", coldEpoch, "boundaryEpoch", boundaryEpoch) + log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "boundaryEpoch", boundaryEpoch) markSet, err := s.markSetEnv.Create("live", s.markSetSize) if err != nil { @@ -793,7 +787,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnLk.Unlock() // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch - log.Infow("marking reachable blocks", "currentEpoch", currentEpoch, "boundaryEpoch", boundaryEpoch) + log.Info("marking reachable blocks") startMark := time.Now() var count int64 @@ -944,7 +938,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // we are done; do some housekeeping s.gcHotstore() - err = s.setBaseEpoch(coldEpoch) + err = s.setBaseEpoch(boundaryEpoch) if err != nil { return xerrors.Errorf("error saving base epoch: %w", err) } From 36f93649efbd2a3bd7954d8c6f2758507229bda9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 10:10:37 +0300 Subject: [PATCH 101/197] fix panic from concurrent map writes in txnRefs --- blockstore/splitstore/splitstore.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 669277abef9..ced493ed906 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -145,11 +145,12 @@ type SplitStore struct { // protection for concurrent read/writes during compaction txnLk sync.RWMutex + txnActive bool txnEnv MarkSetEnv txnProtect MarkSet txnMarkSet MarkSet + txnRefsMx sync.Mutex txnRefs map[cid.Cid]struct{} - txnActive bool } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -594,7 +595,9 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { if s.txnRefs != nil { // we haven't finished marking yet, so track the reference + s.txnRefsMx.Lock() s.txnRefs[c] = struct{}{} + s.txnRefsMx.Unlock() return nil } From eafffc16340d228c92348a6b63d6e88df1b6dedc Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 10:14:58 +0300 Subject: [PATCH 102/197] more efficient trackTxnRefMany --- blockstore/splitstore/splitstore.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ced493ed906..679d2d8e839 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -650,15 +650,25 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { return nil } - var err error + if s.txnRefs != nil { + // we haven't finished marking yet, so track the reference + s.txnRefsMx.Lock() + for _, c := range cids { + s.txnRefs[c] = struct{}{} + } + s.txnRefsMx.Unlock() + return nil + } + + // we have finished marking, protect the refs for _, c := range cids { - err2 := s.trackTxnRef(c, false) - if err2 != nil { - err = multierr.Combine(err, err2) + err := s.txnProtect.Mark(c) + if err != nil { + return err } } - return err + 
return nil } func (s *SplitStore) warmup(curTs *types.TipSet) error { From 08cad30be24ac9bbdd94676068a9ef83c75a36a6 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 11:20:29 +0300 Subject: [PATCH 103/197] reuse key buffer in badger ForEachKey cid copies the bytes so it's safe --- blockstore/badger/blockstore.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index 1b67048a6c4..f77691a6fd4 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -460,6 +460,7 @@ func (b *Blockstore) ForEachKey(f func(cid.Cid) error) error { iter := txn.NewIterator(opts) defer iter.Close() + var buf []byte for iter.Rewind(); iter.Valid(); iter.Next() { if atomic.LoadInt64(&b.state) != stateOpen { return ErrBlockstoreClosed @@ -471,7 +472,9 @@ func (b *Blockstore) ForEachKey(f func(cid.Cid) error) error { } klen := base32.RawStdEncoding.DecodedLen(len(k)) - buf := make([]byte, klen) + if klen > len(buf) { + buf = make([]byte, klen) + } n, err := base32.RawStdEncoding.Decode(buf, k) if err != nil { From 0a1d7b37321468793e00efb4e5f2202f83c6845e Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 11:23:15 +0300 Subject: [PATCH 104/197] fix log --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 679d2d8e839..4c537788dcd 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -635,7 +635,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { // it has been deleted, signal to the vm to copy if !has { - log.Warnf("missing object for recursive reference to %s: %s", c, err) + log.Warnf("missing object for recursive reference to %s", c) return errMissingObject } From 19d1b1f532ff1c09fe8bbc9d66e25ec1fc1972e4 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 12:14:29 +0300 Subject: [PATCH 105/197] deal with partially written objects --- blockstore/splitstore/splitstore.go | 70 +++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 4c537788dcd..eabf6a3caa6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -79,6 +79,8 @@ var ( enableDebugLog = false // set this to true if you want to track origin stack traces in the write log enableDebugLogWriteTraces = false + + maxMissingScanRetries = 3 ) const ( @@ -633,7 +635,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { return xerrors.Errorf("error checking hotstore for %s: %w", c, err) } - // it has been deleted, signal to the vm to copy + // it's not there (might have been deleted), signal to the vm to copy if !has { log.Warnf("missing object for recursive reference to %s", c) return errMissingObject @@ -846,6 +848,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { startMark = time.Now() walked := cid.NewSet() count = 0 + var missing []cid.Cid for c := range txnRefs { mark, err := markSet.Has(c) if err != nil { @@ -871,10 +874,71 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }) if err != nil { - return xerrors.Errorf("error walking %s for marking: %w", c, err) + if xerrors.Is(err, bstore.ErrNotFound) { + log.Warnf("missing or incomplete object: %s", c) + missing = append(missing, c) + } else { + return xerrors.Errorf("error walking %s for 
marking: %w", c, err) + } + } + } + log.Infow("update marking set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) + + // 1.2 rescan for missing objects (after waiting a minute), as they might have not been copied yet + // by the vm + if len(missing) > 0 { + try := 0 + + log.Info("rescanning for missing objects") + startMark = time.Now() + + for len(missing) > 0 { + if try > maxMissingScanRetries { + return xerrors.Errorf("failed to fully scan transactional refs; %d missing objects", len(missing)) + } + + // discard previous walk short-cuts + walked = cid.NewSet() + towalk := missing + missing = nil + try++ + + log.Infof("rescanning for %d missing objects (attempt %d)", len(towalk), try) + // wait a minute first for in-flight writes to complete + time.Sleep(time.Minute) + + for _, c := range towalk { + // we can't reliably check the markset and short-circuit this time, we have to do full walks + // because the object was previously visited top-to-bottom, with root DAGs short circuiting + // their children. + // but we *can* short-circuit on the txn protection filter, as this implies that the object + // will be protected from purge. + err = s.walkObject(c, walked, func(c cid.Cid) error { + mark, err := s.txnProtect.Has(c) + if err != nil { + return xerrors.Errorf("error checking protected set for %s: %w", c, err) + } + + if mark { + return errStopWalk + } + + return markSet.Mark(c) + }) + + if err != nil { + if xerrors.Is(err, bstore.ErrNotFound) { + log.Warnf("missing or incomplete object: %s", c) + missing = append(missing, c) + } else { + return xerrors.Errorf("error walking %s for marking: %w", c, err) + } + } + } } + + log.Infow("rescanning done", "took", time.Since(startMark)) } - log.Infow("update marking set done", "took", time.Since(startMark), "marked", count) // 2. iterate through the hotstore to collect cold objects log.Info("collecting cold objects") From 190cb18ab0fa566dac4686fc0748d599d20386d6 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 12:23:30 +0300 Subject: [PATCH 106/197] housekeeping - remove defunct tracking store implementations - update splitstore node config - use mark set type config option (defaulting to mapts); a memory constrained node may want to use an on-disk one --- blockstore/splitstore/splitstore.go | 13 +-- blockstore/splitstore/tracking.go | 109 --------------------- blockstore/splitstore/tracking_bolt.go | 119 ---------------------- blockstore/splitstore/tracking_test.go | 130 ------------------------- node/config/def.go | 9 +- node/modules/blockstore.go | 1 - 6 files changed, 8 insertions(+), 373 deletions(-) delete mode 100644 blockstore/splitstore/tracking.go delete mode 100644 blockstore/splitstore/tracking_bolt.go delete mode 100644 blockstore/splitstore/tracking_test.go diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index eabf6a3caa6..343492beb35 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -90,17 +90,12 @@ const ( ) type Config struct { - // TrackingStore is the type of tracking store to use. - // - // Supported values are: "bolt" (default if omitted), "mem" (for tests and readonly access). - TrackingStoreType string - // MarkSetType is the type of mark set to use. // - // Supported values are: "bloom" (default if omitted), "bolt". + // Sane values are: "mapts", "bolt" (if you are memory constrained). MarkSetType string - // SkipMoveColdBlocks indicates whether to skip moving cold blocks to the coldstore. 
+ // DiscardColdBlocks indicates whether to skip moving cold blocks to the coldstore. // If the splitstore is running with a noop coldstore then this option is set to true // which skips moving (as it is a noop, but still takes time to read all the cold objects) // and directly purges cold blocks. @@ -167,13 +162,13 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co } // the markset env - markSetEnv, err := OpenMarkSetEnv(path, "mapts") + markSetEnv, err := OpenMarkSetEnv(path, cfg.MarkSetType) if err != nil { return nil, err } // the txn markset env - txnEnv, err := OpenMarkSetEnv(path, "mapts") + txnEnv, err := OpenMarkSetEnv(path, cfg.MarkSetType) if err != nil { _ = markSetEnv.Close() return nil, err diff --git a/blockstore/splitstore/tracking.go b/blockstore/splitstore/tracking.go deleted file mode 100644 index d57fd45ef6a..00000000000 --- a/blockstore/splitstore/tracking.go +++ /dev/null @@ -1,109 +0,0 @@ -package splitstore - -import ( - "path/filepath" - "sync" - - "golang.org/x/xerrors" - - "github.com/filecoin-project/go-state-types/abi" - cid "github.com/ipfs/go-cid" -) - -// TrackingStore is a persistent store that tracks blocks that are added -// to the hotstore, tracking the epoch at which they are written. -type TrackingStore interface { - Put(cid.Cid, abi.ChainEpoch) error - PutBatch([]cid.Cid, abi.ChainEpoch) error - Get(cid.Cid) (abi.ChainEpoch, error) - Delete(cid.Cid) error - DeleteBatch([]cid.Cid) error - ForEach(func(cid.Cid, abi.ChainEpoch) error) error - Sync() error - Close() error -} - -// OpenTrackingStore opens a tracking store of the specified type in the -// specified path. -func OpenTrackingStore(path string, ttype string) (TrackingStore, error) { - switch ttype { - case "", "bolt": - return OpenBoltTrackingStore(filepath.Join(path, "tracker.bolt")) - case "mem": - return NewMemTrackingStore(), nil - default: - return nil, xerrors.Errorf("unknown tracking store type %s", ttype) - } -} - -// NewMemTrackingStore creates an in-memory tracking store. 
-// This is only useful for test or situations where you don't want to open the -// real tracking store (eg concurrent read only access on a node's datastore) -func NewMemTrackingStore() *MemTrackingStore { - return &MemTrackingStore{tab: make(map[cid.Cid]abi.ChainEpoch)} -} - -// MemTrackingStore is a simple in-memory tracking store -type MemTrackingStore struct { - sync.Mutex - tab map[cid.Cid]abi.ChainEpoch -} - -var _ TrackingStore = (*MemTrackingStore)(nil) - -func (s *MemTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { - s.Lock() - defer s.Unlock() - s.tab[cid] = epoch - return nil -} - -func (s *MemTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { - s.Lock() - defer s.Unlock() - for _, cid := range cids { - s.tab[cid] = epoch - } - return nil -} - -func (s *MemTrackingStore) Get(cid cid.Cid) (abi.ChainEpoch, error) { - s.Lock() - defer s.Unlock() - epoch, ok := s.tab[cid] - if ok { - return epoch, nil - } - return 0, xerrors.Errorf("missing tracking epoch for %s", cid) -} - -func (s *MemTrackingStore) Delete(cid cid.Cid) error { - s.Lock() - defer s.Unlock() - delete(s.tab, cid) - return nil -} - -func (s *MemTrackingStore) DeleteBatch(cids []cid.Cid) error { - s.Lock() - defer s.Unlock() - for _, cid := range cids { - delete(s.tab, cid) - } - return nil -} - -func (s *MemTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { - s.Lock() - defer s.Unlock() - for cid, epoch := range s.tab { - err := f(cid, epoch) - if err != nil { - return err - } - } - return nil -} - -func (s *MemTrackingStore) Sync() error { return nil } -func (s *MemTrackingStore) Close() error { return nil } diff --git a/blockstore/splitstore/tracking_bolt.go b/blockstore/splitstore/tracking_bolt.go deleted file mode 100644 index 2980e8c5ab1..00000000000 --- a/blockstore/splitstore/tracking_bolt.go +++ /dev/null @@ -1,119 +0,0 @@ -package splitstore - -import ( - "time" - - "golang.org/x/xerrors" - - cid "github.com/ipfs/go-cid" - bolt "go.etcd.io/bbolt" - - "github.com/filecoin-project/go-state-types/abi" -) - -type BoltTrackingStore struct { - db *bolt.DB - bucketId []byte -} - -var _ TrackingStore = (*BoltTrackingStore)(nil) - -func OpenBoltTrackingStore(path string) (*BoltTrackingStore, error) { - opts := &bolt.Options{ - Timeout: 1 * time.Second, - } - db, err := bolt.Open(path, 0644, opts) - if err != nil { - return nil, err - } - - bucketId := []byte("tracker") - err = db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucketIfNotExists(bucketId) - if err != nil { - return xerrors.Errorf("error creating bolt db bucket %s: %w", string(bucketId), err) - } - return nil - }) - - if err != nil { - _ = db.Close() - return nil, err - } - - return &BoltTrackingStore{db: db, bucketId: bucketId}, nil -} - -func (s *BoltTrackingStore) Put(cid cid.Cid, epoch abi.ChainEpoch) error { - val := epochToBytes(epoch) - return s.db.Batch(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - return b.Put(cid.Hash(), val) - }) -} - -func (s *BoltTrackingStore) PutBatch(cids []cid.Cid, epoch abi.ChainEpoch) error { - val := epochToBytes(epoch) - return s.db.Batch(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - for _, cid := range cids { - err := b.Put(cid.Hash(), val) - if err != nil { - return err - } - } - return nil - }) -} - -func (s *BoltTrackingStore) Get(cid cid.Cid) (epoch abi.ChainEpoch, err error) { - err = s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - val := b.Get(cid.Hash()) - if val == nil { - return xerrors.Errorf("missing 
tracking epoch for %s", cid) - } - epoch = bytesToEpoch(val) - return nil - }) - return epoch, err -} - -func (s *BoltTrackingStore) Delete(cid cid.Cid) error { - return s.db.Batch(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - return b.Delete(cid.Hash()) - }) -} - -func (s *BoltTrackingStore) DeleteBatch(cids []cid.Cid) error { - return s.db.Batch(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - for _, cid := range cids { - err := b.Delete(cid.Hash()) - if err != nil { - return xerrors.Errorf("error deleting %s", cid) - } - } - return nil - }) -} - -func (s *BoltTrackingStore) ForEach(f func(cid.Cid, abi.ChainEpoch) error) error { - return s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - return b.ForEach(func(k, v []byte) error { - cid := cid.NewCidV1(cid.Raw, k) - epoch := bytesToEpoch(v) - return f(cid, epoch) - }) - }) -} - -func (s *BoltTrackingStore) Sync() error { - return s.db.Sync() -} - -func (s *BoltTrackingStore) Close() error { - return s.db.Close() -} diff --git a/blockstore/splitstore/tracking_test.go b/blockstore/splitstore/tracking_test.go deleted file mode 100644 index afd475da5a5..00000000000 --- a/blockstore/splitstore/tracking_test.go +++ /dev/null @@ -1,130 +0,0 @@ -package splitstore - -import ( - "io/ioutil" - "testing" - - cid "github.com/ipfs/go-cid" - "github.com/multiformats/go-multihash" - - "github.com/filecoin-project/go-state-types/abi" -) - -func TestBoltTrackingStore(t *testing.T) { - testTrackingStore(t, "bolt") -} - -func testTrackingStore(t *testing.T, tsType string) { - t.Helper() - - makeCid := func(key string) cid.Cid { - h, err := multihash.Sum([]byte(key), multihash.SHA2_256, -1) - if err != nil { - t.Fatal(err) - } - - return cid.NewCidV1(cid.Raw, h) - } - - mustHave := func(s TrackingStore, cid cid.Cid, epoch abi.ChainEpoch) { - val, err := s.Get(cid) - if err != nil { - t.Fatal(err) - } - - if val != epoch { - t.Fatal("epoch mismatch") - } - } - - mustNotHave := func(s TrackingStore, cid cid.Cid) { - _, err := s.Get(cid) - if err == nil { - t.Fatal("expected error") - } - } - - path, err := ioutil.TempDir("", "snoop-test.*") - if err != nil { - t.Fatal(err) - } - - s, err := OpenTrackingStore(path, tsType) - if err != nil { - t.Fatal(err) - } - - k1 := makeCid("a") - k2 := makeCid("b") - k3 := makeCid("c") - k4 := makeCid("d") - - s.Put(k1, 1) //nolint - s.Put(k2, 2) //nolint - s.Put(k3, 3) //nolint - s.Put(k4, 4) //nolint - - mustHave(s, k1, 1) - mustHave(s, k2, 2) - mustHave(s, k3, 3) - mustHave(s, k4, 4) - - s.Delete(k1) // nolint - s.Delete(k2) // nolint - - mustNotHave(s, k1) - mustNotHave(s, k2) - mustHave(s, k3, 3) - mustHave(s, k4, 4) - - s.PutBatch([]cid.Cid{k1}, 1) //nolint - s.PutBatch([]cid.Cid{k2}, 2) //nolint - - mustHave(s, k1, 1) - mustHave(s, k2, 2) - mustHave(s, k3, 3) - mustHave(s, k4, 4) - - allKeys := map[string]struct{}{ - k1.String(): {}, - k2.String(): {}, - k3.String(): {}, - k4.String(): {}, - } - - err = s.ForEach(func(k cid.Cid, _ abi.ChainEpoch) error { - _, ok := allKeys[k.String()] - if !ok { - t.Fatal("unexpected key") - } - - delete(allKeys, k.String()) - return nil - }) - - if err != nil { - t.Fatal(err) - } - - if len(allKeys) != 0 { - t.Fatal("not all keys were returned") - } - - // no close and reopen and ensure the keys still exist - err = s.Close() - if err != nil { - t.Fatal(err) - } - - s, err = OpenTrackingStore(path, tsType) - if err != nil { - t.Fatal(err) - } - - mustHave(s, k1, 1) - mustHave(s, k2, 2) - mustHave(s, k3, 3) - mustHave(s, k4, 4) - - s.Close() 
//nolint:errcheck -} diff --git a/node/config/def.go b/node/config/def.go index c5770d9e070..81cd9929d2e 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -229,11 +229,9 @@ type Chainstore struct { } type Splitstore struct { - ColdStoreType string - HotStoreType string - TrackingStoreType string - MarkSetType string - HotHeaders bool + ColdStoreType string + HotStoreType string + MarkSetType string } // // Full Node @@ -306,6 +304,7 @@ func DefaultFullNode() *FullNode { Splitstore: Splitstore{ ColdStoreType: "universal", HotStoreType: "badger", + MarkSetType: "mapts", }, }, } diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 3354053ba12..59a037d8dd6 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -78,7 +78,6 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc fx.Lifecycle, r repo.Locked } cfg := &splitstore.Config{ - TrackingStoreType: cfg.Splitstore.TrackingStoreType, MarkSetType: cfg.Splitstore.MarkSetType, DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "noop", } From 95c3aaec9ace9a487fe7f32027c32eee6dce4509 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 12:43:05 +0300 Subject: [PATCH 107/197] fix test --- blockstore/splitstore/splitstore_test.go | 134 ++++++++++++++++++++--- 1 file changed, 116 insertions(+), 18 deletions(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 3583146dce6..d8a8a0b3f0d 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -2,6 +2,7 @@ package splitstore import ( "context" + "errors" "fmt" "sync" "sync/atomic" @@ -14,6 +15,7 @@ import ( "github.com/filecoin-project/lotus/chain/types/mock" blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" datastore "github.com/ipfs/go-datastore" dssync "github.com/ipfs/go-datastore/sync" logging "github.com/ipfs/go-log/v2" @@ -22,7 +24,6 @@ import ( func init() { CompactionThreshold = 5 CompactionBoundary = 2 - CompactionSlack = 2 logging.SetLogLevel("splitstore", "DEBUG") } @@ -31,8 +32,8 @@ func testSplitStore(t *testing.T, cfg *Config) { // the myriads of stores ds := dssync.MutexWrap(datastore.NewMapDatastore()) - hot := blockstore.NewMemorySync() - cold := blockstore.NewMemorySync() + hot := newMockStore() + cold := newMockStore() // this is necessary to avoid the garbage mock puts in the blocks garbage := blocks.NewBlock([]byte{1, 2, 3}) @@ -110,18 +111,12 @@ func testSplitStore(t *testing.T, cfg *Config) { } // count objects in the cold and hot stores - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - countBlocks := func(bs blockstore.Blockstore) int { count := 0 - ch, err := bs.AllKeysChan(ctx) - if err != nil { - t.Fatal(err) - } - for range ch { + bs.(blockstore.BlockstoreIterator).ForEachKey(func(_ cid.Cid) error { count++ - } + return nil + }) return count } @@ -145,20 +140,20 @@ func testSplitStore(t *testing.T, cfg *Config) { coldCnt = countBlocks(cold) hotCnt = countBlocks(hot) - if coldCnt != 6 { - t.Errorf("expected %d cold blocks, but got %d", 6, coldCnt) + if coldCnt != 2 { + t.Errorf("expected %d cold blocks, but got %d", 2, coldCnt) } - if hotCnt != 7 { - t.Errorf("expected %d hot blocks, but got %d", 7, hotCnt) + if hotCnt != 11 { + t.Errorf("expected %d hot blocks, but got %d", 11, hotCnt) } // Make sure we can revert without panicking. 
chain.revert(2) } -func TestSplitStoreSimpleCompaction(t *testing.T) { - testSplitStore(t, &Config{TrackingStoreType: "mem"}) +func TestSplitStoreCompaction(t *testing.T) { + testSplitStore(t, &Config{MarkSetType: "mapts"}) } type mockChain struct { @@ -231,3 +226,106 @@ func (c *mockChain) GetHeaviestTipSet() *types.TipSet { func (c *mockChain) SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) { c.listener = change } + +type mockStore struct { + mx sync.Mutex + set map[cid.Cid]blocks.Block +} + +func newMockStore() *mockStore { + return &mockStore{set: make(map[cid.Cid]blocks.Block)} +} + +func (b *mockStore) Has(cid cid.Cid) (bool, error) { + b.mx.Lock() + defer b.mx.Unlock() + _, ok := b.set[cid] + return ok, nil +} + +func (b *mockStore) HashOnRead(hor bool) {} + +func (b *mockStore) Get(cid cid.Cid) (blocks.Block, error) { + b.mx.Lock() + defer b.mx.Unlock() + + blk, ok := b.set[cid] + if !ok { + return nil, blockstore.ErrNotFound + } + return blk, nil +} + +func (b *mockStore) GetSize(cid cid.Cid) (int, error) { + blk, err := b.Get(cid) + if err != nil { + return 0, err + } + + return len(blk.RawData()), nil +} + +func (b *mockStore) View(cid cid.Cid, f func([]byte) error) error { + blk, err := b.Get(cid) + if err != nil { + return err + } + return f(blk.RawData()) +} + +func (b *mockStore) Put(blk blocks.Block) error { + b.mx.Lock() + defer b.mx.Unlock() + + b.set[blk.Cid()] = blk + return nil +} + +func (b *mockStore) PutMany(blks []blocks.Block) error { + b.mx.Lock() + defer b.mx.Unlock() + + for _, blk := range blks { + b.set[blk.Cid()] = blk + } + return nil +} + +func (b *mockStore) DeleteBlock(cid cid.Cid) error { + b.mx.Lock() + defer b.mx.Unlock() + + delete(b.set, cid) + return nil +} + +func (b *mockStore) DeleteMany(cids []cid.Cid) error { + b.mx.Lock() + defer b.mx.Unlock() + + for _, c := range cids { + delete(b.set, c) + } + return nil +} + +func (b *mockStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + return nil, errors.New("not implemented") +} + +func (b *mockStore) ForEachKey(f func(cid.Cid) error) error { + b.mx.Lock() + defer b.mx.Unlock() + + for c := range b.set { + err := f(c) + if err != nil { + return err + } + } + return nil +} + +func (b *mockStore) Close() error { + return nil +} From 8e56fffb332754b79f89259920e1c05fea7e95de Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 12:46:45 +0300 Subject: [PATCH 108/197] walkChain should visit the genesis state root --- blockstore/splitstore/splitstore.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 343492beb35..c05bd803937 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1052,9 +1052,9 @@ func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMs } // we only scan the block if it is at or above the boundary - if hdr.Height >= boundary { + if hdr.Height >= boundary || hdr.Height == 0 { scanCnt++ - if inclMsgs { + if inclMsgs && hdr.Height > 0 { if err := s.walkObject(hdr.Messages, walked, f); err != nil { return xerrors.Errorf("error walking messages (cid: %s): %w", hdr.Messages, err) } From 028a5c4942f43f779cf6ab9a53fe700122179b4b Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 12:56:01 +0300 Subject: [PATCH 109/197] make test do something useful again --- blockstore/splitstore/splitstore_test.go | 26 +++++++++++++++--------- 1 file changed, 16 insertions(+), 10 
deletions(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index d8a8a0b3f0d..82a42846d28 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -76,18 +76,22 @@ func testSplitStore(t *testing.T, cfg *Config) { } // make some tipsets, but not enough to cause compaction - mkBlock := func(curTs *types.TipSet, i int) *types.TipSet { + mkBlock := func(curTs *types.TipSet, i int, stateRoot blocks.Block) *types.TipSet { blk := mock.MkBlock(curTs, uint64(i), uint64(i)) blk.Messages = garbage.Cid() blk.ParentMessageReceipts = garbage.Cid() - blk.ParentStateRoot = garbage.Cid() + blk.ParentStateRoot = stateRoot.Cid() blk.Timestamp = uint64(time.Now().Unix()) sblk, err := blk.ToStorageBlock() if err != nil { t.Fatal(err) } + err = ss.Put(stateRoot) + if err != nil { + t.Fatal(err) + } err = ss.Put(sblk) if err != nil { t.Fatal(err) @@ -106,7 +110,8 @@ func testSplitStore(t *testing.T, cfg *Config) { curTs := genTs for i := 1; i < 5; i++ { - curTs = mkBlock(curTs, i) + stateRoot := blocks.NewBlock([]byte{byte(i), 3, 3, 7}) + curTs = mkBlock(curTs, i, stateRoot) waitForCompaction() } @@ -127,25 +132,26 @@ func testSplitStore(t *testing.T, cfg *Config) { t.Errorf("expected %d blocks, but got %d", 2, coldCnt) } - if hotCnt != 6 { - t.Errorf("expected %d blocks, but got %d", 6, hotCnt) + if hotCnt != 10 { + t.Errorf("expected %d blocks, but got %d", 10, hotCnt) } // trigger a compaction for i := 5; i < 10; i++ { - curTs = mkBlock(curTs, i) + stateRoot := blocks.NewBlock([]byte{byte(i), 3, 3, 7}) + curTs = mkBlock(curTs, i, stateRoot) waitForCompaction() } coldCnt = countBlocks(cold) hotCnt = countBlocks(hot) - if coldCnt != 2 { - t.Errorf("expected %d cold blocks, but got %d", 2, coldCnt) + if coldCnt != 5 { + t.Errorf("expected %d cold blocks, but got %d", 5, coldCnt) } - if hotCnt != 11 { - t.Errorf("expected %d hot blocks, but got %d", 11, hotCnt) + if hotCnt != 17 { + t.Errorf("expected %d hot blocks, but got %d", 17, hotCnt) } // Make sure we can revert without panicking. From 2c7a89a1dbcc2c20fd15bfc861daa0a2fb0c4c2b Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 13:15:45 +0300 Subject: [PATCH 110/197] short-circuit rescanning on block headers --- blockstore/splitstore/splitstore.go | 37 ++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c05bd803937..914e2973fe1 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -918,7 +918,28 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return errStopWalk } - return markSet.Mark(c) + // mark it + err = markSet.Mark(c) + if err != nil { + return err + } + + // we also short-circuit in case of a block header, as it may cause us to walk the + // entire chain because of a network request (and fail if we were synced form a snapshot + // because of missing messages or receipts!) + // this is necessary because we don't have interface options to signal network request + // initiated API calls; when we have that, we can stop tracking those references and + // we can remove this check. 
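+ // to make the check below concrete: isBlockHeader (defined further down in this patch) treats an object as a block header iff it is DagCBOR encoded and its bytes decode as a types.BlockHeader, i.e. hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil; that is what lets a stray header reference be cut off before it drags in its entire ancestry.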
+ isBlock, err := s.isBlockHeader(c) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %W", c, err) + } + + if isBlock { + return errStopWalk + } + + return nil }) if err != nil { @@ -1091,6 +1112,20 @@ func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMs return nil } +func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { + if c.Prefix().Codec != cid.DagCBOR { + return false, nil + } + + err = s.view(c, func(data []byte) error { + var hdr types.BlockHeader + isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil + return nil + }) + + return isBlock, err +} + func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { if !walked.Visit(c) { return nil From 1f0242822589113c35b24dd135f41e2d1c14e1c8 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 13:17:31 +0300 Subject: [PATCH 111/197] fix lint --- blockstore/splitstore/splitstore.go | 2 +- blockstore/splitstore/splitstore_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 914e2973fe1..5ccdf80a589 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -817,7 +817,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) - // fetch refernces taken during marking and create the transaction protect filter + // fetch references taken during marking and create the transaction protect filter s.txnLk.Lock() txnRefs := s.txnRefs s.txnRefs = nil diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 82a42846d28..624daa52548 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -118,7 +118,7 @@ func testSplitStore(t *testing.T, cfg *Config) { // count objects in the cold and hot stores countBlocks := func(bs blockstore.Blockstore) int { count := 0 - bs.(blockstore.BlockstoreIterator).ForEachKey(func(_ cid.Cid) error { + _ = bs.(blockstore.BlockstoreIterator).ForEachKey(func(_ cid.Cid) error { count++ return nil }) From 680af8eb09276c8b27096c777a6fbce4c934acdd Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 18:37:53 +0300 Subject: [PATCH 112/197] use deep object walking for more robust handling of transactional references --- blockstore/splitstore/splitstore.go | 311 +++++++++++++++++----------- 1 file changed, 194 insertions(+), 117 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 5ccdf80a589..58d9fa28260 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -141,13 +141,14 @@ type SplitStore struct { debug *debugLog // protection for concurrent read/writes during compaction - txnLk sync.RWMutex - txnActive bool - txnEnv MarkSetEnv - txnProtect MarkSet - txnMarkSet MarkSet - txnRefsMx sync.Mutex - txnRefs map[cid.Cid]struct{} + txnLk sync.RWMutex + txnActive bool + txnBoundaryEpoch abi.ChainEpoch + txnEnv MarkSetEnv + txnProtect MarkSet + txnMarkSet MarkSet + txnRefsMx sync.Mutex + txnRefs map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -600,45 +601,59 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { // we have finished marking, protect the reference if !recursive { + // shallow protect return s.txnProtect.Mark(c) } // it's a recursive reference in vm context, protect 
links if they are not in the markset already - return s.walkObject(c, cid.NewSet(), func(c cid.Cid) error { - mark, err := s.txnMarkSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) - } + // we do a deep walk to visit the children first, short-circuiting if the parent has been marked. + // the deep walk is necessary as internal references may be missing, e.g. because a defunct object + // got recreated by the VM. + return s.walkObjectDeep(c, cid.NewSet(), + func(c cid.Cid) error { + mark, err := s.txnMarkSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } - // it's marked, nothing to do - if mark { - return errStopWalk - } + // it's marked, nothing to do + if mark { + return errStopWalk + } - live, err := s.txnProtect.Has(c) - if err != nil { - return xerrors.Errorf("error checking portected set for %s: %w", c, err) - } + // old block reference -- see comment in doCompact about the necessity of this + isOldBlock, err := s.isOldBlockHeader(c, s.txnBoundaryEpoch) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %w", c, err) + } - if live { - return errStopWalk - } + if isOldBlock { + return errStopWalk + } - // this occurs check is necessary because cold objects are purged in arbitrary order - has, err := s.hot.Has(c) - if err != nil { - return xerrors.Errorf("error checking hotstore for %s: %w", c, err) - } + return nil + }, + func(c cid.Cid) error { + // this occurs check is necessary because cold objects are purged in arbitrary order + has, err := s.hot.Has(c) + if err != nil { + return xerrors.Errorf("error checking hotstore for %s: %w", c, err) + } - // it's not there (might have been deleted), signal to the vm to copy - if !has { - log.Warnf("missing object for recursive reference to %s", c) - return errMissingObject - } + // it's not there (might have been deleted), signal to the vm to copy + if !has { + log.Warnf("missing object for recursive reference to %s", c) + return errMissingObject + } - // mark it - return s.txnProtect.Mark(c) - }) + // mark it in *both* sets, so that we can short-circuit a concurrent walk. + err = s.txnMarkSet.Mark(c) + if err != nil { + return xerrors.Errorf("error marking %s: %w", c, err) + } + + return s.txnProtect.Mark(c) + }) } func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { @@ -657,7 +672,7 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { return nil } - // we have finished marking, protect the refs + // we have finished marking, shallow protect the refs for _, c := range cids { err := s.txnProtect.Mark(c) if err != nil { @@ -794,6 +809,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnLk.Lock() s.txnRefs = make(map[cid.Cid]struct{}) s.txnActive = true + s.txnBoundaryEpoch = boundaryEpoch s.txnLk.Unlock() // 1. 
mark reachable objects by walking the chain from the current epoch to the boundary epoch @@ -839,61 +855,89 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }() // 1.1 Update markset for references created during marking - log.Info("updating mark set for live references") - startMark = time.Now() - walked := cid.NewSet() - count = 0 var missing []cid.Cid - for c := range txnRefs { - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } - - if mark { - continue - } + if len(txnRefs) > 0 { + log.Info("updating mark set for live references", "refs", len(txnRefs)) + startMark = time.Now() + walked := cid.NewSet() + count = 0 - err = s.walkObject(c, walked, func(c cid.Cid) error { + for c := range txnRefs { mark, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) } if mark { - return errStopWalk + continue } - count++ - return markSet.Mark(c) - }) + // we have to do a deep walk here, as an early mark would stick even if there are + // missing references that haven't been written yet! + err = s.walkObjectDeep(c, walked, + func(c cid.Cid) error { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset for %s: %w", c, err) + } - if err != nil { - if xerrors.Is(err, bstore.ErrNotFound) { - log.Warnf("missing or incomplete object: %s", c) - missing = append(missing, c) - } else { - return xerrors.Errorf("error walking %s for marking: %w", c, err) + if mark { + return errStopWalk + } + + // we also short-circuit on old blocks, as these can come from a network request + // and cause us to fail because we have purged its consistituents (or missing from + // the beginning in case of snapshot sync, e.g. parent message receipts or old messages) + // if these blocks are on our chain, they would have been marked but they might be + // from a fork. + // + // Ideally, we would have API options to preclude us from trcking references to such + // objects, but we don't so we have to do this check + isOldBlock, err := s.isOldBlockHeader(c, boundaryEpoch) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %w", c, err) + } + + if isOldBlock { + return errStopWalk + } + + return nil + }, + func(c cid.Cid) error { + count++ + return markSet.Mark(c) + }) + + if err != nil { + if xerrors.Is(err, bstore.ErrNotFound) { + log.Warnf("missing or incomplete object: %s", c) + missing = append(missing, c) + } else { + return xerrors.Errorf("error walking %s for marking: %w", c, err) + } } } + + log.Infow("update marking set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) } - log.Infow("update marking set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) - // 1.2 rescan for missing objects (after waiting a minute), as they might have not been copied yet - // by the vm + // 1.2 rescan for missing objects (after waiting a bit), as they might have not been copied yet + // by the vm at the time of the update walk. 
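+ // in rough pseudocode (restating the retry loop that follows, nothing new): for len(missing) > 0 && try <= maxMissingScanRetries: reset the visit set (earlier short-cuts may hide children written since), sleep a minute so in-flight vm writes can land, then walk each missing cid again and mark whatever is now present.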
if len(missing) > 0 { try := 0 log.Info("rescanning for missing objects") startMark = time.Now() + count = 0 for len(missing) > 0 { if try > maxMissingScanRetries { - return xerrors.Errorf("failed to fully scan transactional refs; %d missing objects", len(missing)) + return xerrors.Errorf("failed to fully scan transactional refs; %d missing objects after %d attempts", len(missing), try) } // discard previous walk short-cuts - walked = cid.NewSet() + walked := cid.NewSet() towalk := missing missing = nil try++ @@ -903,44 +947,34 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { time.Sleep(time.Minute) for _, c := range towalk { - // we can't reliably check the markset and short-circuit this time, we have to do full walks - // because the object was previously visited top-to-bottom, with root DAGs short circuiting - // their children. - // but we *can* short-circuit on the txn protection filter, as this implies that the object - // will be protected from purge. - err = s.walkObject(c, walked, func(c cid.Cid) error { - mark, err := s.txnProtect.Has(c) - if err != nil { - return xerrors.Errorf("error checking protected set for %s: %w", c, err) - } - - if mark { - return errStopWalk - } - - // mark it - err = markSet.Mark(c) - if err != nil { - return err - } - - // we also short-circuit in case of a block header, as it may cause us to walk the - // entire chain because of a network request (and fail if we were synced form a snapshot - // because of missing messages or receipts!) - // this is necessary because we don't have interface options to signal network request - // initiated API calls; when we have that, we can stop tracking those references and - // we can remove this check. - isBlock, err := s.isBlockHeader(c) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %W", c, err) - } - - if isBlock { - return errStopWalk - } - - return nil - }) + // deep walk here again, as we are concerned about internal references not having been written + err = s.walkObjectDeep(c, walked, + func(c cid.Cid) error { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } + + if mark { + return errStopWalk + } + + // see comment above for this check + isOldBlock, err := s.isOldBlockHeader(c, boundaryEpoch) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %w", c, err) + } + + if isOldBlock { + return errStopWalk + } + + return nil + }, + func(c cid.Cid) error { + count++ + return markSet.Mark(c) + }) if err != nil { if xerrors.Is(err, bstore.ErrNotFound) { @@ -953,7 +987,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } } - log.Infow("rescanning done", "took", time.Since(startMark)) + log.Infow("rescanning done", "took", time.Since(startMark), "marked", count) } // 2. 
iterate through the hotstore to collect cold objects @@ -1112,26 +1146,53 @@ func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMs return nil } -func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { - if c.Prefix().Codec != cid.DagCBOR { - return false, nil +func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { + if !walked.Visit(c) { + return nil } - err = s.view(c, func(data []byte) error { - var hdr types.BlockHeader - isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil + if err := f(c); err != nil { + if err == errStopWalk { + return nil + } + + return err + } + + if c.Prefix().Codec != cid.DagCBOR { return nil + } + + var links []cid.Cid + err := s.view(c, func(data []byte) error { + return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { + links = append(links, c) + }) }) - return isBlock, err + if err != nil { + return xerrors.Errorf("error scanning linked block (cid: %s): %w", c, err) + } + + for _, c := range links { + err := s.walkObject(c, walked, f) + if err != nil { + return xerrors.Errorf("error walking link (cid: %s): %w", c, err) + } + } + + return nil } -func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { +// like walkObject, but it visits leaves first, with pre invoked at the parent node to control +// whether the walk should stop +func (s *SplitStore) walkObjectDeep(c cid.Cid, walked *cid.Set, + pre func(cid.Cid) error, f func(cid.Cid) error) error { if !walked.Visit(c) { return nil } - if err := f(c); err != nil { + if err := pre(c); err != nil { if err == errStopWalk { return nil } @@ -1140,7 +1201,7 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro } if c.Prefix().Codec != cid.DagCBOR { - return nil + return f(c) } var links []cid.Cid @@ -1155,13 +1216,13 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro } for _, c := range links { - err := s.walkObject(c, walked, f) + err := s.walkObjectDeep(c, walked, pre, f) if err != nil { return xerrors.Errorf("error walking link (cid: %s): %w", c, err) } } - return nil + return f(c) } // internal version used by walk @@ -1176,6 +1237,22 @@ func (s *SplitStore) view(cid cid.Cid, cb func([]byte) error) error { } } +func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlock bool, err error) { + if c.Prefix().Codec != cid.DagCBOR { + return false, nil + } + + err = s.view(c, func(data []byte) error { + var hdr types.BlockHeader + if hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil { + isOldBlock = hdr.Height < epoch + } + return nil + }) + + return isOldBlock, err +} + func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) From 4d286da5936c8c14f3e575115aea1e5492e61547 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 18:58:39 +0300 Subject: [PATCH 113/197] fix error message --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 58d9fa28260..be196db3506 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -824,7 +824,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }) if err != nil { - return xerrors.Errorf("error marking cold blocks: %w", err) + return xerrors.Errorf("error marking hot objects: %w", err) } if count > s.markSetSize { From 
f124389b6629ca4befb7ba208ccafb8a66a2fd22 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 19:21:00 +0300 Subject: [PATCH 114/197] recursively protect all references --- blockstore/splitstore/splitstore.go | 68 ++++++++++++++++++++---- blockstore/splitstore/splitstore_test.go | 1 + 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index be196db3506..656cabb907c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -49,6 +49,10 @@ var ( // we will walk the chain for live objects. CompactionBoundary = 4 * build.Finality + // CompactionLookback is the number of epochs from the current epoch at which + // we will consider marking an old block reference. + CompactionLookback = 2 * build.Finality + // SyncGapTime is the time delay from a tipset's min timestamp before we decide // there is a sync gap SyncGapTime = time.Minute @@ -143,7 +147,7 @@ type SplitStore struct { // protection for concurrent read/writes during compaction txnLk sync.RWMutex txnActive bool - txnBoundaryEpoch abi.ChainEpoch + txnLookbackEpoch abi.ChainEpoch txnEnv MarkSetEnv txnProtect MarkSet txnMarkSet MarkSet @@ -585,7 +589,7 @@ func (s *SplitStore) updateWriteEpoch() { } } -func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { +func (s *SplitStore) trackTxnRef(c cid.Cid, deep bool) error { if !s.txnActive { // not compacting return nil @@ -600,12 +604,53 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { } // we have finished marking, protect the reference - if !recursive { - // shallow protect - return s.txnProtect.Mark(c) + if !deep { + return s.doTxnProtect(c) } - // it's a recursive reference in vm context, protect links if they are not in the markset already + return s.doTxnProtectDeep(c) +} + +func (s *SplitStore) doTxnProtect(c cid.Cid) error { + // it's a shallow reference, protect with a standard walk without occur checking + return s.walkObject(c, cid.NewSet(), + func(c cid.Cid) error { + mark, err := s.txnMarkSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } + + // it's marked, nothing to do + if mark { + return errStopWalk + } + + mark, err = s.txnProtect.Has(c) + if err != nil { + return xerrors.Errorf("error checking mark set for %s: %w", c, err) + } + + // it's protected, nothing to do + if mark { + return errStopWalk + } + + // old block reference -- see comment in doCompact about the necessity of this + isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %w", c, err) + } + + if isOldBlock { + return errStopWalk + } + + return s.txnProtect.Mark(c) + }) +} + +func (s *SplitStore) doTxnProtectDeep(c cid.Cid) error { + // it's a deep reference potentially in vm context // we do a deep walk to visit the children first, short-circuiting if the parent has been marked. // the deep walk is necessary as internal references may be missing, e.g. because a defunct object // got recreated by the VM. 
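+ // shape of the walk, sketched from walkObjectDeep as defined in this file (not a new API): pre(c) runs top-down and may return errStopWalk to prune a subtree, while f(c) runs bottom-up so children are protected before their parent -- walkObjectDeep(c): if pre(c) == errStopWalk return; for each link l of c: walkObjectDeep(l); f(c).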
@@ -622,7 +667,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, recursive bool) error { } // old block reference -- see comment in doCompact about the necessity of this - isOldBlock, err := s.isOldBlockHeader(c, s.txnBoundaryEpoch) + isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) if err != nil { return xerrors.Errorf("error checking object type for %s: %w", c, err) } @@ -795,8 +840,9 @@ func (s *SplitStore) compact(curTs *types.TipSet) { func (s *SplitStore) doCompact(curTs *types.TipSet) error { currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary + lookbackEpoch := currentEpoch - CompactionLookback - log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "boundaryEpoch", boundaryEpoch) + log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "boundaryEpoch", boundaryEpoch, "lookbackEpoch", lookbackEpoch) markSet, err := s.markSetEnv.Create("live", s.markSetSize) if err != nil { @@ -809,7 +855,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnLk.Lock() s.txnRefs = make(map[cid.Cid]struct{}) s.txnActive = true - s.txnBoundaryEpoch = boundaryEpoch + s.txnLookbackEpoch = lookbackEpoch s.txnLk.Unlock() // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch @@ -893,7 +939,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // // Ideally, we would have API options to preclude us from trcking references to such // objects, but we don't so we have to do this check - isOldBlock, err := s.isOldBlockHeader(c, boundaryEpoch) + isOldBlock, err := s.isOldBlockHeader(c, lookbackEpoch) if err != nil { return xerrors.Errorf("error checking object type for %s: %w", c, err) } @@ -960,7 +1006,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } // see comment above for this check - isOldBlock, err := s.isOldBlockHeader(c, boundaryEpoch) + isOldBlock, err := s.isOldBlockHeader(c, lookbackEpoch) if err != nil { return xerrors.Errorf("error checking object type for %s: %w", c, err) } diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 624daa52548..4a98f4631ef 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -24,6 +24,7 @@ import ( func init() { CompactionThreshold = 5 CompactionBoundary = 2 + CompactionLookback = 2 logging.SetLogLevel("splitstore", "DEBUG") } From 13d612f72f314e48268ae78d59bb43cf4e4a7089 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 19:33:49 +0300 Subject: [PATCH 115/197] smarter trackTxnRefMany --- blockstore/splitstore/splitstore.go | 79 +++++++++++++++++------------ 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 656cabb907c..ff3f8596c0d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -605,16 +605,58 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, deep bool) error { // we have finished marking, protect the reference if !deep { - return s.doTxnProtect(c) + return s.doTxnProtect(c, nil) } return s.doTxnProtectDeep(c) } -func (s *SplitStore) doTxnProtect(c cid.Cid) error { +func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { + if !s.txnActive { + // not compacting + return nil + } + + if s.txnRefs != nil { + // we haven't finished marking yet, so track the reference + s.txnRefsMx.Lock() + for _, c := range cids { + s.txnRefs[c] = struct{}{} 
+ } + s.txnRefsMx.Unlock() + return nil + } + + // we have finished marking, protect the refs + batch := make(map[cid.Cid]struct{}, len(cids)) + for _, c := range cids { + batch[c] = struct{}{} + } + + for _, c := range cids { + err := s.doTxnProtect(c, batch) + if err != nil { + return err + } + } + + return nil +} + +func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) error { // it's a shallow reference, protect with a standard walk without occur checking - return s.walkObject(c, cid.NewSet(), + return s.walkObject(root, cid.NewSet(), func(c cid.Cid) error { + if c != root { + _, ok := batch[c] + if ok { + // it's on the same batch, stop walk + // this check is necessary as the object may contain references to objects + // in the same batch (yet to be written) that cannot be loaded for the walk + return errStopWalk + } + } + mark, err := s.txnMarkSet.Has(c) if err != nil { return xerrors.Errorf("error checking mark set for %s: %w", c, err) @@ -649,12 +691,12 @@ func (s *SplitStore) doTxnProtect(c cid.Cid) error { }) } -func (s *SplitStore) doTxnProtectDeep(c cid.Cid) error { +func (s *SplitStore) doTxnProtectDeep(root cid.Cid) error { // it's a deep reference potentially in vm context // we do a deep walk to visit the children first, short-circuiting if the parent has been marked. // the deep walk is necessary as internal references may be missing, e.g. because a defunct object // got recreated by the VM. - return s.walkObjectDeep(c, cid.NewSet(), + return s.walkObjectDeep(root, cid.NewSet(), func(c cid.Cid) error { mark, err := s.txnMarkSet.Has(c) if err != nil { @@ -701,33 +743,6 @@ func (s *SplitStore) doTxnProtectDeep(c cid.Cid) error { }) } -func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { - if !s.txnActive { - // not compacting - return nil - } - - if s.txnRefs != nil { - // we haven't finished marking yet, so track the reference - s.txnRefsMx.Lock() - for _, c := range cids { - s.txnRefs[c] = struct{}{} - } - s.txnRefsMx.Unlock() - return nil - } - - // we have finished marking, shallow protect the refs - for _, c := range cids { - err := s.txnProtect.Mark(c) - if err != nil { - return err - } - } - - return nil -} - func (s *SplitStore) warmup(curTs *types.TipSet) error { if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { return xerrors.Errorf("error locking compaction") From 40c271cda16658b3a9d086d880ecf070260e1660 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 20:17:07 +0300 Subject: [PATCH 116/197] sort cold objects before deleting so that we can't shoot ourselves in the foot by deleting the constituents of a DAG while it is still in the hotstore. --- blockstore/splitstore/splitstore.go | 53 ++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ff3f8596c0d..63490d1d6dc 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -5,6 +5,7 @@ import ( "context" "encoding/binary" "errors" + "sort" "sync" "sync/atomic" "time" @@ -1114,7 +1115,17 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("moving done", "took", time.Since(startMove)) } - // 4. purge cold objects from the hotstore, taking protected references into account + // 4. 
sort cold objects so that the dags with most references are deleted first + // this ensures that we can't refer to a dag with its consituents already deleted + log.Info("sorting cold objects") + startSort := time.Now() + err = s.sortObjects(cold) + if err != nil { + return xerrors.Errorf("error sorting objects: %w", err) + } + log.Infow("sorting done", "took", time.Since(startSort)) + + // 5. purge cold objects from the hotstore, taking protected references into account log.Info("purging cold objects from the hotstore") startPurge := time.Now() err = s.purge(curTs, cold) @@ -1348,6 +1359,46 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { return nil } +func (s *SplitStore) sortObjects(cids []cid.Cid) error { + weight := make(map[cid.Cid]int) + for _, c := range cids { + if c.Prefix().Codec != cid.DagCBOR { + continue + } + + w := 0 + err := s.walkObject(c, cid.NewSet(), + func(c cid.Cid) error { + wc, ok := weight[c] + if ok { + w += wc + return errStopWalk + } + + w++ + return nil + }) + + if err != nil { + return xerrors.Errorf("error determining cold object weight: %w", err) + } + + weight[c] = w + } + + sort.Slice(cids, func(i, j int) bool { + wi := weight[cids[i]] + wj := weight[cids[j]] + if wi == wj { + return bytes.Compare(cids[i].Hash(), cids[j].Hash()) > 0 + } + + return wi > wj + }) + + return nil +} + func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) error) error { if len(cids) == 0 { return nil From f33d4e79aa74c9f5ebca1b2c80d33d342906c656 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 20:49:39 +0300 Subject: [PATCH 117/197] simplify transactional protection logic Now that we delete objects heaviest first, we don't have to do deep walk and rescan gymnastics. --- blockstore/splitstore/splitstore.go | 175 +++------------------------- 1 file changed, 18 insertions(+), 157 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 63490d1d6dc..58e76329ddd 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -77,8 +77,6 @@ var ( // used to signal end of walk errStopWalk = errors.New("stop walk") - // used to signal a missing object when protecting recursive references - errMissingObject = errors.New("missing object") // set this to true if you are debugging the splitstore to enable debug logging enableDebugLog = false @@ -226,17 +224,7 @@ func (s *SplitStore) Has(c cid.Cid) (bool, error) { } if has { - // treat it as an implicit (recursive) Write, when it is within vm.Copy context. - // -- the vm uses this check to avoid duplicate writes on Copy. - // When we have options in the API (or something better), the vm can explicitly signal - // that this is an implicit Write. - err = s.trackTxnRef(c, true) - if xerrors.Is(err, errMissingObject) { - // we failed to recursively protect the object because some inner object has been purged; - // signal to the VM to copy. 
- return false, nil - } - + err = s.trackTxnRef(c) return true, err } @@ -251,7 +239,7 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { switch err { case nil: - err = s.trackTxnRef(cid, false) + err = s.trackTxnRef(cid) return blk, err case bstore.ErrNotFound: @@ -285,7 +273,7 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { switch err { case nil: - err = s.trackTxnRef(cid, false) + err = s.trackTxnRef(cid) return size, err case bstore.ErrNotFound: @@ -323,7 +311,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.mx.Unlock() s.debug.LogWrite(curTs, blk, writeEpoch) } - err = s.trackTxnRef(blk.Cid(), false) + err = s.trackTxnRef(blk.Cid()) } return err @@ -400,7 +388,7 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { err := s.hot.View(cid, cb) switch err { case nil: - err = s.trackTxnRef(cid, false) + err = s.trackTxnRef(cid) return err case bstore.ErrNotFound: @@ -590,7 +578,7 @@ func (s *SplitStore) updateWriteEpoch() { } } -func (s *SplitStore) trackTxnRef(c cid.Cid, deep bool) error { +func (s *SplitStore) trackTxnRef(c cid.Cid) error { if !s.txnActive { // not compacting return nil @@ -605,11 +593,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid, deep bool) error { } // we have finished marking, protect the reference - if !deep { - return s.doTxnProtect(c, nil) - } - - return s.doTxnProtectDeep(c) + return s.doTxnProtect(c, nil) } func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { @@ -645,8 +629,10 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { } func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) error { - // it's a shallow reference, protect with a standard walk without occur checking - return s.walkObject(root, cid.NewSet(), + // Note: cold objects are deleted heaviest first, so the consituents of an object + // cannot be deleted before the object itself. + // so we just do a regular walk and mark in the protected set. + err := s.walkObject(root, cid.NewSet(), func(c cid.Cid) error { if c != root { _, ok := batch[c] @@ -690,58 +676,12 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro return s.txnProtect.Mark(c) }) -} - -func (s *SplitStore) doTxnProtectDeep(root cid.Cid) error { - // it's a deep reference potentially in vm context - // we do a deep walk to visit the children first, short-circuiting if the parent has been marked. - // the deep walk is necessary as internal references may be missing, e.g. because a defunct object - // got recreated by the VM. 
- return s.walkObjectDeep(root, cid.NewSet(), - func(c cid.Cid) error { - mark, err := s.txnMarkSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) - } - - // it's marked, nothing to do - if mark { - return errStopWalk - } - - // old block reference -- see comment in doCompact about the necessity of this - isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %w", c, err) - } - - if isOldBlock { - return errStopWalk - } - - return nil - }, - func(c cid.Cid) error { - // this occurs check is necessary because cold objects are purged in arbitrary order - has, err := s.hot.Has(c) - if err != nil { - return xerrors.Errorf("error checking hotstore for %s: %w", c, err) - } - - // it's not there (might have been deleted), signal to the vm to copy - if !has { - log.Warnf("missing object for recursive reference to %s", c) - return errMissingObject - } - // mark it in *both* sets, so that we can short-circuit a concurrent walk. - err = s.txnMarkSet.Mark(c) - if err != nil { - return xerrors.Errorf("error marking %s: %w", c, err) - } + if err != nil { + log.Warnf("error protecting object (cid: %s): %s", root, err) + } - return s.txnProtect.Mark(c) - }) + return err } func (s *SplitStore) warmup(curTs *types.TipSet) error { @@ -917,7 +857,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }() // 1.1 Update markset for references created during marking - var missing []cid.Cid if len(txnRefs) > 0 { log.Info("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() @@ -934,9 +873,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { continue } - // we have to do a deep walk here, as an early mark would stick even if there are - // missing references that haven't been written yet! - err = s.walkObjectDeep(c, walked, + err = s.walkObject(c, walked, func(c cid.Cid) error { mark, err := markSet.Has(c) if err != nil { @@ -964,92 +901,16 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return errStopWalk } - return nil - }, - func(c cid.Cid) error { count++ return markSet.Mark(c) }) if err != nil { - if xerrors.Is(err, bstore.ErrNotFound) { - log.Warnf("missing or incomplete object: %s", c) - missing = append(missing, c) - } else { - return xerrors.Errorf("error walking %s for marking: %w", c, err) - } - } - } - - log.Infow("update marking set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) - } - - // 1.2 rescan for missing objects (after waiting a bit), as they might have not been copied yet - // by the vm at the time of the update walk. 
- if len(missing) > 0 { - try := 0 - - log.Info("rescanning for missing objects") - startMark = time.Now() - count = 0 - - for len(missing) > 0 { - if try > maxMissingScanRetries { - return xerrors.Errorf("failed to fully scan transactional refs; %d missing objects after %d attempts", len(missing), try) - } - - // discard previous walk short-cuts - walked := cid.NewSet() - towalk := missing - missing = nil - try++ - - log.Infof("rescanning for %d missing objects (attempt %d)", len(towalk), try) - // wait a minute first for in-flight writes to complete - time.Sleep(time.Minute) - - for _, c := range towalk { - // deep walk here again, as we are concerned about internal references not having been written - err = s.walkObjectDeep(c, walked, - func(c cid.Cid) error { - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) - } - - if mark { - return errStopWalk - } - - // see comment above for this check - isOldBlock, err := s.isOldBlockHeader(c, lookbackEpoch) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %w", c, err) - } - - if isOldBlock { - return errStopWalk - } - - return nil - }, - func(c cid.Cid) error { - count++ - return markSet.Mark(c) - }) - - if err != nil { - if xerrors.Is(err, bstore.ErrNotFound) { - log.Warnf("missing or incomplete object: %s", c) - missing = append(missing, c) - } else { - return xerrors.Errorf("error walking %s for marking: %w", c, err) - } - } + return xerrors.Errorf("error walking %s for marking: %w", c, err) } } - log.Infow("rescanning done", "took", time.Since(startMark), "marked", count) + log.Infow("update marking set done", "took", time.Since(startMark), "marked", count) } // 2. iterate through the hotstore to collect cold objects From 94efae419eda92629254ddbd9f1e9493fe76b22c Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 21:21:53 +0300 Subject: [PATCH 118/197] reduce length of critical section Just the purge; the rest is not critical -- e.g. it's ok if we do some duplicate copies to the coldstore, we'll have gc soon. --- blockstore/splitstore/splitstore.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 58e76329ddd..56ec954b1bd 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -954,17 +954,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) - // Enter critical section - log.Info("entering critical section") - atomic.StoreInt32(&s.critsection, 1) - defer atomic.StoreInt32(&s.critsection, 0) - - // check to see if we are closing first; if that's the case just return - if atomic.LoadInt32(&s.closing) == 1 { - log.Info("splitstore is closing; aborting compaction") - return xerrors.Errorf("compaction aborted") - } - // 3. 
copy the cold objects to the coldstore -- if we have one if !s.cfg.DiscardColdBlocks { log.Info("moving cold blocks to the coldstore") @@ -986,6 +975,17 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } log.Infow("sorting done", "took", time.Since(startSort)) + // Enter critical section + log.Info("entering critical section") + atomic.StoreInt32(&s.critsection, 1) + defer atomic.StoreInt32(&s.critsection, 0) + + // check to see if we are closing first; if that's the case just return + if atomic.LoadInt32(&s.closing) == 1 { + log.Info("splitstore is closing; aborting compaction") + return xerrors.Errorf("compaction aborted") + } + // 5. purge cold objects from the hotstore, taking protected references into account log.Info("purging cold objects from the hotstore") startPurge := time.Now() err = s.purge(curTs, cold) From b08e0b7102fbe7280a25664d77dd158e6c33d792 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 21:24:15 +0300 Subject: [PATCH 119/197] fix lint --- blockstore/splitstore/splitstore.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 56ec954b1bd..d6e597aa3bf 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -82,8 +82,6 @@ var ( enableDebugLog = false // set this to true if you want to track origin stack traces in the write log enableDebugLogWriteTraces = false - - maxMissingScanRetries = 3 ) const ( From db53859e7a029800daf797e68dbd81ca3ee418d5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 4 Jul 2021 22:12:51 +0300 Subject: [PATCH 120/197] reduce CompactionThreshold to 5 finalities so that we run compaction every finality, once we've first compacted --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d6e597aa3bf..31e50b356a3 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -44,7 +44,7 @@ var ( // === :: cold (already archived) // ≡≡≡ :: to be archived in this compaction // --- :: hot - CompactionThreshold = 6 * build.Finality + CompactionThreshold = 5 * build.Finality // CompactionBoundary is the number of epochs from the current epoch at which // we will walk the chain for live objects. From 1726eb993cba3adaaa519c2d596152b8db582cdc Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 10:22:52 +0300 Subject: [PATCH 121/197] deal with incomplete objects that need to be marked and protected It seems that something is writing DAGs before their constituents, which causes problems. --- blockstore/splitstore/splitstore.go | 166 +++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 16 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 31e50b356a3..c6350cc76a1 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -148,6 +148,7 @@ type SplitStore struct { txnEnv MarkSetEnv txnProtect MarkSet txnMarkSet MarkSet + txnMissing map[cid.Cid]struct{} txnRefsMx sync.Mutex txnRefs map[cid.Cid]struct{} } @@ -629,15 +630,12 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) error { // Note: cold objects are deleted heaviest first, so the consituents of an object cannot be deleted before the object itself. - // so we just do a regular walk and mark in the protected set. 
- err := s.walkObject(root, cid.NewSet(), + err := s.walkObjectIncomplete(root, cid.NewSet(), func(c cid.Cid) error { if c != root { _, ok := batch[c] if ok { // it's on the same batch, stop walk - // this check is necessary as the object may contain references to objects - // in the same batch (yet to be written) that cannot be loaded for the walk return errStopWalk } } @@ -673,6 +671,11 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro } return s.txnProtect.Mark(c) + }, + func(c cid.Cid) error { + log.Warnf("missing object %s in %s", c, root) + s.txnMissing[c] = struct{}{} + return errStopWalk }) if err != nil { @@ -843,6 +846,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error creating transactional mark set: %w", err) } s.txnMarkSet = markSet + s.txnMissing = make(map[cid.Cid]struct{}) s.txnLk.Unlock() defer func() { @@ -851,10 +855,12 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnActive = false s.txnProtect = nil s.txnMarkSet = nil + s.txnMissing = nil s.txnLk.Unlock() }() // 1.1 Update markset for references created during marking + missing := make(map[cid.Cid]struct{}) if len(txnRefs) > 0 { log.Info("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() @@ -871,7 +877,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { continue } - err = s.walkObject(c, walked, + err = s.walkObjectIncomplete(c, walked, func(c cid.Cid) error { mark, err := markSet.Has(c) if err != nil { @@ -901,6 +907,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { count++ return markSet.Mark(c) + }, + func(c cid.Cid) error { + missing[c] = struct{}{} + return errStopWalk }) if err != nil { @@ -908,7 +918,76 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } } - log.Infow("update marking set done", "took", time.Since(startMark), "marked", count) + log.Infow("update mark set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) + } + + // 1.2 if there were missing objects (presumably because they haven't been written yet), + // wait for them to be written and retry marking + if len(missing) > 0 { + log.Info("marking for missing objects") + startMark = time.Now() + try := 0 + count = 0 + + for len(missing) > 0 { + if try >= 5 { + return xerrors.Errorf("missing %d objects after %d attempts; giving up", len(missing), try) + } + try++ + + // wait a bit + time.Sleep(time.Minute) + log.Infow("marking missing objects", "attempt", try, "missing", len(missing), "marked", count) + + towalk := missing + missing = make(map[cid.Cid]struct{}) + walked := cid.NewSet() + + for c := range towalk { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset for %s: %w", c, err) + } + + if mark { + continue + } + + err = s.walkObjectIncomplete(c, walked, + func(c cid.Cid) error { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset for %s: %w", c, err) + } + + if mark { + return errStopWalk + } + + isOldBlock, err := s.isOldBlockHeader(c, lookbackEpoch) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %w", c, err) + } + + if isOldBlock { + return errStopWalk + } + + count++ + return markSet.Mark(c) + }, + func(c cid.Cid) error { + missing[c] = struct{}{} + return errStopWalk + }) + + if err != nil { + return xerrors.Errorf("error walking %s for marking: %w", c, err) + } + } + } + + log.Infow("marking for missing objects done", "took", 
time.Since(startMark), "attempts", try, "marked", count) } // 2. iterate through the hotstore to collect cold objects @@ -1115,15 +1194,28 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro return nil } -// like walkObject, but it visits leaves first, with pre invoked at the parent node to control -// whether the walk should stop -func (s *SplitStore) walkObjectDeep(c cid.Cid, walked *cid.Set, - pre func(cid.Cid) error, f func(cid.Cid) error) error { +// like walkObject, but the object may be potentially incomplete (references missing from the hotstore) +func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing func(cid.Cid) error) error { if !walked.Visit(c) { return nil } - if err := pre(c); err != nil { + // occurs check + has, err := s.hot.Has(c) + if err != nil { + return xerrors.Errorf("error occur checking %s: %w", c, err) + } + + if !has { + err = missing(c) + if err == errStopWalk { + return nil + } + + return err + } + + if err := f(c); err != nil { if err == errStopWalk { return nil } @@ -1132,11 +1224,11 @@ func (s *SplitStore) walkObjectDeep(c cid.Cid, walked *cid.Set, } if c.Prefix().Codec != cid.DagCBOR { - return f(c) + return nil } var links []cid.Cid - err := s.view(c, func(data []byte) error { + err = s.view(c, func(data []byte) error { return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { links = append(links, c) }) @@ -1147,13 +1239,13 @@ func (s *SplitStore) walkObjectDeep(c cid.Cid, walked *cid.Set, } for _, c := range links { - err := s.walkObjectDeep(c, walked, pre, f) + err := s.walkObjectIncomplete(c, walked, f, missing) if err != nil { return xerrors.Errorf("error walking link (cid: %s): %w", c, err) } } - return f(c) + return nil } // internal version used by walk @@ -1263,7 +1355,8 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro return nil } - // don't delete one giant batch of 7M objects, but rather do smaller batches + // we don't delete one giant batch of millions of objects, but rather do smaller batches + // so that we don't stop the world for an extended period of time done := false for i := 0; !done; i++ { start := i * batchSize @@ -1289,11 +1382,52 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { log.Infow("purged cold objects", "purged", purgeCnt, "live", liveCnt) }() + protectMissing := func(missing map[cid.Cid]struct{}) error { + s.txnLk.RLock() + defer s.txnLk.RUnlock() + + for c := range missing { + err := s.doTxnProtect(c, missing) + if err != nil { + return err + } + } + + return nil + } + return s.purgeBatch(cids, func(cids []cid.Cid) error { deadCids := deadCids[:0] + // ideally this would be just s.txnLk.Lock() and defer s.txnLk.Unlock(), but we have to + // deal with incomplete object protection + try := 0 + again: s.txnLk.Lock() + if len(s.txnMissing) > 0 { + if try >= 5 { + count := len(s.txnMissing) + s.txnLk.Unlock() + return xerrors.Errorf("error purging: missing %d objects after %d attempts; giving up", count, try) + } + + try++ + log.Infof("delaying purge; missing %d protected objects (attempt: %d)", len(s.txnMissing), try) + missing := s.txnMissing + s.txnMissing = make(map[cid.Cid]struct{}) + s.txnLk.Unlock() + + if try > 1 { + time.Sleep(time.Minute) + } + err := protectMissing(missing) + if err != nil { + return xerrors.Errorf("purge error: error protecting missing objects: %w", err) + } + + goto again + } defer s.txnLk.Unlock() for _, c := range cids { From 3597192d5893914a7e46ed6a8c2b7af0a615a31a Mon Sep 17
00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 10:31:47 +0300 Subject: [PATCH 122/197] remove the sleeps and busy loop more times when waiting for missing objects --- blockstore/splitstore/splitstore.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c6350cc76a1..97273a7965a 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -930,13 +930,12 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { count = 0 for len(missing) > 0 { - if try >= 5 { + if try >= 100 { return xerrors.Errorf("missing %d objects after %d attempts; giving up", len(missing), try) } try++ // wait a bit - time.Sleep(time.Minute) log.Infow("marking missing objects", "attempt", try, "missing", len(missing), "marked", count) towalk := missing @@ -1406,7 +1405,7 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { again: s.txnLk.Lock() if len(s.txnMissing) > 0 { - if try >= 5 { + if try >= 100 { count := len(s.txnMissing) s.txnLk.Unlock() return xerrors.Errorf("error purging: missing %d objects after %d attempts; giving up", count, try) @@ -1418,9 +1417,6 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { s.txnMissing = make(map[cid.Cid]struct{}) s.txnLk.Unlock() - if try > 1 { - time.Sleep(time.Minute) - } err := protectMissing(missing) if err != nil { return xerrors.Errorf("purge error: error protecting missing objects: %w", err) From 4c41f52828f1f4fead8ffe8db9f671a2f4c2ee55 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 10:35:04 +0300 Subject: [PATCH 123/197] add warning for missing objects for marking for debug purposes --- blockstore/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 97273a7965a..7bad9e993a8 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -976,6 +976,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return markSet.Mark(c) }, func(c cid.Cid) error { + log.Warnf("missing object for marking: %s", c) missing[c] = struct{}{} return errStopWalk }) From c81ae5fc20e91cc1345721815e99384df69f32eb Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 10:42:14 +0300 Subject: [PATCH 124/197] add some comments about the missing business and another log --- blockstore/splitstore/splitstore.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 7bad9e993a8..d5d426aac11 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -630,6 +630,8 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) error { // Note: cold objects are deleted heaviest first, so the constituents of an object // cannot be deleted before the object itself. + // Note on this missing business: it appears that some DAGs can be written before their + // constituents. THIS NEEDS TO BE FIXED -- but until then we do this missing dance business err := s.walkObjectIncomplete(root, cid.NewSet(), func(c cid.Cid) error { if c != root { _, ok := batch[c] if ok { @@ -860,6 +862,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }() // 1.1 Update markset for references created during marking + // Note on this missing business: it appears that some DAGs can be written before their + // constituents.
THIS NEEDS TO BE FIXED -- but until then we do this missing dance business missing := make(map[cid.Cid]struct{}) if len(txnRefs) > 0 { log.Info("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() @@ -909,6 +913,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return markSet.Mark(c) }, func(c cid.Cid) error { + log.Warnf("missing object for marking: %s", c) missing[c] = struct{}{} return errStopWalk }) @@ -1382,6 +1387,8 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { log.Infow("purged cold objects", "purged", purgeCnt, "live", liveCnt) }() + // Note on this missing business: it appears that some DAGs can be written before their + // constituents. THIS NEEDS TO BE FIXED -- but until then we do this missing dance business protectMissing := func(missing map[cid.Cid]struct{}) error { s.txnLk.RLock() defer s.txnLk.RUnlock() From 839f7bd2b5789b326056d782c2f819a8b5bd3a1b Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 11:11:08 +0300 Subject: [PATCH 125/197] only occur check for DAGs --- blockstore/splitstore/splitstore.go | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d5d426aac11..e8022db73d6 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1205,19 +1205,21 @@ func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing return nil } - // occurs check - has, err := s.hot.Has(c) - if err != nil { - return xerrors.Errorf("error occur checking %s: %w", c, err) - } - - if !has { - err = missing(c) - if err == errStopWalk { - return nil + // occurs check -- only for DAGs + if c.Prefix().Codec == cid.DagCBOR { + has, err := s.hot.Has(c) + if err != nil { + return xerrors.Errorf("error occur checking %s: %w", c, err) } - return err + if !has { + err = missing(c) + if err == errStopWalk { + return nil + } + + return err + } } if err := f(c); err != nil { @@ -1233,7 +1235,7 @@ func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing } var links []cid.Cid - err = s.view(c, func(data []byte) error { + err := s.view(c, func(data []byte) error { return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { links = append(links, c) }) From 2ea2abc07dad626371c403b939ecd878477a43fc Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 11:32:52 +0300 Subject: [PATCH 126/197] short-circuit fil commitments they don't make it to the blockstore anyway --- blockstore/splitstore/splitstore.go | 39 ++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e8022db73d6..ecd5c83b881 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -634,6 +634,10 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro // constituents.
THIS NEEDS TO BE FIXED -- but until then we do this missing dance business err := s.walkObjectIncomplete(root, cid.NewSet(), func(c cid.Cid) error { + if isFilCommitment(c) { + return errStopWalk + } + if c != root { _, ok := batch[c] if ok { @@ -717,10 +721,14 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { xcount := int64(0) missing := int64(0) err := s.walkChain(curTs, epoch, false, - func(cid cid.Cid) error { + func(c cid.Cid) error { + if isFilCommitment(c) { + return errStopWalk + } + count++ - has, err := s.hot.Has(cid) + has, err := s.hot.Has(c) if err != nil { return err } @@ -729,7 +737,7 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { return nil } - blk, err := s.cold.Get(cid) + blk, err := s.cold.Get(c) if err != nil { if err == bstore.ErrNotFound { missing++ @@ -824,6 +832,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { var count int64 err = s.walkChain(curTs, boundaryEpoch, true, func(c cid.Cid) error { + if isFilCommitment(c) { + return errStopWalk + } + count++ return markSet.Mark(c) }) @@ -883,6 +895,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { err = s.walkObjectIncomplete(c, walked, func(c cid.Cid) error { + if isFilCommitment(c) { + return errStopWalk + } + mark, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) @@ -948,6 +964,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { walked := cid.NewSet() for c := range towalk { + if isFilCommitment(c) { + continue + } + mark, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) @@ -959,6 +979,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { err = s.walkObjectIncomplete(c, walked, func(c cid.Cid) error { + if isFilCommitment(c) { + return errStopWalk + } + mark, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) @@ -1516,3 +1540,12 @@ func bytesToUint64(buf []byte) uint64 { i, _ := binary.Uvarint(buf) return i } + +func isFilCommitment(c cid.Cid) bool { + switch c.Prefix().Codec { + case cid.FilCommitmentSealed, cid.FilCommitmentUnsealed: + return true + default: + return false + } +} From 918a7ec7493fd326cb369c477b4f0e520b6026d7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 11:38:53 +0300 Subject: [PATCH 127/197] a bit more fil commitment short-circuiting --- blockstore/splitstore/splitstore.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ecd5c83b881..bcc16db73ce 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -884,6 +884,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { count = 0 for c := range txnRefs { + if isFilCommitment(c) { + continue + } + mark, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) From 3ec834b2e34ba8e1ec4fec0d62dc506ea4e503cc Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 11:41:09 +0300 Subject: [PATCH 128/197] improve logs and error messages --- blockstore/splitstore/splitstore.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index bcc16db73ce..d54c66a701c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -826,7 +826,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) 
error { s.txnLk.Unlock() // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch - log.Info("marking reachable blocks") + log.Info("marking reachable objects") startMark := time.Now() var count int64 @@ -841,7 +841,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }) if err != nil { - return xerrors.Errorf("error marking hot objects: %w", err) + return xerrors.Errorf("error marking: %w", err) } if count > s.markSetSize { @@ -1066,11 +1066,11 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // 3. copy the cold objects to the coldstore -- if we have one if !s.cfg.DiscardColdBlocks { - log.Info("moving cold blocks to the coldstore") + log.Info("moving cold objects to the coldstore") startMove := time.Now() err = s.moveColdBlocks(cold) if err != nil { - return xerrors.Errorf("error moving cold blocks: %w", err) + return xerrors.Errorf("error moving cold objects: %w", err) } log.Infow("moving done", "took", time.Since(startMove)) } @@ -1103,7 +1103,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { if err != nil { return xerrors.Errorf("error purging cold blocks: %w", err) } - log.Infow("purging cold from hotstore done", "took", time.Since(startPurge)) + log.Infow("purging cold objects from hotstore done", "took", time.Since(startPurge)) // we are done; do some housekeeping s.gcHotstore() From d7709deb2b34324af3fd93a84e28ce7664ebdedb Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 11:51:22 +0300 Subject: [PATCH 129/197] reduce memory pressure from marksets when the size is decreased --- blockstore/splitstore/splitstore.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d54c66a701c..5318302805f 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -773,10 +773,7 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { log.Infow("warmup stats", "visited", count, "warm", xcount, "missing", missing) - if count > s.markSetSize { - s.markSetSize = count + count>>2 // overestimate a bit - } - + s.markSetSize = count + count>>2 // overestimate a bit err = s.ds.Put(markSetSizeKey, int64ToBytes(s.markSetSize)) if err != nil { log.Warnf("error saving mark set size: %s", err) @@ -844,9 +841,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error marking: %w", err) } - if count > s.markSetSize { - s.markSetSize = count + count>>2 // overestimate a bit - } + s.markSetSize = count + count>>2 // overestimate a bit log.Infow("marking done", "took", time.Since(startMark), "marked", count) From d8b8d75e0fae1763d73e278863572424381c4111 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 12:38:09 +0300 Subject: [PATCH 130/197] readd minute delay before trying for missing objects --- blockstore/splitstore/splitstore.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 5318302805f..ad9f56edab3 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -956,6 +956,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { try++ // wait a bit + time.Sleep(time.Minute) log.Infow("marking missing objects", "attempt", try, "missing", len(missing), "marked", count) towalk := missing @@ -1450,6 +1451,7 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { s.txnMissing = make(map[cid.Cid]struct{}) 
s.txnLk.Unlock() + time.Sleep(time.Minute) err := protectMissing(missing) if err != nil { return xerrors.Errorf("purge error: error protecting missing objects: %w", err) From 0b7153be86f6c8c2455c834df5263cfc6b2c7df7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 12:41:11 +0300 Subject: [PATCH 131/197] use internal version of has for occurs checks --- blockstore/splitstore/splitstore.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ad9f56edab3..38abdbf781a 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1231,7 +1231,7 @@ func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing // occurs check -- only for DAGs if c.Prefix().Codec == cid.DagCBOR { - has, err := s.hot.Has(c) + has, err := s.has(c) if err != nil { return xerrors.Errorf("error occur checking %s: %w", c, err) } @@ -1291,6 +1291,16 @@ func (s *SplitStore) view(cid cid.Cid, cb func([]byte) error) error { } } +func (s *SplitStore) has(c cid.Cid) (bool, error) { + has, err := s.hot.Has(c) + + if has || err != nil { + return has, err + } + + return s.cold.Has(c) +} + func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlock bool, err error) { if c.Prefix().Codec != cid.DagCBOR { return false, nil } From 59936ef468cc539ce16281aada9c7dd20339377c Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 13:30:31 +0300 Subject: [PATCH 132/197] fix log --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 38abdbf781a..4a67bd1ad62 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -873,7 +873,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // constituents. THIS NEEDS TO BE FIXED -- but until then we do this missing dance business missing := make(map[cid.Cid]struct{}) if len(txnRefs) > 0 { - log.Info("updating mark set for live references", "refs", len(txnRefs)) + log.Infow("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() walked := cid.NewSet() count = 0 From fa195bede2c5072c46addc50e1ff4be0e36f5fbe Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 13:36:34 +0300 Subject: [PATCH 133/197] get rid of ugly missing reference handling code those missing objects don't seem to ever get there, are they from an abandoned fork? --- blockstore/splitstore/splitstore.go | 131 +--------------------------- 1 file changed, 1 insertion(+), 130 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 4a67bd1ad62..8f1205eb27c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -148,7 +148,6 @@ type SplitStore struct { txnEnv MarkSetEnv txnProtect MarkSet txnMarkSet MarkSet - txnMissing map[cid.Cid]struct{} txnRefsMx sync.Mutex txnRefs map[cid.Cid]struct{} } @@ -630,8 +629,6 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro // Note: cold objects are deleted heaviest first, so the constituents of an object cannot be deleted before the object itself. - // Note on this missing business: it appears that some DAGs can be written before their - // constituents.
THIS NEEDS TO BE FIXED -- but until then we do this missing dance business err := s.walkObjectIncomplete(root, cid.NewSet(), func(c cid.Cid) error { if isFilCommitment(c) { @@ -680,7 +677,6 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro }, func(c cid.Cid) error { log.Warnf("missing object %s in %s", c, root) - s.txnMissing[c] = struct{}{} return errStopWalk }) @@ -855,7 +851,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error creating transactional mark set: %w", err) } s.txnMarkSet = markSet - s.txnMissing = make(map[cid.Cid]struct{}) s.txnLk.Unlock() defer func() { @@ -864,14 +859,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnActive = false s.txnProtect = nil s.txnMarkSet = nil - s.txnMissing = nil s.txnLk.Unlock() }() // 1.1 Update markset for references created during marking - // Note on this missing business: it appears that some DAGs can be written before their - // constituents. THIS NEEDS TO BE FIXED -- but until then we do this missing dance business - missing := make(map[cid.Cid]struct{}) if len(txnRefs) > 0 { log.Infow("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() walked := cid.NewSet() count = 0 @@ -929,7 +920,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }, func(c cid.Cid) error { log.Warnf("missing object for marking: %s", c) - missing[c] = struct{}{} return errStopWalk }) @@ -938,85 +928,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } } - log.Infow("update mark set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) - } - - // 1.2 if there were missing objects (presumably because they haven't been written yet), - // wait for them to be written and retry marking - if len(missing) > 0 { - log.Info("marking for missing objects") - startMark = time.Now() - try := 0 - count = 0 - - for len(missing) > 0 { - if try >= 100 { - return xerrors.Errorf("missing %d objects after %d attempts; giving up", len(missing), try) - } - try++ - - // wait a bit - time.Sleep(time.Minute) - log.Infow("marking missing objects", "attempt", try, "missing", len(missing), "marked", count) - - towalk := missing - missing = make(map[cid.Cid]struct{}) - walked := cid.NewSet() - - for c := range towalk { - if isFilCommitment(c) { - continue - } - - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } - - if mark { - continue - } - - err = s.walkObjectIncomplete(c, walked, - func(c cid.Cid) error { - if isFilCommitment(c) { - return errStopWalk - } - - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } - - if mark { - return errStopWalk - } - - isOldBlock, err := s.isOldBlockHeader(c, lookbackEpoch) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %w", c, err) - } - - if isOldBlock { - return errStopWalk - } - - count++ - return markSet.Mark(c) - }, - func(c cid.Cid) error { - log.Warnf("missing object for marking: %s", c) - missing[c] = struct{}{} - return errStopWalk - }) - - if err != nil { - return xerrors.Errorf("error walking %s for marking: %w", c, err) - } - } - } - - log.Infow("marking for missing objects done", "took", time.Since(startMark), "attempts", try, "marked", count) + log.Infow("update mark set done", "took", time.Since(startMark), "marked", count) } // 2.
iterate through the hotstore to collect cold objects @@ -1423,52 +1335,11 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { log.Infow("purged cold objects", "purged", purgeCnt, "live", liveCnt) }() - // Note on this missing business: it appears that some DAGs can be written before their - // constituents. THIS NEEDS TO BE FIXED -- but until then we do this missing dance business - protectMissing := func(missing map[cid.Cid]struct{}) error { - s.txnLk.RLock() - defer s.txnLk.RUnlock() - - for c := range missing { - err := s.doTxnProtect(c, missing) - if err != nil { - return err - } - } - - return nil - } - return s.purgeBatch(cids, func(cids []cid.Cid) error { deadCids := deadCids[:0] - // ideally this would be just s.txnLk.Lock() and defer s.txnLk.Unlock(), but we have to - // deal with incomplete object protection - try := 0 - again: s.txnLk.Lock() - if len(s.txnMissing) > 0 { - if try >= 100 { - count := len(s.txnMissing) - s.txnLk.Unlock() - return xerrors.Errorf("error purging: missing %d objects after %d attempts; giving up", count, try) - } - - try++ - log.Infof("delaying purge; missing %d protected objects (attempt: %d)", len(s.txnMissing), try) - missing := s.txnMissing - s.txnMissing = make(map[cid.Cid]struct{}) - s.txnLk.Unlock() - - time.Sleep(time.Minute) - err := protectMissing(missing) - if err != nil { - return xerrors.Errorf("purge error: error protecting missing objects: %w", err) - } - - goto again - } defer s.txnLk.Unlock() for _, c := range cids { From 59639a07882b4cb4cf77406f9c7d896b389b2d37 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 16:08:08 +0300 Subject: [PATCH 134/197] reinstate some better code for handling missing references. --- blockstore/splitstore/splitstore.go | 110 +++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 8f1205eb27c..b6aa48ff743 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -150,6 +150,7 @@ type SplitStore struct { txnMarkSet MarkSet txnRefsMx sync.Mutex txnRefs map[cid.Cid]struct{} + txnMissing map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -677,6 +678,11 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro }, func(c cid.Cid) error { log.Warnf("missing object %s in %s", c, root) + if s.txnMissing != nil { + s.txnRefsMx.Lock() + s.txnMissing[c] = struct{}{} + s.txnRefsMx.Unlock() + } return errStopWalk }) @@ -845,6 +851,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnLk.Lock() txnRefs := s.txnRefs s.txnRefs = nil + s.txnMissing = make(map[cid.Cid]struct{}) s.txnProtect, err = s.txnEnv.Create("protected", 0) if err != nil { s.txnLk.Unlock() @@ -859,10 +866,12 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { s.txnActive = false s.txnProtect = nil s.txnMarkSet = nil + s.txnMissing = nil s.txnLk.Unlock() }() // 1.1 Update markset for references created during marking + missing := make(map[cid.Cid]struct{}) if len(txnRefs) > 0 { log.Infow("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() @@ -920,6 +929,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }, func(c cid.Cid) error { log.Warnf("missing object for marking: %s", c) + missing[c] = struct{}{} return errStopWalk }) @@ -928,7 +938,12 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } } - log.Infow("update mark set done", "took",
time.Since(startMark), "marked", count) + log.Infow("update mark set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) + } + + // 1.2 if there are missing references wait a bit for them to see if they are written later + if len(missing) > 0 { + s.waitForMissingRefs(missing, markSet, nil) } // 2. iterate through the hotstore to collect cold objects @@ -962,7 +977,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error collecting candidate cold objects: %w", err) } - log.Infow("candidate collection done", "took", time.Since(startCollect)) + log.Infow("cold collection done", "took", time.Since(startCollect)) if coldCnt > 0 { s.coldPurgeSize = coldCnt + coldCnt>>2 // overestimate a bit @@ -972,6 +987,17 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) + // now that we have collected cold objects, check for missing references from transactional i/o + // and disable further collection of such references (they will not be acted upon) + s.txnLk.Lock() + missing = s.txnMissing + s.txnMissing = nil + s.txnLk.Unlock() + + if len(missing) > 0 { + s.waitForMissingRefs(missing, s.txnProtect, markSet) + } + // 3. copy the cold objects to the coldstore -- if we have one if !s.cfg.DiscardColdBlocks { log.Info("moving cold objects to the coldstore") @@ -1367,6 +1393,86 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { }) } +// I really don't like having this code, but we seem to have some DAG references with missing +// constituents. During testing in mainnet *some* of these references *sometimes* appeared after a +// little bit. +// We need to figure out where they are coming from and eliminate that vector, but until then we +// have this gem[TM]. 
+func (s *SplitStore) waitForMissingRefs(missing map[cid.Cid]struct{}, markSet, ctlSet MarkSet) { + log.Info("waiting for missing references") + start := time.Now() + count := 0 + defer func() { + log.Infow("waiting for missing references done", "took", time.Since(start), "marked", count) + }() + + for i := 1; i <= 3 && len(missing) > 0; i++ { + wait := time.Duration(i) * time.Minute + log.Infof("retrying for %d missing references in %s (attempt: %d)", len(missing), wait, i) + time.Sleep(wait) + + towalk := missing + walked := cid.NewSet() + missing = make(map[cid.Cid]struct{}) + + for c := range towalk { + err := s.walkObjectIncomplete(c, walked, + func(c cid.Cid) error { + if isFilCommitment(c) { + return errStopWalk + } + + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset for %s: %w", c, err) + } + + if mark { + return errStopWalk + } + + if ctlSet != nil { + mark, err = ctlSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset for %s: %w", c, err) + } + + if mark { + return errStopWalk + } + } + + isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) + if err != nil { + return xerrors.Errorf("error checking object type for %s: %w", c, err) + } + + if isOldBlock { + return errStopWalk + } + + count++ + return markSet.Mark(c) + }, + func(c cid.Cid) error { + missing[c] = struct{}{} + return errStopWalk + }) + + if err != nil { + log.Warnf("error marking: %s", err) + } + } + } + + if len(missing) > 0 { + log.Warnf("still missing %d references", len(missing)) + for c := range missing { + log.Warnf("unresolved missing reference: %s", c) + } + } +} + func (s *SplitStore) gcHotstore() { if compact, ok := s.hot.(interface{ Compact() error }); ok { log.Infof("compacting hotstore") From 5a099b7d0539ba7a408dd938e263958cbdb769ea Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 16:12:17 +0300 Subject: [PATCH 135/197] more commentary on the missing refs situation --- blockstore/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b6aa48ff743..c521985f109 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1398,6 +1398,7 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { // little bit. // We need to figure out where they are coming from and eliminate that vector, but until then we // have this gem[TM]. +// My best guess is that they are parent message receipts or yet to be computed state roots. func (s *SplitStore) waitForMissingRefs(missing map[cid.Cid]struct{}, markSet, ctlSet MarkSet) { log.Info("waiting for missing references") start := time.Now() From af8cf712be9b5e0cde7093eeccfc58aaeda08a80 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 18:16:54 +0300 Subject: [PATCH 136/197] handle all missing refs together so that we wait 6min at most, not 12. 
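To make the arithmetic concrete: with waits of 1, 2 and 3 minutes the single unified retry loop blocks for at most 6 minutes, where the previous two call sites could block for up to 12. A stand-alone sketch of the retry shape follows; it is illustrative only and not part of the patch -- the string keys and resolve callback are hypothetical stand-ins for the markset walk:

package main

import (
	"fmt"
	"time"
)

// drainWithRetries retries unresolved keys with a linearly growing wait
// between attempts (1m, 2m, 3m), i.e. at most 6 minutes of sleeping in total.
func drainWithRetries(missing map[string]struct{}, resolve func(string) bool) map[string]struct{} {
	for i := 1; i <= 3 && len(missing) > 0; i++ {
		time.Sleep(time.Duration(i) * time.Minute)
		for k := range missing {
			if resolve(k) {
				delete(missing, k)
			}
		}
	}
	return missing // anything left over is reported as unresolved
}

func main() {
	left := drainWithRetries(map[string]struct{}{"ref": {}}, func(string) bool { return true })
	fmt.Println("unresolved:", len(left))
}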
--- blockstore/splitstore/splitstore.go | 54 +++++++++++++---------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c521985f109..b8abf3eae89 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -677,7 +677,7 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro return s.txnProtect.Mark(c) }, func(c cid.Cid) error { - log.Warnf("missing object %s in %s", c, root) + log.Warnf("missing object reference %s in %s", c, root) if s.txnMissing != nil { s.txnRefsMx.Lock() s.txnMissing[c] = struct{}{} @@ -871,7 +871,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }() // 1.1 Update markset for references created during marking - missing := make(map[cid.Cid]struct{}) if len(txnRefs) > 0 { log.Infow("updating mark set for live references", "refs", len(txnRefs)) startMark = time.Now() @@ -927,9 +926,11 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { count++ return markSet.Mark(c) }, - func(c cid.Cid) error { - log.Warnf("missing object for marking: %s", c) - missing[c] = struct{}{} + func(cm cid.Cid) error { + log.Warnf("missing object reference %s in %s", cm, c) + s.txnRefsMx.Lock() + s.txnMissing[cm] = struct{}{} + s.txnRefsMx.Unlock() return errStopWalk }) @@ -938,12 +939,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } } - log.Infow("update mark set done", "took", time.Since(startMark), "marked", count, "missing", len(missing)) - } - - // 1.2 if there are missing references wait a bit for them to see if they are written later - if len(missing) > 0 { - s.waitForMissingRefs(missing, markSet, nil) + log.Infow("update mark set done", "took", time.Since(startMark), "marked", count) } // 2. iterate through the hotstore to collect cold objects @@ -989,14 +985,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // now that we have collected cold objects, check for missing references from transactional i/o // and disable further collection of such references (they will not be acted upon) - s.txnLk.Lock() - missing = s.txnMissing - s.txnMissing = nil - s.txnLk.Unlock() - - if len(missing) > 0 { - s.waitForMissingRefs(missing, s.txnProtect, markSet) - } + s.waitForMissingRefs(markSet) // 3. copy the cold objects to the coldstore -- if we have one if !s.cfg.DiscardColdBlocks { @@ -1399,7 +1388,16 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { // We need to figure out where they are coming from and eliminate that vector, but until then we // have this gem[TM]. // My best guess is that they are parent message receipts or yet to be computed state roots. 
-func (s *SplitStore) waitForMissingRefs(missing map[cid.Cid]struct{}, markSet, ctlSet MarkSet) { +func (s *SplitStore) waitForMissingRefs(markSet MarkSet) { + s.txnLk.Lock() + missing := s.txnMissing + s.txnMissing = nil + s.txnLk.Unlock() + + if len(missing) == 0 { + return + } + log.Info("waiting for missing references") start := time.Now() count := 0 @@ -1432,15 +1430,13 @@ func (s *SplitStore) waitForMissingRefs(missing map[cid.Cid]struct{}, markSet, c return errStopWalk } - if ctlSet != nil { - mark, err = ctlSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } + mark, err = s.txnProtect.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset for %s: %w", c, err) + } - if mark { - return errStopWalk - } + if mark { + return errStopWalk } isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) @@ -1453,7 +1449,7 @@ func (s *SplitStore) waitForMissingRefs(missing map[cid.Cid]struct{}, markSet, c } count++ - return markSet.Mark(c) + return s.txnProtect.Mark(c) }, func(c cid.Cid) error { missing[c] = struct{}{} From 73d07999bf6beccb7e0b10544476d1674ba5cc9f Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 18:24:48 +0300 Subject: [PATCH 137/197] dont needlessly wait 1 min in first retry for missing refs --- blockstore/splitstore/splitstore.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b8abf3eae89..cb20c85286f 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1405,10 +1405,12 @@ func (s *SplitStore) waitForMissingRefs(markSet MarkSet) { log.Infow("waiting for missing references done", "took", time.Since(start), "marked", count) }() - for i := 1; i <= 3 && len(missing) > 0; i++ { + for i := 0; i < 3 && len(missing) > 0; i++ { wait := time.Duration(i) * time.Minute - log.Infof("retrying for %d missing references in %s (attempt: %d)", len(missing), wait, i) - time.Sleep(wait) + log.Infof("retrying for %d missing references in %s (attempt: %d)", len(missing), wait, i+1) + if wait > 0 { + time.Sleep(wait) + } towalk := missing walked := cid.NewSet() From 3477d265c6e88620e2bc90baaa43879a40caed8b Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 20:10:47 +0300 Subject: [PATCH 138/197] unify the two marksets really, it's concurrent marking and there is no reason to have two different marksets --- blockstore/splitstore/splitstore.go | 45 ++++------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index cb20c85286f..038d762af18 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -145,9 +145,7 @@ type SplitStore struct { txnLk sync.RWMutex txnActive bool txnLookbackEpoch abi.ChainEpoch - txnEnv MarkSetEnv txnProtect MarkSet - txnMarkSet MarkSet txnRefsMx sync.Mutex txnRefs map[cid.Cid]struct{} txnMissing map[cid.Cid]struct{} @@ -170,13 +168,6 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co return nil, err } - // the txn markset env - txnEnv, err := OpenMarkSetEnv(path, cfg.MarkSetType) - if err != nil { - _ = markSetEnv.Close() - return nil, err - } - // and now we can make a SplitStore ss := &SplitStore{ cfg: cfg, @@ -184,7 +175,6 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co hot: hot, cold: cold, markSetEnv: markSetEnv, - txnEnv: txnEnv, 
coldPurgeSize: defaultColdPurgeSize, } @@ -644,7 +634,7 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro } } - mark, err := s.txnMarkSet.Has(c) + mark, err := s.txnProtect.Has(c) if err != nil { return xerrors.Errorf("error checking mark set for %s: %w", c, err) } @@ -654,16 +644,6 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro return errStopWalk } - mark, err = s.txnProtect.Has(c) - if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) - } - - // it's protected, nothing to do - if mark { - return errStopWalk - } - // old block reference -- see comment in doCompact about the necessity of this isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) if err != nil { @@ -852,12 +832,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { txnRefs := s.txnRefs s.txnRefs = nil s.txnMissing = make(map[cid.Cid]struct{}) - s.txnProtect, err = s.txnEnv.Create("protected", 0) - if err != nil { - s.txnLk.Unlock() - return xerrors.Errorf("error creating transactional mark set: %w", err) - } - s.txnMarkSet = markSet + s.txnProtect = markSet s.txnLk.Unlock() defer func() { @@ -865,7 +840,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { _ = s.txnProtect.Close() s.txnActive = false s.txnProtect = nil - s.txnMarkSet = nil s.txnMissing = nil s.txnLk.Unlock() }() @@ -985,7 +959,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // now that we have collected cold objects, check for missing references from transactional i/o // and disable further collection of such references (they will not be acted upon) - s.waitForMissingRefs(markSet) + s.waitForMissingRefs() // 3. copy the cold objects to the coldstore -- if we have one if !s.cfg.DiscardColdBlocks { @@ -1388,7 +1362,7 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { // We need to figure out where they are coming from and eliminate that vector, but until then we // have this gem[TM]. // My best guess is that they are parent message receipts or yet to be computed state roots. -func (s *SplitStore) waitForMissingRefs(markSet MarkSet) { +func (s *SplitStore) waitForMissingRefs() { s.txnLk.Lock() missing := s.txnMissing s.txnMissing = nil @@ -1423,16 +1397,7 @@ func (s *SplitStore) waitForMissingRefs(markSet MarkSet) { return errStopWalk } - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } - - if mark { - return errStopWalk - } - - mark, err = s.txnProtect.Has(c) + mark, err := s.txnProtect.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) } From e859942fa4bd8b7bd2c40ddc275eac5bd339c670 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 23:31:37 +0300 Subject: [PATCH 139/197] code cleanup: refactor txn state code into their own functions --- blockstore/splitstore/splitstore.go | 60 ++++++++++++++++++----------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 038d762af18..c9727e4d029 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -798,11 +798,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { defer s.debug.Flush() // 0. Prepare the transaction - s.txnLk.Lock() - s.txnRefs = make(map[cid.Cid]struct{}) - s.txnActive = true - s.txnLookbackEpoch = lookbackEpoch - s.txnLk.Unlock() + s.prepareTxnProtect(lookbackEpoch) // 1. 
mark reachable objects by walking the chain from the current epoch to the boundary epoch log.Info("marking reachable objects") @@ -827,22 +823,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) - // fetch references taken during marking and create the transaction protect filter - s.txnLk.Lock() - txnRefs := s.txnRefs - s.txnRefs = nil - s.txnMissing = make(map[cid.Cid]struct{}) - s.txnProtect = markSet - s.txnLk.Unlock() - - defer func() { - s.txnLk.Lock() - _ = s.txnProtect.Close() - s.txnActive = false - s.txnProtect = nil - s.txnMissing = nil - s.txnLk.Unlock() - }() + // begin transactional protection and fetch references created while marking + txnRefs := s.beginTxnProtect(markSet) + defer s.endTxnProtect() // 1.1 Update markset for references created during marking if len(txnRefs) > 0 { @@ -958,7 +941,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) // now that we have collected cold objects, check for missing references from transactional i/o - // and disable further collection of such references (they will not be acted upon) + // and disable further collection of such references (they will not be acted upon as we can't + // possibly delete objects we didn't have when we were collecting cold objects) s.waitForMissingRefs() // 3. copy the cold objects to the coldstore -- if we have one @@ -1003,6 +987,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("purging cold objects from hotstore done", "took", time.Since(startPurge)) // we are done; do some housekeeping + s.endTxnProtect() s.gcHotstore() err = s.setBaseEpoch(boundaryEpoch) @@ -1018,6 +1003,37 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } +func (s *SplitStore) prepareTxnProtect(lookbackEpoch abi.ChainEpoch) { + s.txnLk.Lock() + defer s.txnLk.Unlock() + + s.txnRefs = make(map[cid.Cid]struct{}) + s.txnActive = true + s.txnLookbackEpoch = lookbackEpoch +} + +func (s *SplitStore) beginTxnProtect(markSet MarkSet) map[cid.Cid]struct{} { + s.txnLk.Lock() + defer s.txnLk.Unlock() + + txnRefs := s.txnRefs + s.txnRefs = nil + s.txnMissing = make(map[cid.Cid]struct{}) + s.txnProtect = markSet + + return txnRefs +} + +func (s *SplitStore) endTxnProtect() { + s.txnLk.Lock() + defer s.txnLk.Unlock() + + _ = s.txnProtect.Close() + s.txnActive = false + s.txnProtect = nil + s.txnMissing = nil +} + func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool, f func(cid.Cid) error) error { visited := cid.NewSet() From bd436ab9ded01e192d842c557555f1d2b2ff6015 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 23:51:10 +0300 Subject: [PATCH 140/197] make endTxnProtect idempotent --- blockstore/splitstore/splitstore.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c9727e4d029..fa9c919dc76 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1028,7 +1028,9 @@ func (s *SplitStore) endTxnProtect() { s.txnLk.Lock() defer s.txnLk.Unlock() - _ = s.txnProtect.Close() + if s.txnProtect != nil { + _ = s.txnProtect.Close() + } s.txnActive = false s.txnProtect = nil s.txnMissing = nil From 51ab891d5ca39c1046c644ef190ca2a36aee108c Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 23:53:45 +0300 Subject: [PATCH 141/197] quiet linter 
it's a false positive, function doesn't escape. --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index fa9c919dc76..7789a8d319c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -884,7 +884,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return markSet.Mark(c) }, func(cm cid.Cid) error { - log.Warnf("missing object reference %s in %s", cm, c) + log.Warnf("missing object reference %s in %s", cm, c) //nolint s.txnRefsMx.Lock() s.txnMissing[cm] = struct{}{} s.txnRefsMx.Unlock() From 2cbd3faf5ac06e0e1f39d7eafaa330127dcca77f Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 5 Jul 2021 23:56:31 +0300 Subject: [PATCH 142/197] make sure to nil everything in txnEndProtect --- blockstore/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 7789a8d319c..591fa479a69 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1033,6 +1033,7 @@ func (s *SplitStore) endTxnProtect() { } s.txnActive = false s.txnProtect = nil + s.txnRefs = nil s.txnMissing = nil } From c6ad8fdaedf9b243457972bcf902ee0836f455ee Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 01:08:44 +0300 Subject: [PATCH 143/197] use walkObjectRaw for computing object weights cids that come out of the hotstore with ForEach are raw. --- blockstore/splitstore/splitstore.go | 57 ++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 591fa479a69..f58f318bede 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1143,6 +1143,57 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro return nil } +// like walkObject, but treats Raw root cids as potential DAGs; this is necessary for walking cids +// that come out of the blockstore itself. 
+func (s *SplitStore) walkObjectRaw(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { + if !walked.Visit(c) { + return nil + } + + if err := f(c); err != nil { + if err == errStopWalk { + return nil + } + + return err + } + + switch c.Prefix().Codec { + case cid.DagCBOR, cid.Raw: + default: + return nil + } + + var links []cid.Cid + err := s.view(c, func(data []byte) error { + return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { + links = append(links, c) + }) + }) + + if err != nil { + // don't fail if the scan fails + log.Warnf("error scanning linked block (cid: %s): %s", c, err) + return nil + } + + for _, c := range links { + // these are internal references and should no longer be raw, so we recurse with walkObject + err := s.walkObject(c, walked, f) + if err != nil { + if xerrors.Is(err, bstore.ErrNotFound) { + // potential false positive + log.Warnf("error walking link (cid: %s): %s", c, err) + continue + } + + return xerrors.Errorf("error walking link (cid: %s): %w", c, err) + } + } + + return nil +} + // like walkObject, but the object may be potentially incomplete (references missing from the hotstore) func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing func(cid.Cid) error) error { if !walked.Visit(c) { @@ -1274,12 +1325,14 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { func (s *SplitStore) sortObjects(cids []cid.Cid) error { weight := make(map[cid.Cid]int) for _, c := range cids { - if c.Prefix().Codec != cid.DagCBOR { + switch c.Prefix().Codec { + case cid.DagCBOR, cid.Raw: + default: continue } w := 0 - err := s.walkObject(c, cid.NewSet(), + err := s.walkObjectRaw(c, cid.NewSet(), func(c cid.Cid) error { wc, ok := weight[c] if ok { From 525a2c71dd0b85392facdb512466a36707b496f6 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 01:27:56 +0300 Subject: [PATCH 144/197] use hashes as keys in weight map to avoid duplicate work otherwise the root object will be raw, but internal references will be dag; duplicate work. 
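The collision that hash keying exploits can be demonstrated in a few lines. This is a stand-alone sketch using go-cid and go-multihash directly, not patch code: two CIDs that differ only in codec carry the same multihash, so keying the weight map by the hash collapses them into a single entry.

package main

import (
	"fmt"

	cid "github.com/ipfs/go-cid"
	mh "github.com/multiformats/go-multihash"
)

func main() {
	h, err := mh.Sum([]byte("same object bytes"), mh.SHA2_256, -1)
	if err != nil {
		panic(err)
	}

	raw := cid.NewCidV1(cid.Raw, h)     // codec as the hotstore iterator reports it
	dag := cid.NewCidV1(cid.DagCBOR, h) // codec as internal references spell it

	weights := make(map[string]int)
	weights[string(raw.Hash())] = 42

	fmt.Println(raw.Equals(dag))             // false: the cids are distinct
	fmt.Println(weights[string(dag.Hash())]) // 42: same key, one weight entry
}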
--- blockstore/splitstore/splitstore.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index f58f318bede..cc455419a01 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1323,7 +1323,7 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { } func (s *SplitStore) sortObjects(cids []cid.Cid) error { - weight := make(map[cid.Cid]int) + weight := make(map[string]int) for _, c := range cids { switch c.Prefix().Codec { case cid.DagCBOR, cid.Raw: @@ -1334,7 +1334,7 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { w := 0 err := s.walkObjectRaw(c, cid.NewSet(), func(c cid.Cid) error { - wc, ok := weight[c] + wc, ok := weight[string(c.Hash())] if ok { w += wc return errStopWalk @@ -1348,12 +1348,12 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { return xerrors.Errorf("error determining cold object weight: %w", err) } - weight[c] = w + weight[string(c.Hash())] = w } sort.Slice(cids, func(i, j int) bool { - wi := weight[cids[i]] - wj := weight[cids[j]] + wi := weight[string(cids[i].Hash())] + wj := weight[string(cids[j].Hash())] if wi == wj { return bytes.Compare(cids[i].Hash(), cids[j].Hash()) > 0 } From 0659235e21f176344613824de77f676613c7deda Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 07:26:13 +0300 Subject: [PATCH 145/197] cache cid strings in sort so as to avoid making a gazillion of strings --- blockstore/splitstore/splitstore.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index cc455419a01..b5251a602ff 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1323,6 +1323,18 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { } func (s *SplitStore) sortObjects(cids []cid.Cid) error { + // we cache the keys to avoid making a gazillion of strings + keys := make(map[cid.Cid]string) + key := func(c cid.Cid) string { + s, ok := keys[c] + if !ok { + s = string(c.Hash()) + keys[c] = s + } + return s + } + + // compute sorting weights as the cumulative number of DAG links weight := make(map[string]int) for _, c := range cids { switch c.Prefix().Codec { @@ -1334,7 +1346,7 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { w := 0 err := s.walkObjectRaw(c, cid.NewSet(), func(c cid.Cid) error { - wc, ok := weight[string(c.Hash())] + wc, ok := weight[key(c)] if ok { w += wc return errStopWalk @@ -1348,12 +1360,13 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { return xerrors.Errorf("error determining cold object weight: %w", err) } - weight[string(c.Hash())] = w + weight[key(c)] = w } + // sort! sort.Slice(cids, func(i, j int) bool { - wi := weight[string(cids[i].Hash())] - wj := weight[string(cids[j].Hash())] + wi := weight[key(cids[i])] + wj := weight[key(cids[j])] if wi == wj { return bytes.Compare(cids[i].Hash(), cids[j].Hash()) > 0 } From bf7aeb316728ace82fb986d128520c78d7f8506b Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 08:10:57 +0300 Subject: [PATCH 146/197] optimize sort a tad it's taking a long time to compute weights... 
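The shape of the optimization, reduced to a toy graph (hypothetical node type; the visited-set and blockstore plumbing of the real walk are omitted): leaf children are counted in bulk at their parent instead of being visited through the per-object callback one at a time, so the callback overhead is only paid for DAG nodes.

package main

import "fmt"

type node struct {
	isDag    bool
	children []*node
}

// scan visits dag nodes via f and reports leaf children in bulk via l,
// mirroring the split between the per-node and per-leaf-batch callbacks.
func scan(n *node, f func(*node), l func(*node, int)) {
	f(n)
	leaves := 0
	for _, c := range n.children {
		if !c.isDag {
			leaves++ // don't recurse into leaves, just count them
			continue
		}
		scan(c, f, l)
	}
	if leaves > 0 {
		l(n, leaves)
	}
}

func main() {
	leaf := &node{}
	root := &node{isDag: true, children: []*node{leaf, leaf, {isDag: true}}}
	w := 0
	scan(root, func(*node) { w++ }, func(_ *node, n int) { w += n })
	fmt.Println("weight:", w) // 2 dag nodes + 2 leaves = 4
}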
--- blockstore/splitstore/splitstore.go | 106 ++++++++++++++-------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b5251a602ff..7e54cb54f9f 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1143,57 +1143,6 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro return nil } -// like walkObject, but treats Raw root cids as potential DAGs; this is necessary for walking cids -// that come out of the blockstore itself. -func (s *SplitStore) walkObjectRaw(c cid.Cid, walked *cid.Set, f func(cid.Cid) error) error { - if !walked.Visit(c) { - return nil - } - - if err := f(c); err != nil { - if err == errStopWalk { - return nil - } - - return err - } - - switch c.Prefix().Codec { - case cid.DagCBOR, cid.Raw: - default: - return nil - } - - var links []cid.Cid - err := s.view(c, func(data []byte) error { - return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { - links = append(links, c) - }) - }) - - if err != nil { - // don't fail if the scan fails - log.Warnf("error scanning linked block (cid: %s): %s", c, err) - return nil - } - - for _, c := range links { - // these are internal references and should no longer be raw, so we recurse with walkObject - err := s.walkObject(c, walked, f) - if err != nil { - if xerrors.Is(err, bstore.ErrNotFound) { - // potential false positive - log.Warnf("error walking link (cid: %s): %s", c, err) - continue - } - - return xerrors.Errorf("error walking link (cid: %s): %w", c, err) - } - } - - return nil -} - // like walkObject, but the object may be potentially incomplete (references missing from the hotstore) func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing func(cid.Cid) error) error { if !walked.Visit(c) { @@ -1344,7 +1293,7 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { } w := 0 - err := s.walkObjectRaw(c, cid.NewSet(), + err := s.scanObject(c, cid.NewSet(), func(c cid.Cid) error { wc, ok := weight[key(c)] if ok { @@ -1354,6 +1303,9 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { w++ return nil + }, + func(_ cid.Cid, leaves int) { + w += leaves }) if err != nil { @@ -1377,6 +1329,56 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { return nil } +// specialized version of walkObject for computing object weights +// 1. root keys are raw +// 2. some references may not exist +// 3. 
we don't care about visiting non-DAGs so short-circuit those +func (s *SplitStore) scanObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error, l func(cid.Cid, int)) error { + if !walked.Visit(c) { + return nil + } + + if err := f(c); err != nil { + if err == errStopWalk { + return nil + } + + return err + } + + var links []cid.Cid + err := s.view(c, func(data []byte) error { + return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { + links = append(links, c) + }) + }) + + if err != nil { + // don't fail if the scan fails or if the object is absent + return nil + } + + leaves := 0 + for _, c := range links { + // these are internal refs, so dags will be dags + if c.Prefix().Codec != cid.DagCBOR { + leaves++ + continue + } + + err := s.scanObject(c, walked, f, l) + if err != nil { + return xerrors.Errorf("error walking link (cid: %s): %w", c, err) + } + } + + if leaves > 0 { + l(c, leaves) + } + + return nil +} + func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) error) error { if len(cids) == 0 { return nil From 55a9e0ccd1438b7f3982cc779a00f791aa37c7f7 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 08:22:43 +0300 Subject: [PATCH 147/197] short-circuit block headers on sort weight computation --- blockstore/splitstore/splitstore.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 7e54cb54f9f..460ce5ed148 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1237,6 +1237,20 @@ func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlo return isOldBlock, err } +func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { + if c.Prefix().Codec != cid.DagCBOR { + return false, nil + } + + err = s.view(c, func(data []byte) error { + var hdr types.BlockHeader + isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil + return nil + }) + + return isBlock, err +} + func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) @@ -1302,6 +1316,13 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { } w++ + + // short-circuit block headers or else we'll walk the entire chain + isBlock, err := s.isBlockHeader(c) + if isBlock || err == bstore.ErrNotFound { + return errStopWalk + } + return nil }, func(_ cid.Cid, leaves int) { From 169ab262f5bf150594e2a4abe7bcdce7c27daad5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 09:02:44 +0300 Subject: [PATCH 148/197] really optimize computing object weights sort is still taking a long time, this should be as fast as it gets. 
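Stripped of the blockstore plumbing, the new computation is a plain memoized recursion: each node weighs 1 plus the sum of its children's weights, and shared subtrees are costed once. A toy sketch with hypothetical types, illustrative only:

package main

import "fmt"

type node struct {
	id       string
	children []*node
}

// weight computes 1 + the sum of child weights, memoized by node id so
// that shared subtrees are only walked once.
func weight(n *node, memo map[string]int) int {
	if w, ok := memo[n.id]; ok {
		return w
	}
	w := 1
	for _, c := range n.children {
		w += weight(c, memo)
	}
	memo[n.id] = w
	return w
}

func main() {
	shared := &node{id: "shared"}
	root := &node{id: "root", children: []*node{shared, shared, {id: "x"}}}
	fmt.Println(weight(root, make(map[string]int))) // 1 + 1 + 1 + 1 = 4
}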
--- blockstore/splitstore/splitstore.go | 91 +++++++++-------------------- 1 file changed, 28 insertions(+), 63 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 460ce5ed148..745b9a5d85f 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1298,48 +1298,16 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { } // compute sorting weights as the cumulative number of DAG links - weight := make(map[string]int) + weights := make(map[string]int) for _, c := range cids { - switch c.Prefix().Codec { - case cid.DagCBOR, cid.Raw: - default: - continue - } - - w := 0 - err := s.scanObject(c, cid.NewSet(), - func(c cid.Cid) error { - wc, ok := weight[key(c)] - if ok { - w += wc - return errStopWalk - } - - w++ - - // short-circuit block headers or else we'll walk the entire chain - isBlock, err := s.isBlockHeader(c) - if isBlock || err == bstore.ErrNotFound { - return errStopWalk - } - - return nil - }, - func(_ cid.Cid, leaves int) { - w += leaves - }) - - if err != nil { - return xerrors.Errorf("error determining cold object weight: %w", err) - } - - weight[key(c)] = w + w := s.getObjectWeight(c, weights, key) + weights[key(c)] = w } // sort! sort.Slice(cids, func(i, j int) bool { - wi := weight[key(cids[i])] - wj := weight[key(cids[j])] + wi := weights[key(cids[i])] + wj := weights[key(cids[j])] if wi == wj { return bytes.Compare(cids[i].Hash(), cids[j].Hash()) > 0 } @@ -1350,54 +1318,51 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { return nil } -// specialized version of walkObject for computing object weights -// 1. root keys are raw -// 2. some references may not exist -// 3. we don't care about visiting non-DAGs so short-circuit those -func (s *SplitStore) scanObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) error, l func(cid.Cid, int)) error { - if !walked.Visit(c) { - return nil +func (s *SplitStore) getObjectWeight(c cid.Cid, weights map[string]int, key func(cid.Cid) string) int { + w, ok := weights[key(c)] + if ok { + return w } - if err := f(c); err != nil { - if err == errStopWalk { - return nil - } + // we treat block headers specially to avoid walking the entire chain + var hdr types.BlockHeader + err := s.view(c, func(data []byte) error { + return hdr.UnmarshalCBOR(bytes.NewBuffer(data)) + }) + if err == nil { + w1 := s.getObjectWeight(hdr.ParentStateRoot, weights, key) + weights[key(hdr.ParentStateRoot)] = w1 - return err + w2 := s.getObjectWeight(hdr.Messages, weights, key) + weights[key(hdr.Messages)] = w2 + + return 1 + w1 + w2 } var links []cid.Cid - err := s.view(c, func(data []byte) error { + err = s.view(c, func(data []byte) error { return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) { links = append(links, c) }) }) - if err != nil { - // don't fail if the scan fails or if the object is absent - return nil + return 1 } - leaves := 0 for _, c := range links { // these are internal refs, so dags will be dags if c.Prefix().Codec != cid.DagCBOR { - leaves++ + w++ continue } - err := s.scanObject(c, walked, f, l) - if err != nil { - return xerrors.Errorf("error walking link (cid: %s): %w", c, err) - } - } + wc := s.getObjectWeight(c, weights, key) + weights[key(c)] = wc - if leaves > 0 { - l(c, leaves) + w += wc } - return nil + return w } func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) error) error { From c4ae3e0c3dfef4d8a324e24d951880cfdbb0ee5c Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 09:17:35 +0300 
Subject: [PATCH 149/197] minor tweak --- blockstore/splitstore/splitstore.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 745b9a5d85f..3d67cd4cf8d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1349,6 +1349,7 @@ func (s *SplitStore) getObjectWeight(c cid.Cid, weights map[string]int, key func return 1 } + w = 1 for _, c := range links { // these are internal refs, so dags will be dags if c.Prefix().Codec != cid.DagCBOR { From dc8139a1d29fbc68d5b4eb7139462210371facff Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 13:23:12 +0300 Subject: [PATCH 150/197] add some comments for debug only code --- blockstore/splitstore/splitstore.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 3d67cd4cf8d..56971307f27 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -121,7 +121,7 @@ type SplitStore struct { baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch - writeEpoch abi.ChainEpoch + writeEpoch abi.ChainEpoch // for debug logging coldPurgeSize int @@ -529,6 +529,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } +// this is only used when debug logging is enabled func (s *SplitStore) background() { ticker := time.NewTicker(time.Second) defer ticker.Stop() @@ -544,6 +545,7 @@ func (s *SplitStore) background() { } } +// this is only used when debug logging is enabled func (s *SplitStore) updateWriteEpoch() { s.mx.Lock() defer s.mx.Unlock() From 5c514504f74ed3fc38f6f7c307a65948c801ba38 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 14:41:41 +0300 Subject: [PATCH 151/197] remove unused GetGenesis method from ChainAccessor interface --- blockstore/splitstore/splitstore.go | 1 - blockstore/splitstore/splitstore_test.go | 4 ---- 2 files changed, 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 56971307f27..2fa191e7a5e 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -106,7 +106,6 @@ type Config struct { // ChainAccessor allows the Splitstore to access the chain. It will most likely // be a ChainStore at runtime. 
type ChainAccessor interface { - GetGenesis() (*types.BlockHeader, error) GetTipsetByHeight(context.Context, abi.ChainEpoch, *types.TipSet, bool) (*types.TipSet, error) GetHeaviestTipSet() *types.TipSet SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 4a98f4631ef..a0af5fe9d20 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -207,10 +207,6 @@ func (c *mockChain) revert(count int) { } } -func (c *mockChain) GetGenesis() (*types.BlockHeader, error) { - return c.genesis, nil -} - func (c *mockChain) GetTipsetByHeight(_ context.Context, epoch abi.ChainEpoch, _ *types.TipSet, _ bool) (*types.TipSet, error) { c.Lock() defer c.Unlock() From fdff1bebc91630c11cf0efe9df022a72fd5044ec Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 14:44:40 +0300 Subject: [PATCH 152/197] move map markset implementation to its own file --- blockstore/splitstore/markset.go | 59 +-------------------------- blockstore/splitstore/markset_map.go | 61 ++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 57 deletions(-) create mode 100644 blockstore/splitstore/markset_map.go diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index 491020c6ecf..878deb267d7 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -2,7 +2,6 @@ package splitstore import ( "path/filepath" - "sync" "golang.org/x/xerrors" @@ -12,7 +11,7 @@ import ( // MarkSet is a utility to keep track of seen CID, and later query for them. // // * If the expected dataset is large, it can be backed by a datastore (e.g. bbolt). -// * If a probabilistic result is acceptable, it can be backed by a bloom filter (default). 
+// * If a probabilistic result is acceptable, it can be backed by a bloom filter. type MarkSet interface { Mark(cid.Cid) error Has(cid.Cid) (bool, error) @@ -29,7 +28,7 @@ type MarkSetEnv interface { func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { switch mtype { - case "", "bloom": + case "bloom": return NewBloomMarkSetEnv(false) case "bloomts": return NewBloomMarkSetEnv(true) @@ -43,57 +42,3 @@ func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { return nil, xerrors.Errorf("unknown mark set type %s", mtype) } } - -type MapMarkSetEnv struct { - ts bool -} - -var _ MarkSetEnv = (*MapMarkSetEnv)(nil) - -type MapMarkSet struct { - mx sync.Mutex - set map[string]struct{} - - ts bool -} - -var _ MarkSet = (*MapMarkSet)(nil) - -func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) { - return &MapMarkSetEnv{ts: ts}, nil -} - -func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { - return &MapMarkSet{ - set: make(map[string]struct{}, sizeHint), - ts: e.ts, - }, nil -} - -func (e *MapMarkSetEnv) Close() error { - return nil -} - -func (s *MapMarkSet) Mark(cid cid.Cid) error { - if s.ts { - s.mx.Lock() - defer s.mx.Unlock() - } - - s.set[string(cid.Hash())] = struct{}{} - return nil -} - -func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { - if s.ts { - s.mx.Lock() - defer s.mx.Unlock() - } - - _, ok := s.set[string(cid.Hash())] - return ok, nil -} - -func (s *MapMarkSet) Close() error { - return nil -} diff --git a/blockstore/splitstore/markset_map.go b/blockstore/splitstore/markset_map.go new file mode 100644 index 00000000000..5bac9a284a1 --- /dev/null +++ b/blockstore/splitstore/markset_map.go @@ -0,0 +1,61 @@ +package splitstore + +import ( + "sync" + + cid "github.com/ipfs/go-cid" +) + +type MapMarkSetEnv struct { + ts bool +} + +var _ MarkSetEnv = (*MapMarkSetEnv)(nil) + +type MapMarkSet struct { + mx sync.Mutex + set map[string]struct{} + + ts bool +} + +var _ MarkSet = (*MapMarkSet)(nil) + +func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) { + return &MapMarkSetEnv{ts: ts}, nil +} + +func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { + return &MapMarkSet{ + set: make(map[string]struct{}, sizeHint), + ts: e.ts, + }, nil +} + +func (e *MapMarkSetEnv) Close() error { + return nil +} + +func (s *MapMarkSet) Mark(cid cid.Cid) error { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + } + + s.set[string(cid.Hash())] = struct{}{} + return nil +} + +func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + } + + _, ok := s.set[string(cid.Hash())] + return ok, nil +} + +func (s *MapMarkSet) Close() error { + return nil +} From c1c25868cc6621a87a7da2f6c05fa160cf9676de Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 15:09:04 +0300 Subject: [PATCH 153/197] improve comments --- blockstore/splitstore/splitstore.go | 77 ++++++++++++++++------------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 2fa191e7a5e..dc2ace46177 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -114,13 +114,13 @@ type SplitStore struct { compacting int32 // compaction (or warm up) in progress critsection int32 // compaction critical section - closing int32 // the split store is closing + closing int32 // the splitstore is closing cfg *Config baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch - writeEpoch
abi.ChainEpoch // for debug logging + writeEpoch abi.ChainEpoch // for debug logging only coldPurgeSize int @@ -140,7 +140,7 @@ type SplitStore struct { debug *debugLog - // protection for concurrent read/writes during compaction + // transactional protection for concurrent read/writes during compaction txnLk sync.RWMutex txnActive bool txnLookbackEpoch abi.ChainEpoch @@ -429,15 +429,13 @@ func (s *SplitStore) Start(chain ChainAccessor) error { } // load warmup epoch from metadata ds - // if none, then the splitstore will warm up the hotstore at first head change notif - // by walking the current tipset bs, err = s.ds.Get(warmupEpochKey) switch err { case nil: s.warmupEpoch = bytesToEpoch(bs) case dstore.ErrNotFound: - // the hotstore hasn't warmed up, load the genesis into the hotstore + // the hotstore hasn't warmed up, start a concurrent warm-up err = s.warmup(s.curTs) if err != nil { return xerrors.Errorf("error warming up: %w", err) @@ -447,8 +445,7 @@ func (s *SplitStore) Start(chain ChainAccessor) error { return xerrors.Errorf("error loading warmup epoch: %w", err) } - // load markSetSize from metadata ds - // if none, the splitstore will compute it during warmup and update in every compaction + // load markSetSize from metadata ds to provide a size hint for marksets bs, err = s.ds.Get(markSetSizeKey) switch err { case nil: @@ -504,7 +501,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { } if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { - // we are currently compacting, do nothing and wait for the next head change + // we are currently compacting (or warming up); do nothing and wait for the next head change return nil } @@ -568,6 +565,7 @@ func (s *SplitStore) updateWriteEpoch() { } } +// transactionally protect a reference to an object func (s *SplitStore) trackTxnRef(c cid.Cid) error { if !s.txnActive { // not compacting @@ -586,6 +584,7 @@ func (s *SplitStore) trackTxnRef(c cid.Cid) error { return s.doTxnProtect(c, nil) } +// transactionally protect a batch of references func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { if !s.txnActive { // not compacting @@ -593,7 +592,7 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { } if s.txnRefs != nil { - // we haven't finished marking yet, so track the reference + // we haven't finished marking yet, so track the references s.txnRefsMx.Lock() for _, c := range cids { s.txnRefs[c] = struct{}{} @@ -618,6 +617,8 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { return nil } +// transactionally protect a reference by walking the object and marking. +// concurrent markings are short-circuited by checking the markset. func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) error { // Note: cold objects are deleted heaviest first, so the constituents of an object // cannot be deleted before the object itself. @@ -674,6 +675,9 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro return err } +// warmup acquires the compaction lock and spawns a goroutine to warm up the hotstore; +// this is necessary when we sync from a snapshot or when we enable the splitstore +// on top of an existing blockstore (which becomes the coldstore).
func (s *SplitStore) warmup(curTs *types.TipSet) error { if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { return xerrors.Errorf("error locking compaction") @@ -697,6 +701,9 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { return nil } +// the actual warmup procedure; it walks the chain, loading all state roots at the boundary +// and headers all the way up to genesis. +// objects are written in batches so as to minimize overhead. func (s *SplitStore) doWarmup(curTs *types.TipSet) error { epoch := curTs.Height() batchHot := make([]blocks.Block, 0, batchSize) @@ -772,7 +779,17 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { return nil } -// Compaction/GC Algorithm +// --- Compaction --- +// Compaction works transactionally with the following algorithm: +// - We prepare a transaction, whereby all objects referenced through API I/O are tracked. +// - We walk the chain and mark reachable objects, keeping 4 finalities of state roots and messages and all headers all the way to genesis. +// - Once the chain walk is complete, we begin full transaction protection with concurrent marking; we walk and mark all references created during the chain walk. At the same time, all I/O through the API concurrently marks objects as live references. +// - We collect cold objects by iterating through the hotstore and checking the mark set; if an object is not marked, then it is a candidate for purge. +// - When running with a coldstore, we next copy all cold objects to the coldstore. +// - At this point we are ready to begin purging: +// - We sort cold objects heaviest first, so as to never delete the constituents of a DAG before the DAG itself (which would leave dangling references) +// - We delete in small batches taking a lock; each batch is checked again for marks, from the concurrent transactional mark, so as to never delete anything live +// - We then end the transaction and compact/gc the hotstore. func (s *SplitStore) compact(curTs *types.TipSet) { start := time.Now() err := s.doCompact(curTs) @@ -801,7 +818,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // 0. Prepare the transaction s.prepareTxnProtect(lookbackEpoch) - // 1. mark reachable objects by walking the chain from the current epoch to the boundary epoch + // 1. mark reachable objects by walking the chain from the current epoch; we keep state roots + // and messages until the boundary epoch. log.Info("marking reachable objects") startMark := time.Now() @@ -928,7 +946,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { }) if err != nil { - return xerrors.Errorf("error collecting candidate cold objects: %w", err) + return xerrors.Errorf("error collecting cold objects: %w", err) } log.Infow("cold collection done", "took", time.Since(startCollect)) @@ -958,7 +976,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } // 4. sort cold objects so that the dags with most references are deleted first - // this ensures that we can't refer to a dag with its consituents already deleted + // this ensures that we can't refer to a dag with its constituents already deleted, i.e. + // we leave no dangling references.
log.Info("sorting cold objects") startSort := time.Now() err = s.sortObjects(cold) @@ -1144,7 +1163,7 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro return nil } -// like walkObject, but the object may be potentially incomplete (references missing from the hotstore) +// like walkObject, but the object may be potentially incomplete (references missing) func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing func(cid.Cid) error) error { if !walked.Visit(c) { return nil @@ -1238,20 +1257,6 @@ func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlo return isOldBlock, err } -func (s *SplitStore) isBlockHeader(c cid.Cid) (isBlock bool, err error) { - if c.Prefix().Codec != cid.DagCBOR { - return false, nil - } - - err = s.view(c, func(data []byte) error { - var hdr types.BlockHeader - isBlock = hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil - return nil - }) - - return isBlock, err -} - func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) @@ -1279,13 +1284,16 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { if len(batch) > 0 { err := s.cold.PutMany(batch) if err != nil { - return xerrors.Errorf("error putting cold to coldstore: %w", err) + return xerrors.Errorf("error putting batch to coldstore: %w", err) } } return nil } +// sorts a slice of objects heaviest first -- it's a little expensive but worth the +// guarantee that we don't leave dangling references behind, e.g. if we die in the middle +// of a purge. func (s *SplitStore) sortObjects(cids []cid.Cid) error { // we cache the keys to avoid making a gazillion of strings keys := make(map[cid.Cid]string) @@ -1431,12 +1439,13 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { }) } -// I really don't like having this code, but we seem to have some DAG references with missing -// constituents. During testing in mainnet *some* of these references *sometimes* appeared after a -// little bit. +// I really don't like having this code, but we seem to have some occasional DAG references with +// missing constituents. During testing in mainnet *some* of these references *sometimes* appeared +// after a little bit. // We need to figure out where they are coming from and eliminate that vector, but until then we // have this gem[TM]. -// My best guess is that they are parent message receipts or yet to be computed state roots. +// My best guess is that they are parent message receipts or yet to be computed state roots; magik +// thinks the cause may be block validation. 
func (s *SplitStore) waitForMissingRefs() { s.txnLk.Lock() missing := s.txnMissing From f2f4af669d0b93b6b51d922d5c93d0f4a2e240e2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Jul 2021 17:13:38 +0300 Subject: [PATCH 154/197] clean up: simplify debug log, get rid of ugly debug log --- blockstore/splitstore/debug.go | 110 ++++++++++++---------------- blockstore/splitstore/splitstore.go | 75 ++----------------- 2 files changed, 56 insertions(+), 129 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 18ea436daab..4c788a28b5b 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -15,19 +15,16 @@ import ( "go.uber.org/multierr" "golang.org/x/xerrors" - "github.com/filecoin-project/go-state-types/abi" - "github.com/filecoin-project/lotus/chain/types" - blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" ) type debugLog struct { - readPath, writePath, movePath, stackPath string - readMx, writeMx, moveMx, stackMx sync.Mutex - readLog, writeLog, moveLog, stackLog *os.File - readCnt, writeCnt, moveCnt, stackCnt int - stackMap map[string]string + readPath, writePath, deletePath, stackPath string + readMx, writeMx, deleteMx, stackMx sync.Mutex + readLog, writeLog, deleteLog, stackLog *os.File + readCnt, writeCnt, deleteCnt, stackCnt int + stackMap map[string]string } func openDebugLog(path string) (*debugLog, error) { @@ -50,12 +47,12 @@ func openDebugLog(path string) (*debugLog, error) { return nil, xerrors.Errorf("error opening write log: %w", err) } - movePath := filepath.Join(basePath, "move.log") - moveFile, err := os.OpenFile(movePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + deletePath := filepath.Join(basePath, "delete.log") + deleteFile, err := os.OpenFile(deletePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) if err != nil { _ = readFile.Close() _ = writeFile.Close() - return nil, xerrors.Errorf("error opening move log: %w", err) + return nil, xerrors.Errorf("error opening delete log: %w", err) } stackPath := filepath.Join(basePath, "stack.log") @@ -63,47 +60,42 @@ func openDebugLog(path string) (*debugLog, error) { if err != nil { _ = readFile.Close() _ = writeFile.Close() - _ = moveFile.Close() + _ = deleteFile.Close() return nil, xerrors.Errorf("error opening stack log: %w", err) } return &debugLog{ - readPath: readPath, - writePath: writePath, - movePath: movePath, - stackPath: stackPath, - readLog: readFile, - writeLog: writeFile, - moveLog: moveFile, - stackLog: stackFile, - stackMap: make(map[string]string), + readPath: readPath, + writePath: writePath, + deletePath: deletePath, + stackPath: stackPath, + readLog: readFile, + writeLog: writeFile, + deleteLog: deleteFile, + stackLog: stackFile, + stackMap: make(map[string]string), }, nil } -func (d *debugLog) LogReadMiss(curTs *types.TipSet, cid cid.Cid) { +func (d *debugLog) LogReadMiss(cid cid.Cid) { if d == nil { return } stack := d.getStack() - var epoch abi.ChainEpoch - if curTs != nil { - epoch = curTs.Height() - } - d.readMx.Lock() defer d.readMx.Unlock() d.readCnt++ - _, err := fmt.Fprintf(d.readLog, "%s %d %s %s\n", d.timestamp(), epoch, cid, stack) + _, err := fmt.Fprintf(d.readLog, "%s %s %s\n", d.timestamp(), cid, stack) if err != nil { log.Warnf("error writing read log: %s", err) } } -func (d *debugLog) LogWrite(curTs *types.TipSet, blk blocks.Block, writeEpoch abi.ChainEpoch) { +func (d *debugLog) LogWrite(blk blocks.Block) { if d == nil { return } @@ -113,23 +105,18 @@ func (d *debugLog) LogWrite(curTs *types.TipSet, blk 
blocks.Block, writeEpoch ab stack = " " + d.getStack() } - var curEpoch abi.ChainEpoch - if curTs != nil { - curEpoch = curTs.Height() - } - d.writeMx.Lock() defer d.writeMx.Unlock() d.writeCnt++ - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", d.timestamp(), curEpoch, blk.Cid(), writeEpoch, stack) + _, err := fmt.Fprintf(d.writeLog, "%s %s%s\n", d.timestamp(), blk.Cid(), stack) if err != nil { log.Warnf("error writing write log: %s", err) } } -func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeEpoch abi.ChainEpoch) { +func (d *debugLog) LogWriteMany(blks []blocks.Block) { if d == nil { return } @@ -139,11 +126,6 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE stack = " " + d.getStack() } - var curEpoch abi.ChainEpoch - if curTs != nil { - curEpoch = curTs.Height() - } - d.writeMx.Lock() defer d.writeMx.Unlock() @@ -151,7 +133,7 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE now := d.timestamp() for _, blk := range blks { - _, err := fmt.Fprintf(d.writeLog, "%s %d %s %d%s\n", now, curEpoch, blk.Cid(), writeEpoch, stack) + _, err := fmt.Fprintf(d.writeLog, "%s %s%s\n", now, blk.Cid(), stack) if err != nil { log.Warnf("error writing write log: %s", err) break @@ -159,19 +141,23 @@ func (d *debugLog) LogWriteMany(curTs *types.TipSet, blks []blocks.Block, writeE } } -func (d *debugLog) LogMove(curTs *types.TipSet, cid cid.Cid) { +func (d *debugLog) LogDelete(cids []cid.Cid) { if d == nil { return } - d.moveMx.Lock() - defer d.moveMx.Unlock() + d.deleteMx.Lock() + defer d.deleteMx.Unlock() - d.moveCnt++ + d.deleteCnt += len(cids) - _, err := fmt.Fprintf(d.moveLog, "%d %s\n", curTs.Height(), cid) - if err != nil { - log.Warnf("error writing move log: %s", err) + now := d.timestamp() + for _, c := range cids { + _, err := fmt.Fprintf(d.deleteLog, "%s %s\n", now, c) + if err != nil { + log.Warnf("error writing delete log: %s", err) + break + } } } @@ -183,7 +169,7 @@ func (d *debugLog) Flush() { // rotate non-empty logs d.rotateReadLog() d.rotateWriteLog() - d.rotateMoveLog() + d.rotateDeleteLog() d.rotateStackLog() } @@ -233,27 +219,27 @@ func (d *debugLog) rotateWriteLog() { d.writeCnt = 0 } -func (d *debugLog) rotateMoveLog() { - d.moveMx.Lock() - defer d.moveMx.Unlock() +func (d *debugLog) rotateDeleteLog() { + d.deleteMx.Lock() + defer d.deleteMx.Unlock() - if d.moveCnt == 0 { + if d.deleteCnt == 0 { return } - err := d.rotate(d.moveLog, d.movePath) + err := d.rotate(d.deleteLog, d.deletePath) if err != nil { - log.Warnf("error rotating move log: %s", err) + log.Warnf("error rotating delete log: %s", err) return } - d.moveLog, err = os.OpenFile(d.movePath, os.O_WRONLY|os.O_CREATE, 0644) + d.deleteLog, err = os.OpenFile(d.deletePath, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { - log.Warnf("error opening move log file: %s", err) + log.Warnf("error opening delete log file: %s", err) return } - d.moveCnt = 0 + d.deleteCnt = 0 } func (d *debugLog) rotateStackLog() { @@ -315,9 +301,9 @@ func (d *debugLog) Close() error { err2 := d.writeLog.Close() d.writeMx.Unlock() - d.moveMx.Lock() - err3 := d.moveLog.Close() - d.moveMx.Unlock() + d.deleteMx.Lock() + err3 := d.deleteLog.Close() + d.deleteMx.Unlock() d.stackMx.Lock() err4 := d.stackLog.Close() diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index dc2ace46177..f13107a1665 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -25,7 +25,6 @@ import ( 
"github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/metrics" - "github.com/filecoin-project/specs-actors/v2/actors/builtin" "go.opencensus.io/stats" ) @@ -120,7 +119,6 @@ type SplitStore struct { baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch - writeEpoch abi.ChainEpoch // for debug logging only coldPurgeSize int @@ -234,10 +232,9 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { if s.debug != nil { s.mx.Lock() warm := s.warmupEpoch > 0 - curTs := s.curTs s.mx.Unlock() if warm { - s.debug.LogReadMiss(curTs, cid) + s.debug.LogReadMiss(cid) } } @@ -268,10 +265,9 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { if s.debug != nil { s.mx.Lock() warm := s.warmupEpoch > 0 - curTs := s.curTs s.mx.Unlock() if warm { - s.debug.LogReadMiss(curTs, cid) + s.debug.LogReadMiss(cid) } } @@ -292,13 +288,8 @@ func (s *SplitStore) Put(blk blocks.Block) error { err := s.hot.Put(blk) if err == nil { - if s.debug != nil { - s.mx.Lock() - curTs := s.curTs - writeEpoch := s.writeEpoch - s.mx.Unlock() - s.debug.LogWrite(curTs, blk, writeEpoch) - } + s.debug.LogWrite(blk) + err = s.trackTxnRef(blk.Cid()) } @@ -316,13 +307,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { err := s.hot.PutMany(blks) if err == nil { - if s.debug != nil { - s.mx.Lock() - curTs := s.curTs - writeEpoch := s.writeEpoch - s.mx.Unlock() - s.debug.LogWriteMany(curTs, blks, writeEpoch) - } + s.debug.LogWriteMany(blks) err = s.trackTxnRefMany(batch) } @@ -383,10 +368,9 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { if s.debug != nil { s.mx.Lock() warm := s.warmupEpoch > 0 - curTs := s.curTs s.mx.Unlock() if warm { - s.debug.LogReadMiss(curTs, cid) + s.debug.LogReadMiss(cid) } } @@ -458,10 +442,6 @@ func (s *SplitStore) Start(chain ChainAccessor) error { log.Infow("starting splitstore", "baseEpoch", s.baseEpoch, "warmupEpoch", s.warmupEpoch) - if s.debug != nil { - go s.background() - } - // watch the chain chain.SubscribeHeadChanges(s.HeadChange) @@ -525,46 +505,6 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } -// this is only used when debug logging is enabled -func (s *SplitStore) background() { - ticker := time.NewTicker(time.Second) - defer ticker.Stop() - - for { - select { - case <-s.ctx.Done(): - return - - case <-ticker.C: - s.updateWriteEpoch() - } - } -} - -// this is only used when debug logging is enabled -func (s *SplitStore) updateWriteEpoch() { - s.mx.Lock() - defer s.mx.Unlock() - - curTs := s.curTs - timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) - - dt := time.Since(timestamp) - if dt < 0 { - writeEpoch := curTs.Height() + 1 - if writeEpoch > s.writeEpoch { - s.writeEpoch = writeEpoch - } - - return - } - - writeEpoch := curTs.Height() + abi.ChainEpoch(dt.Seconds())/builtin.EpochDurationSeconds + 1 - if writeEpoch > s.writeEpoch { - s.writeEpoch = writeEpoch - } -} - // transactionally protect a reference to an object func (s *SplitStore) trackTxnRef(c cid.Cid) error { if !s.txnActive { @@ -1426,7 +1366,6 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { } deadCids = append(deadCids, c) - s.debug.LogMove(curTs, c) } err := s.hot.DeleteMany(deadCids) @@ -1434,6 +1373,8 @@ func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { return xerrors.Errorf("error purging cold objects: %w", err) } + s.debug.LogDelete(deadCids) + purgeCnt += len(deadCids) return nil }) From 0e2af11f6a38ec1268a5009cc9895003726b9a02 
Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 01:39:58 +0300 Subject: [PATCH 155/197] prepare the transaction before launching the compaction goroutine --- blockstore/splitstore/splitstore.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index f13107a1665..fa38621d3d8 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -486,9 +486,11 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { } if epoch-s.baseEpoch > CompactionThreshold { - // it's time to compact + // it's time to compact -- prepare the transaction and go! + s.prepareTxnProtect(curTs) go func() { defer atomic.StoreInt32(&s.compacting, 0) + defer s.endTxnProtect() log.Info("compacting splitstore") start := time.Now() @@ -755,9 +757,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { defer markSet.Close() //nolint:errcheck defer s.debug.Flush() - // 0. Prepare the transaction - s.prepareTxnProtect(lookbackEpoch) - // 1. mark reachable objects by walking the chain from the current epoch; we keep state roots // and messages until the boundary epoch. log.Info("marking reachable objects") @@ -782,9 +781,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) - // begin transactional protection and fetch references created while marking + // begin transactional protection with concurrent marking and fetch references created while marking txnRefs := s.beginTxnProtect(markSet) - defer s.endTxnProtect() // 1.1 Update markset for references created during marking if len(txnRefs) > 0 { @@ -940,7 +938,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // 5. 
purge cold objects from the hotstore, taking protected references into account log.Info("purging cold objects from the hotstore") startPurge := time.Now() - err = s.purge(curTs, cold) + err = s.purge(cold) if err != nil { return xerrors.Errorf("error purging cold blocks: %w", err) } @@ -963,7 +961,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) prepareTxnProtect(lookbackEpoch abi.ChainEpoch) { +func (s *SplitStore) prepareTxnProtect(curTs *types.TipSet) { + lookbackEpoch := curTs.Height() - CompactionLookback + log.Info("preparing compaction transaction") + s.txnLk.Lock() defer s.txnLk.Unlock() @@ -1340,7 +1341,7 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro return nil } -func (s *SplitStore) purge(curTs *types.TipSet, cids []cid.Cid) error { +func (s *SplitStore) purge(cids []cid.Cid) error { deadCids := make([]cid.Cid, 0, batchSize) var purgeCnt, liveCnt int defer func() { From 90da6227b3a762cd9126835be0882d9b18c953de Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 02:11:37 +0300 Subject: [PATCH 156/197] transactionally protect incoming tipsets --- blockstore/splitstore/splitstore.go | 36 +++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index fa38621d3d8..d485ecdb460 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -474,14 +474,16 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { s.curTs = curTs s.mx.Unlock() - timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) - if time.Since(timestamp) > SyncGapTime { - // don't attempt compaction before we have caught up syncing + if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { + // we are currently compacting -- protect the new tipset(s) + s.protectTipSets(apply) return nil } - if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { - // we are currently compacting (or warming up); do nothing and wait for the next head change + timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) + if time.Since(timestamp) > SyncGapTime { + // don't attempt compaction before we have caught up syncing + atomic.StoreInt32(&s.compacting, 0) return nil } @@ -507,6 +509,30 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } +// transactionally protect incoming tipsets +func (s *SplitStore) protectTipSets(apply []*types.TipSet) { + s.txnLk.RLock() + if !s.txnActive { + s.txnLk.RUnlock() + return + } + + // do this in a goroutine to avoid blocking the notifier + go func() { + defer s.txnLk.RUnlock() + + var cids []cid.Cid + for _, ts := range apply { + cids = append(cids, ts.Cids()...)
+ } + + err := s.trackTxnRefMany(cids) + if err != nil { + log.Errorf("error protecting newly applied tipsets: %s", err) + } + }() +} + // transactionally protect a reference to an object func (s *SplitStore) trackTxnRef(c cid.Cid) error { if !s.txnActive { From 05dbbe9681b34d5b2674df0facba3ef119dac888 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 09:52:31 +0300 Subject: [PATCH 157/197] rename some Txn methods for better readability --- blockstore/splitstore/splitstore.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d485ecdb460..ed46f9d05c9 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -489,7 +489,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { if epoch-s.baseEpoch > CompactionThreshold { // it's time to compact -- prepare the transaction and go! - s.prepareTxnProtect(curTs) + s.beginTxnProtect(curTs) go func() { defer atomic.StoreInt32(&s.compacting, 0) defer s.endTxnProtect() @@ -808,7 +808,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) // begin transactional protection with concurrent marking and fetch references created while marking - txnRefs := s.beginTxnProtect(markSet) + txnRefs := s.beginTxnConcurrentMarking(markSet) // 1.1 Update markset for references created during marking if len(txnRefs) > 0 { @@ -987,7 +987,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) prepareTxnProtect(curTs *types.TipSet) { +func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) { lookbackEpoch := curTs.Height() - CompactionLookback log.Info("preparing compaction transaction") @@ -999,7 +999,7 @@ func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) { s.txnLookbackEpoch = lookbackEpoch } -func (s *SplitStore) beginTxnProtect(markSet MarkSet) map[cid.Cid]struct{} { +func (s *SplitStore) beginTxnConcurrentMarking(markSet MarkSet) map[cid.Cid]struct{} { s.txnLk.Lock() defer s.txnLk.Unlock() From 6cc21127497e5988fa04bd1effdaca0d7d12e67d Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 09:55:25 +0300 Subject: [PATCH 158/197] remove the curTs state variable; we don't need it --- blockstore/splitstore/splitstore.go | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index ed46f9d05c9..0926151a3d7 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -117,14 +117,12 @@ type SplitStore struct { cfg *Config + mx sync.Mutex baseEpoch abi.ChainEpoch warmupEpoch abi.ChainEpoch coldPurgeSize int - mx sync.Mutex - curTs *types.TipSet - chain ChainAccessor ds dstore.Datastore hot bstore.Blockstore @@ -388,7 +386,7 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { // State tracking func (s *SplitStore) Start(chain ChainAccessor) error { s.chain = chain - s.curTs = chain.GetHeaviestTipSet() + curTs := chain.GetHeaviestTipSet() // load base epoch from metadata ds // if none, then use current epoch because it's a fresh start @@ -398,12 +396,12 @@ func (s *SplitStore) Start(chain ChainAccessor) error { s.baseEpoch = bytesToEpoch(bs) case dstore.ErrNotFound: - if s.curTs == nil { + if curTs == nil { // this can happen in some tests break } - err = s.setBaseEpoch(s.curTs.Height()) + err =
s.setBaseEpoch(curTs.Height()) if err != nil { return xerrors.Errorf("error saving base epoch: %w", err) } @@ -420,7 +418,7 @@ func (s *SplitStore) Start(chain ChainAccessor) error { case dstore.ErrNotFound: // the hotstore hasn't warmed up, start a concurrent warm up - err = s.warmup(s.curTs) + err = s.warmup(curTs) if err != nil { return xerrors.Errorf("error warming up: %w", err) } @@ -468,11 +466,8 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { return nil } - s.mx.Lock() curTs := apply[len(apply)-1] epoch := curTs.Height() - s.curTs = curTs - s.mx.Unlock() if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { // we are currently compacting -- protect the new tipset(s) From 83c30dc4c083ebaff569b102da4709f59029bc43 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 11:31:27 +0300 Subject: [PATCH 159/197] protect assignment of warmup epoch with the mutex --- blockstore/splitstore/splitstore.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0926151a3d7..96f751add63 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -118,8 +118,8 @@ type SplitStore struct { cfg *Config mx sync.Mutex - baseEpoch abi.ChainEpoch - warmupEpoch abi.ChainEpoch + warmupEpoch abi.ChainEpoch // protected by mx + baseEpoch abi.ChainEpoch // protected by compaction lock coldPurgeSize int @@ -737,7 +737,9 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { if err != nil { return xerrors.Errorf("error saving warm up epoch: %w", err) } + s.mx.Lock() s.warmupEpoch = epoch + s.mx.Unlock() return nil } From 9dbb2e0abd7092c77f87ce6f05aa94907dc7ffdb Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 16:34:02 +0300 Subject: [PATCH 160/197] don't leak tracking errors through the API --- blockstore/splitstore/splitstore.go | 58 +++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 96f751add63..0e1acd67993 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -197,22 +197,26 @@ func (s *SplitStore) DeleteMany(_ []cid.Cid) error { return errors.New("DeleteMany not implemented on SplitStore; don't do this Luke!") //nolint } -func (s *SplitStore) Has(c cid.Cid) (bool, error) { +func (s *SplitStore) Has(cid cid.Cid) (bool, error) { s.txnLk.RLock() defer s.txnLk.RUnlock() - has, err := s.hot.Has(c) + has, err := s.hot.Has(cid) if err != nil { return has, err } if has { - err = s.trackTxnRef(c) - return true, err + err = s.trackTxnRef(cid) + if err != nil { + log.Warnf("error tracking reference to %s: %s", cid, err) + } + + return true, nil } - return s.cold.Has(c) + return s.cold.Has(cid) } func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { @@ -224,7 +228,11 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { switch err { case nil: err = s.trackTxnRef(cid) - return blk, err + if err != nil { + log.Warnf("error tracking reference to %s: %s", cid, err) + } + + return blk, nil case bstore.ErrNotFound: if s.debug != nil { @@ -257,7 +265,11 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { switch err { case nil: err = s.trackTxnRef(cid) - return size, err + if err != nil { + log.Warnf("error tracking reference to %s: %s", cid, err) + } + + return size, nil case bstore.ErrNotFound: if s.debug != nil { @@ -285,13 +297,18 @@ func (s *SplitStore) Put(blk 
blocks.Block) error { defer s.txnLk.RUnlock() err := s.hot.Put(blk) - if err == nil { - s.debug.LogWrite(blk) + if err != nil { + return err + } - err = s.trackTxnRef(blk.Cid()) + s.debug.LogWrite(blk) + + err = s.trackTxnRef(blk.Cid()) + if err != nil { + log.Warnf("error tracking reference to %s: %s", blk.Cid(), err) } - return err + return nil } func (s *SplitStore) PutMany(blks []blocks.Block) error { @@ -304,13 +321,18 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { defer s.txnLk.RUnlock() err := s.hot.PutMany(blks) - if err == nil { - s.debug.LogWriteMany(blks) + if err != nil { + return err + } + + s.debug.LogWriteMany(blks) - err = s.trackTxnRefMany(batch) + err = s.trackTxnRefMany(batch) + if err != nil { + log.Warnf("error tracking reference to batch: %s", err) } - return err + return nil } func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { @@ -360,7 +382,11 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { switch err { case nil: err = s.trackTxnRef(cid) - return err + if err != nil { + log.Warnf("error tracking reference to %s: %s", cid, err) + } + + return nil case bstore.ErrNotFound: if s.debug != nil { From 451ddf50ab7398613a28fd8f93ba0a0a6b28e33e Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 16:39:37 +0300 Subject: [PATCH 161/197] RIP bbolt-backed markset --- blockstore/splitstore/markset.go | 8 +- blockstore/splitstore/markset_bolt.go | 124 -------------------------- blockstore/splitstore/markset_test.go | 4 +- blockstore/splitstore/splitstore.go | 3 +- 4 files changed, 6 insertions(+), 133 deletions(-) delete mode 100644 blockstore/splitstore/markset_bolt.go diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index 878deb267d7..c7c80f6c557 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -1,8 +1,6 @@ package splitstore import ( - "path/filepath" - "golang.org/x/xerrors" cid "github.com/ipfs/go-cid" @@ -30,14 +28,12 @@ func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { switch mtype { case "bloom": return NewBloomMarkSetEnv(false) - case "bloomts": + case "bloomts": // thread-safe return NewBloomMarkSetEnv(true) case "map": return NewMapMarkSetEnv(false) - case "mapts": + case "mapts": // thread-safe return NewMapMarkSetEnv(true) - case "bolt": - return NewBoltMarkSetEnv(filepath.Join(path, "markset.bolt")) default: return nil, xerrors.Errorf("unknown mark set type %s", mtype) } diff --git a/blockstore/splitstore/markset_bolt.go b/blockstore/splitstore/markset_bolt.go deleted file mode 100644 index bac7673b881..00000000000 --- a/blockstore/splitstore/markset_bolt.go +++ /dev/null @@ -1,124 +0,0 @@ -package splitstore - -import ( - "sync" - "time" - - "golang.org/x/xerrors" - - cid "github.com/ipfs/go-cid" - bolt "go.etcd.io/bbolt" -) - -const boltMarkSetStaging = 16384 - -type BoltMarkSetEnv struct { - db *bolt.DB -} - -var _ MarkSetEnv = (*BoltMarkSetEnv)(nil) - -type BoltMarkSet struct { - db *bolt.DB - bucketId []byte - - // cache for batching - mx sync.RWMutex - pend map[string]struct{} -} - -var _ MarkSet = (*BoltMarkSet)(nil) - -func NewBoltMarkSetEnv(path string) (*BoltMarkSetEnv, error) { - db, err := bolt.Open(path, 0644, - &bolt.Options{ - Timeout: 1 * time.Second, - NoSync: true, - }) - if err != nil { - return nil, err - } - - return &BoltMarkSetEnv{db: db}, nil -} - -func (e *BoltMarkSetEnv) Create(name string, hint int64) (MarkSet, error) { - bucketId := []byte(name) - err := e.db.Update(func(tx 
*bolt.Tx) error { - _, err := tx.CreateBucketIfNotExists(bucketId) - if err != nil { - return xerrors.Errorf("error creating bolt db bucket %s: %w", name, err) - } - return nil - }) - - if err != nil { - return nil, err - } - - return &BoltMarkSet{ - db: e.db, - bucketId: bucketId, - pend: make(map[string]struct{}), - }, nil -} - -func (e *BoltMarkSetEnv) Close() error { - return e.db.Close() -} - -func (s *BoltMarkSet) Mark(cid cid.Cid) error { - s.mx.Lock() - defer s.mx.Unlock() - - key := cid.Hash() - s.pend[string(key)] = struct{}{} - - if len(s.pend) < boltMarkSetStaging { - return nil - } - - err := s.db.Batch(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - for key := range s.pend { - err := b.Put([]byte(key), markBytes) - if err != nil { - return err - } - } - return nil - }) - - if err != nil { - return err - } - - s.pend = make(map[string]struct{}) - return nil -} - -func (s *BoltMarkSet) Has(cid cid.Cid) (result bool, err error) { - s.mx.RLock() - defer s.mx.RUnlock() - - key := cid.Hash() - _, result = s.pend[string(key)] - if result { - return result, nil - } - - err = s.db.View(func(tx *bolt.Tx) error { - b := tx.Bucket(s.bucketId) - v := b.Get(key) - result = v != nil - return nil - }) - - return result, err -} - -func (s *BoltMarkSet) Close() error { - return s.db.Update(func(tx *bolt.Tx) error { - return tx.DeleteBucket(s.bucketId) - }) -} diff --git a/blockstore/splitstore/markset_test.go b/blockstore/splitstore/markset_test.go index 367ab8d06e7..d5c01e22029 100644 --- a/blockstore/splitstore/markset_test.go +++ b/blockstore/splitstore/markset_test.go @@ -8,8 +8,8 @@ import ( "github.com/multiformats/go-multihash" ) -func TestBoltMarkSet(t *testing.T) { - testMarkSet(t, "bolt") +func TestMapMarkSet(t *testing.T) { + testMarkSet(t, "map") } func TestBloomMarkSet(t *testing.T) { diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 0e1acd67993..37793f2cc53 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -92,7 +92,8 @@ const ( type Config struct { // MarkSetType is the type of mark set to use. // - // Sane values are: "mapts", "bolt" (if you are memory constrained). + // Only current sane value is "mapts", but we may add an option for a disk-backed + // markset for memory-constrained situations. MarkSetType string // DiscardColdBlocks indicates whether to skip moving cold blocks to the coldstore. 
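With the bolt backend gone, a markset is selected purely by the mtype string passed to OpenMarkSetEnv above. A minimal usage sketch follows; the import path is assumed from the repo layout and markAndCheck is a hypothetical caller, not code from these patches:

package example

import (
	cid "github.com/ipfs/go-cid"

	"github.com/filecoin-project/lotus/blockstore/splitstore"
)

// markAndCheck exercises the MarkSet lifecycle: open an environment,
// create a named set with a size hint, mark a cid, then query it back.
func markAndCheck(c cid.Cid) (bool, error) {
	// "mapts" selects the thread-safe map markset; the path argument is
	// only meaningful for disk-backed environments, so it can be empty here.
	env, err := splitstore.OpenMarkSetEnv("", "mapts")
	if err != nil {
		return false, err
	}
	defer env.Close()

	ms, err := env.Create("live", 1024) // sizeHint pre-sizes the backing map
	if err != nil {
		return false, err
	}
	defer ms.Close()

	if err := ms.Mark(c); err != nil {
		return false, err
	}
	return ms.Has(c)
}

The env/set split lets one environment hand out multiple named sets (e.g. one per compaction) while owning any shared backing resources.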
From ec586a852a9320cd5c21619733af18a144d29923 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 16:41:11 +0300 Subject: [PATCH 162/197] remove bbolt dependency from go.mod --- go.mod | 1 - 1 file changed, 1 deletion(-) diff --git a/go.mod b/go.mod index 6f18bfa757c..6bddcd2b4d6 100644 --- a/go.mod +++ b/go.mod @@ -144,7 +144,6 @@ require ( github.com/whyrusleeping/multiaddr-filter v0.0.0-20160516205228-e903e4adabd7 github.com/whyrusleeping/pubsub v0.0.0-20190708150250-92bcb0691325 github.com/xorcare/golden v0.6.1-0.20191112154924-b87f686d7542 - go.etcd.io/bbolt v1.3.4 go.opencensus.io v0.23.0 go.uber.org/dig v1.10.0 // indirect go.uber.org/fx v1.9.0 From aec2ba2c82f589b32be8a4bee2573667c1c936d9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 16:46:14 +0300 Subject: [PATCH 163/197] nil map/bf on markset close --- blockstore/splitstore/markset_bloom.go | 20 +++++++++++++++----- blockstore/splitstore/markset_map.go | 6 ++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index cffd4f23ad7..ca15da80747 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -3,6 +3,7 @@ package splitstore import ( "crypto/rand" "crypto/sha256" + "sync" "golang.org/x/xerrors" @@ -23,6 +24,7 @@ var _ MarkSetEnv = (*BloomMarkSetEnv)(nil) type BloomMarkSet struct { salt []byte + mx sync.Mutex bf *bbloom.Bloom ts bool } @@ -68,21 +70,29 @@ func (s *BloomMarkSet) saltedKey(cid cid.Cid) []byte { func (s *BloomMarkSet) Mark(cid cid.Cid) error { if s.ts { - s.bf.AddTS(s.saltedKey(cid)) - } else { - s.bf.Add(s.saltedKey(cid)) + s.mx.Lock() + defer s.mx.Unlock() } + s.bf.Add(s.saltedKey(cid)) return nil } func (s *BloomMarkSet) Has(cid cid.Cid) (bool, error) { if s.ts { - return s.bf.HasTS(s.saltedKey(cid)), nil + s.mx.Lock() + defer s.mx.Unlock() } - return s.bf.HasTS(s.saltedKey(cid)), nil + + return s.bf.Has(s.saltedKey(cid)), nil } func (s *BloomMarkSet) Close() error { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + + } + s.bf = nil return nil } diff --git a/blockstore/splitstore/markset_map.go b/blockstore/splitstore/markset_map.go index 5bac9a284a1..f7baaaee635 100644 --- a/blockstore/splitstore/markset_map.go +++ b/blockstore/splitstore/markset_map.go @@ -57,5 +57,11 @@ func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { } func (s *MapMarkSet) Close() error { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + + } + s.set = nil return nil } From c6421f8a7549c9d40db6411a1dc36462c9932489 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 21:27:36 +0300 Subject: [PATCH 164/197] don't nil the mark sets on close, it's dangerous. a concurrent marking can panic. 
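To make the hazard concrete, a stripped-down sketch (hypothetical set type, not the actual markset code): with no synchronization, Close nil-ing the map races any in-flight Mark.

package example

// set is a deliberately broken sketch of the problem: Close nils the map
// while another goroutine may still be inside Mark.
type set struct {
	m map[string]struct{}
}

// Mark panics with "assignment to entry in nil map" if it races Close.
func (s *set) Mark(k string) {
	s.m[k] = struct{}{}
}

// Close drops the map with no synchronization against in-flight Marks.
func (s *set) Close() {
	s.m = nil
}

[PATCH 168/197] later in this series reinstates the nil-ing safely: Close takes the mutex, and Mark/Has check for a nil map or bloom filter and return the errMarkSetClosed sentinel instead of panicking.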
--- blockstore/splitstore/markset_bloom.go | 6 ------ blockstore/splitstore/markset_map.go | 6 ------ 2 files changed, 12 deletions(-) diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index ca15da80747..7676680fbc8 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -88,11 +88,5 @@ func (s *BloomMarkSet) Has(cid cid.Cid) (bool, error) { } func (s *BloomMarkSet) Close() error { - if s.ts { - s.mx.Lock() - defer s.mx.Unlock() - - } - s.bf = nil return nil } diff --git a/blockstore/splitstore/markset_map.go b/blockstore/splitstore/markset_map.go index f7baaaee635..5bac9a284a1 100644 --- a/blockstore/splitstore/markset_map.go +++ b/blockstore/splitstore/markset_map.go @@ -57,11 +57,5 @@ func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { } func (s *MapMarkSet) Close() error { - if s.ts { - s.mx.Lock() - defer s.mx.Unlock() - - } - s.set = nil return nil } From fee50b13a2cc212e1f30a0e900d654a75117d6c0 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 21:32:05 +0300 Subject: [PATCH 165/197] check the closing state on each batch during the purge. --- blockstore/splitstore/splitstore.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 37793f2cc53..d72ec665da4 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1402,6 +1402,11 @@ func (s *SplitStore) purge(cids []cid.Cid) error { func(cids []cid.Cid) error { deadCids := deadCids[:0] + if atomic.LoadInt32(&s.closing) == 1 { + log.Info("splitstore is closing; aborting purge") + return xerrors.Errorf("compaction aborted") + } + s.txnLk.Lock() defer s.txnLk.Unlock() From 4f808367f8e40ef529ac72e924966fe70a23389b Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 7 Jul 2021 21:32:58 +0300 Subject: [PATCH 166/197] fix lint --- blockstore/splitstore/markset.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index c7c80f6c557..7be8b8588a5 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -16,9 +16,6 @@ type MarkSet interface { Close() error } -// markBytes is deliberately a non-nil empty byte slice for serialization. -var markBytes = []byte{} - type MarkSetEnv interface { Create(name string, sizeHint int64) (MarkSet, error) Close() error From f5c45bd517d3643fca2b6684468039c014fa1699 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 10:13:44 +0300 Subject: [PATCH 167/197] check the closing state variable often so that we have a reasonably quick graceful shutdown --- blockstore/splitstore/splitstore.go | 61 ++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d72ec665da4..e1e7463d441 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -807,6 +807,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { defer markSet.Close() //nolint:errcheck defer s.debug.Flush() + if err := s.checkClosing(); err != nil { + return err + } + // 1. mark reachable objects by walking the chain from the current epoch; we keep state roots // and messages until the boundary epoch. 
log.Info("marking reachable objects") @@ -815,6 +819,11 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { var count int64 err = s.walkChain(curTs, boundaryEpoch, true, func(c cid.Cid) error { + // marking takes a while, so check this with every opportunity + if err := s.checkClosing(); err != nil { + return err + } + if isFilCommitment(c) { return errStopWalk } @@ -831,6 +840,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("marking done", "took", time.Since(startMark), "marked", count) + if err := s.checkClosing(); err != nil { + return err + } + // begin transactional protection with concurrent marking and fetch references created while marking txnRefs := s.beginTxnConcurrentMarking(markSet) @@ -842,6 +855,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { count = 0 for c := range txnRefs { + if err := s.checkClosing(); err != nil { + return err + } + if isFilCommitment(c) { continue } @@ -906,6 +923,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { log.Infow("update mark set done", "took", time.Since(startMark), "marked", count) } + if err := s.checkClosing(); err != nil { + return err + } + // 2. iterate through the hotstore to collect cold objects log.Info("collecting cold objects") startCollect := time.Now() @@ -947,11 +968,19 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt))) stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt))) + if err := s.checkClosing(); err != nil { + return err + } + // now that we have collected cold objects, check for missing references from transactional i/o // and disable further collection of such references (they will not be acted upon as we can't // possibly delete objects we didn't have when we were collecting cold objects) s.waitForMissingRefs() + if err := s.checkClosing(); err != nil { + return err + } + // 3. copy the cold objects to the coldstore -- if we have one if !s.cfg.DiscardColdBlocks { log.Info("moving cold objects to the coldstore") @@ -980,9 +1009,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { defer atomic.StoreInt32(&s.critsection, 0) // check to see if we are closing first; if that's the case just return - if atomic.LoadInt32(&s.closing) == 1 { - log.Info("splitstore is closing; aborting compaction") - return xerrors.Errorf("compaction aborted") + if err := s.checkClosing(); err != nil { + return err } // 5. 
purge cold objects from the hotstore, taking protected references into account @@ -1232,6 +1260,15 @@ func (s *SplitStore) has(c cid.Cid) (bool, error) { return s.cold.Has(c) } +func (s *SplitStore) checkClosing() error { + if atomic.LoadInt32(&s.closing) == 1 { + log.Info("splitstore is closing; aborting compaction") + return xerrors.Errorf("compaction aborted") + } + + return nil +} + func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlock bool, err error) { if c.Prefix().Codec != cid.DagCBOR { return false, nil @@ -1252,6 +1289,10 @@ func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) for _, c := range cold { + if err := s.checkClosing(); err != nil { + return err + } + blk, err := s.hot.Get(c) if err != nil { if err == bstore.ErrNotFound { @@ -1300,6 +1341,11 @@ func (s *SplitStore) sortObjects(cids []cid.Cid) error { // compute sorting weights as the cumulative number of DAG links weights := make(map[string]int) for _, c := range cids { + // this can take quite a while, so check for shutdown with every opportunity + if err := s.checkClosing(); err != nil { + return err + } + w := s.getObjectWeight(c, weights, key) weights[key(c)] = w } @@ -1402,9 +1448,8 @@ func (s *SplitStore) purge(cids []cid.Cid) error { func(cids []cid.Cid) error { deadCids := deadCids[:0] - if atomic.LoadInt32(&s.closing) == 1 { - log.Info("splitstore is closing; aborting purge") - return xerrors.Errorf("compaction aborted") + if err := s.checkClosing(); err != nil { + return err } s.txnLk.Lock() @@ -1461,6 +1506,10 @@ func (s *SplitStore) waitForMissingRefs() { }() for i := 0; i < 3 && len(missing) > 0; i++ { + if err := s.checkClosing(); err != nil { + return + } + wait := time.Duration(i) * time.Minute log.Infof("retrying for %d missing references in %s (attempt: %d)", len(missing), wait, i+1) if wait > 0 { From 48f13a43b74de48eee6443c35fdeeaa89b395389 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 10:18:43 +0300 Subject: [PATCH 168/197] intelligently close marksets and signal errors in concurrent operations --- blockstore/splitstore/markset.go | 4 ++++ blockstore/splitstore/markset_bloom.go | 13 +++++++++++++ blockstore/splitstore/markset_map.go | 13 +++++++++++++ 3 files changed, 30 insertions(+) diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index 7be8b8588a5..76d929a5d3a 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -1,11 +1,15 @@ package splitstore import ( + "errors" + "golang.org/x/xerrors" cid "github.com/ipfs/go-cid" ) +var errMarkSetClosed = errors.New("markset closed") + // MarkSet is a utility to keep track of seen CID, and later query for them. // // * If the expected dataset is large, it can be backed by a datastore (e.g. bbolt). 
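The close-safety pattern that the two hunks below apply to both markset implementations can be summarized in a minimal sketch (simplified, assumed types; the real code additionally takes the lock only when operating in thread-safe mode): Close nils the backing structure under the same lock that guards Mark and Has, so an operation racing with Close fails cleanly with errMarkSetClosed instead of panicking on a nil map.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

var errMarkSetClosed = errors.New("markset closed")

// closeSafeSet is a simplified stand-in for MapMarkSet/BloomMarkSet.
type closeSafeSet struct {
	mx  sync.Mutex
	set map[string]struct{}
}

func (s *closeSafeSet) Mark(key string) error {
	s.mx.Lock()
	defer s.mx.Unlock()
	if s.set == nil { // closed: signal the error instead of panicking
		return errMarkSetClosed
	}
	s.set[key] = struct{}{}
	return nil
}

func (s *closeSafeSet) Has(key string) (bool, error) {
	s.mx.Lock()
	defer s.mx.Unlock()
	if s.set == nil {
		return false, errMarkSetClosed
	}
	_, ok := s.set[key]
	return ok, nil
}

func (s *closeSafeSet) Close() error {
	s.mx.Lock()
	defer s.mx.Unlock()
	s.set = nil // all subsequent operations observe the closed state
	return nil
}

func main() {
	s := &closeSafeSet{set: make(map[string]struct{})}
	_ = s.Mark("a")
	_ = s.Close()
	fmt.Println(s.Mark("b")) // prints: markset closed
}
```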
diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index 7676680fbc8..9ce701607c2 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -74,6 +74,10 @@ func (s *BloomMarkSet) Mark(cid cid.Cid) error { defer s.mx.Unlock() } + if s.bf == nil { + return errMarkSetClosed + } + s.bf.Add(s.saltedKey(cid)) return nil } @@ -84,9 +88,18 @@ func (s *BloomMarkSet) Has(cid cid.Cid) (bool, error) { defer s.mx.Unlock() } + if s.bf == nil { + return false, errMarkSetClosed + } + return s.bf.Has(s.saltedKey(cid)), nil } func (s *BloomMarkSet) Close() error { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + } + s.bf = nil return nil } diff --git a/blockstore/splitstore/markset_map.go b/blockstore/splitstore/markset_map.go index 5bac9a284a1..ac58cd8dda1 100644 --- a/blockstore/splitstore/markset_map.go +++ b/blockstore/splitstore/markset_map.go @@ -42,6 +42,10 @@ func (s *MapMarkSet) Mark(cid cid.Cid) error { defer s.mx.Unlock() } + if s.set == nil { + return errMarkSetClosed + } + s.set[string(cid.Hash())] = struct{}{} return nil } @@ -52,10 +56,19 @@ func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { defer s.mx.Unlock() } + if s.set == nil { + return false, errMarkSetClosed + } + _, ok := s.set[string(cid.Hash())] return ok, nil } func (s *MapMarkSet) Close() error { + if s.ts { + s.mx.Lock() + defer s.mx.Unlock() + } + s.set = nil return nil } From e6eacbdd5643334745d021f58fbd77e813730cee Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 10:20:29 +0300 Subject: [PATCH 169/197] use RW mutexes in marksets --- blockstore/splitstore/markset_bloom.go | 6 +++--- blockstore/splitstore/markset_map.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index 9ce701607c2..3b40b682be2 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -24,7 +24,7 @@ var _ MarkSetEnv = (*BloomMarkSetEnv)(nil) type BloomMarkSet struct { salt []byte - mx sync.Mutex + mx sync.RWMutex bf *bbloom.Bloom ts bool } @@ -84,8 +84,8 @@ func (s *BloomMarkSet) Mark(cid cid.Cid) error { func (s *BloomMarkSet) Has(cid cid.Cid) (bool, error) { if s.ts { - s.mx.Lock() - defer s.mx.Unlock() + s.mx.RLock() + defer s.mx.RUnlock() } if s.bf == nil { diff --git a/blockstore/splitstore/markset_map.go b/blockstore/splitstore/markset_map.go index ac58cd8dda1..029d674185f 100644 --- a/blockstore/splitstore/markset_map.go +++ b/blockstore/splitstore/markset_map.go @@ -13,7 +13,7 @@ type MapMarkSetEnv struct { var _ MarkSetEnv = (*MapMarkSetEnv)(nil) type MapMarkSet struct { - mx sync.Mutex + mx sync.RWMutex set map[string]struct{} ts bool @@ -52,8 +52,8 @@ func (s *MapMarkSet) Mark(cid cid.Cid) error { func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) { if s.ts { - s.mx.Lock() - defer s.mx.Unlock() + s.mx.RLock() + defer s.mx.RUnlock() } if s.set == nil { From 9aa4f3b3b2d5c6ef6571ef458d1d02e924464dea Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 12:32:41 +0300 Subject: [PATCH 170/197] add README for documentation --- blockstore/splitstore/README.md | 54 +++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 blockstore/splitstore/README.md diff --git a/blockstore/splitstore/README.md b/blockstore/splitstore/README.md new file mode 100644 index 00000000000..c133e025ea1 --- /dev/null +++ b/blockstore/splitstore/README.md @@ -0,0 +1,54 @@ +# SplitStore: An actively scalable 
blockstore for the Filecoin chain + +The SplitStore was first introduced in lotus v1.5.1, as an experiment +in reducing the performance impact of large blockstores. + +With lotus v1.11.1, we introduce the next iteration in design and +implementation, which we call SplitStore v1. + +The new design (see [#6474](https://github.com/filecoin-project/lotus/pull/6474)) +evolves the splitstore to be a freestanding compacting blockstore that +allows us to keep a small (60-100GB) working set in a hot blockstore +and reliably archive out-of-scope objects in a coldstore. The coldstore +can be a noop store, whereby out-of-scope objects are discarded, or a +regular badger blockstore (the default), which can be periodically +garbage collected according to configurable user retention policies. + +## Operation + +When the splitstore is first enabled, the existing blockstore becomes +the coldstore and a fresh hotstore is initialized. + +The hotstore is warmed up on first startup so as to load all chain +headers and state roots in the current head. This allows us to +immediately gain the performance benefits of a smaller blockstore, which +can be substantial for full archival nodes. + +All new writes are directed to the hotstore, while reads first hit the +hotstore, with fallback to the coldstore. + +Once 5 finalities have elapsed, and every finality henceforth, the +blockstore _compacts_. Compaction is the process of moving all +unreachable objects within the last 4 finalities from the hotstore to +the coldstore. If the system is configured with a noop coldstore, +these objects are discarded. Note that chain headers, all the way to +genesis, are considered reachable. State roots and messages are +considered reachable only within the last 4 finalities, unless there +is a live reference to them. + +## Compaction + +Compaction works transactionally with the following algorithm: +- We prepare a transaction, whereby all objects referenced through API I/O are tracked. +- We walk the chain and mark reachable objects, keeping 4 finalities of state roots and messages and all headers all the way to genesis. +- Once the chain walk is complete, we begin full transaction protection with concurrent marking; we walk and mark all references created during the chain walk. At the same time, all I/O through the API concurrently marks objects as live references. +- We collect cold objects by iterating through the hotstore and checking the mark set; if an object is not marked, then it is a candidate for purging. +- When running with a coldstore, we next copy all cold objects to the coldstore. +- At this point we are ready to begin purging: + - We sort cold objects heaviest first, so as to never delete the constituents of a DAG before the DAG itself (which would leave dangling references) + - We delete in small batches taking a lock; each batch is checked again for marks from the concurrent transactional marking, so as to never delete anything live (see the sketch below) +- We then end the transaction and compact/gc the hotstore.
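As a concrete illustration of the purge step, here is a minimal sketch of the batched deletion loop, assuming a hypothetical `live` predicate backed by the concurrent mark set and a `deleteBatch` callback into the hotstore (neither is the actual splitstore API):

```go
package sketch

import (
	cid "github.com/ipfs/go-cid"
)

// purgeSketch deletes cold objects in small batches, re-checking each batch
// against the concurrent mark set so that anything referenced since
// collection is spared. live and deleteBatch are hypothetical stand-ins for
// the mark set lookup and the hotstore batch delete.
func purgeSketch(cold []cid.Cid, live func(cid.Cid) (bool, error), deleteBatch func([]cid.Cid) error) error {
	const batchSize = 16384
	for len(cold) > 0 {
		n := batchSize
		if n > len(cold) {
			n = len(cold)
		}
		batch := cold[:n]
		cold = cold[n:]

		dead := make([]cid.Cid, 0, len(batch))
		for _, c := range batch {
			marked, err := live(c)
			if err != nil {
				return err
			}
			if !marked { // still cold: safe to delete
				dead = append(dead, c)
			}
		}
		if len(dead) > 0 {
			if err := deleteBatch(dead); err != nil {
				return err
			}
		}
	}
	return nil
}
```

The per-batch re-check is what allows API traffic to keep creating live references while the purge is running.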
+ +## Coldstore Garbage Collection + +TBD -- see [#6577](https://github.com/filecoin-project/lotus/issues/6577) From 5cf1e09e813fa4443490d1df4ba3dbc720233e99 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 13:00:31 +0300 Subject: [PATCH 171/197] README: add instructions for how to enable --- blockstore/splitstore/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/blockstore/splitstore/README.md b/blockstore/splitstore/README.md index c133e025ea1..da07e17142a 100644 --- a/blockstore/splitstore/README.md +++ b/blockstore/splitstore/README.md @@ -14,6 +14,18 @@ can be a noop store, whereby out of scope objects are discarded or a regular badger blockstore (the default), which can be periodically garbage collected according to configurable user retention policies. +To enable the splitstore, edit `.lotus/config.toml` and add the following: +``` +[Chainstore] + EnableSplitstore = true +``` + +If you want to use the noop coldstore, also add the following: +``` + [Chainstore.Splitstore] + ColdStoreType = "noop" +``` + ## Operation When the splitstore is first enabled, the existing blockstore becomes From 00d7772f57ed2b847851bd081575a73122289690 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 13:12:19 +0300 Subject: [PATCH 172/197] move check for closure in walkChain so that we don't do it too often and also cover warmup. --- blockstore/splitstore/splitstore.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e1e7463d441..975cd0e0487 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -819,11 +819,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { var count int64 err = s.walkChain(curTs, boundaryEpoch, true, func(c cid.Cid) error { - // marking takes a while, so check this with every opportunity - if err := s.checkClosing(); err != nil { - return err - } - if isFilCommitment(c) { return errStopWalk } @@ -1130,6 +1125,11 @@ func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMs } for len(toWalk) > 0 { + // walking can take a while, so check this with every opportunity + if err := s.checkClosing(); err != nil { + return err + } + walking := toWalk toWalk = nil for _, c := range walking { From fa30ac8c5d313c53f89f68aa9b235109a8a0b430 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 17:53:59 +0300 Subject: [PATCH 173/197] fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Magiera --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 975cd0e0487..85ce3834ddc 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -691,7 +691,7 @@ func (s *SplitStore) warmup(curTs *types.TipSet) error { return nil } -// the actual warmup procedure; it waslk the chain loading all state roots at the boundary +// the actual warmup procedure; it walks the chain loading all state roots at the boundary // and headers all the way up to genesis. // objects are written in batches so as to minimize overhead. 
func (s *SplitStore) doWarmup(curTs *types.TipSet) error { From c0537848b31868a957f62baa42f199e16969e5d5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 17:54:16 +0300 Subject: [PATCH 174/197] fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Magiera --- blockstore/splitstore/splitstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 85ce3834ddc..c02927d369c 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -934,7 +934,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // was it marked? mark, err := markSet.Has(c) if err != nil { - return xerrors.Errorf("error checkiing mark set for %s: %w", c, err) + return xerrors.Errorf("error checking mark set for %s: %w", c, err) } if mark { From 60dd97c7fc5f8f114ebebf9fe78cbb39b6672e76 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 21:18:59 +0300 Subject: [PATCH 175/197] fix potential deadlock in View As pointed out by magik, it is possible to deadlock if the view callback performs a blockstore operation while a Lock is pending. This fixes the issue by optimistically tracking the reference before actually calling the underlying View and limiting the scope of the lock. --- blockstore/splitstore/splitstore.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index c02927d369c..30d108656ca 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -376,19 +376,19 @@ func (s *SplitStore) HashOnRead(enabled bool) { } func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { + // optimistically protect the reference so that we can call the underlying View + // without holding hte lock. + // This allows the user callback to call into the blockstore without deadlocking. s.txnLk.RLock() - defer s.txnLk.RUnlock() + err := s.trackTxnRef(cid) + s.txnLk.RUnlock() - err := s.hot.View(cid, cb) - switch err { - case nil: - err = s.trackTxnRef(cid) - if err != nil { - log.Warnf("error tracking reference to %s: %s", cid, err) - } - - return nil + if err != nil { + log.Warnf("error tracking reference to %s: %s", cid, err) + } + err = s.hot.View(cid, cb) + switch err { case bstore.ErrNotFound: if s.debug != nil { s.mx.Lock() From b6611125b677268bb0ed935bea9e01ef5e49c5dc Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 8 Jul 2021 21:30:39 +0300 Subject: [PATCH 176/197] add environment variables to turn on the debug log without recompiling --- blockstore/splitstore/splitstore.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 30d108656ca..98c63b45f6d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -5,6 +5,7 @@ import ( "context" "encoding/binary" "errors" + "os" "sort" "sync" "sync/atomic" @@ -149,6 +150,16 @@ type SplitStore struct { var _ bstore.Blockstore = (*SplitStore)(nil) +func init() { + if os.Getenv("LOTUS_SPLITSTORE_DEBUG_LOG") == "1" { + enableDebugLog = true + } + + if os.Getenv("LOTUS_SPLITSTORE_DEBUG_LOG_WRITE_TRACES") == "1" { + enableDebugLogWriteTraces = true + } +} + // Open opens an existing splistore, or creates a new splitstore. The splitstore // is backed by the provided hot and cold stores. 
The returned SplitStore MUST be // attached to the ChainStore with Start in order to trigger compaction. From abdf4a161a14267df3d851ed85cb9491cb0c740c Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 04:26:36 +0300 Subject: [PATCH 177/197] explicitly switch marksets for concurrent marking this has very noticeable impact in initial marking time; it also allows us to get rid of the confusing ts monikers. --- blockstore/splitstore/markset.go | 9 +++------ blockstore/splitstore/markset_bloom.go | 14 ++++++++------ blockstore/splitstore/markset_map.go | 13 +++++++------ blockstore/splitstore/splitstore.go | 4 +++- node/config/def.go | 2 +- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/blockstore/splitstore/markset.go b/blockstore/splitstore/markset.go index 76d929a5d3a..a644e727955 100644 --- a/blockstore/splitstore/markset.go +++ b/blockstore/splitstore/markset.go @@ -18,6 +18,7 @@ type MarkSet interface { Mark(cid.Cid) error Has(cid.Cid) (bool, error) Close() error + SetConcurrent() } type MarkSetEnv interface { @@ -28,13 +29,9 @@ type MarkSetEnv interface { func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) { switch mtype { case "bloom": - return NewBloomMarkSetEnv(false) - case "bloomts": // thread-safe - return NewBloomMarkSetEnv(true) + return NewBloomMarkSetEnv() case "map": - return NewMapMarkSetEnv(false) - case "mapts": // thread-safe - return NewMapMarkSetEnv(true) + return NewMapMarkSetEnv() default: return nil, xerrors.Errorf("unknown mark set type %s", mtype) } diff --git a/blockstore/splitstore/markset_bloom.go b/blockstore/splitstore/markset_bloom.go index 3b40b682be2..9261de7c753 100644 --- a/blockstore/splitstore/markset_bloom.go +++ b/blockstore/splitstore/markset_bloom.go @@ -16,9 +16,7 @@ const ( BloomFilterProbability = 0.01 ) -type BloomMarkSetEnv struct { - ts bool -} +type BloomMarkSetEnv struct{} var _ MarkSetEnv = (*BloomMarkSetEnv)(nil) @@ -31,8 +29,8 @@ type BloomMarkSet struct { var _ MarkSet = (*BloomMarkSet)(nil) -func NewBloomMarkSetEnv(ts bool) (*BloomMarkSetEnv, error) { - return &BloomMarkSetEnv{ts: ts}, nil +func NewBloomMarkSetEnv() (*BloomMarkSetEnv, error) { + return &BloomMarkSetEnv{}, nil } func (e *BloomMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { @@ -52,7 +50,7 @@ func (e *BloomMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { return nil, xerrors.Errorf("error creating bloom filter: %w", err) } - return &BloomMarkSet{salt: salt, bf: bf, ts: e.ts}, nil + return &BloomMarkSet{salt: salt, bf: bf}, nil } func (e *BloomMarkSetEnv) Close() error { @@ -103,3 +101,7 @@ func (s *BloomMarkSet) Close() error { s.bf = nil return nil } + +func (s *BloomMarkSet) SetConcurrent() { + s.ts = true +} diff --git a/blockstore/splitstore/markset_map.go b/blockstore/splitstore/markset_map.go index 029d674185f..197c824242a 100644 --- a/blockstore/splitstore/markset_map.go +++ b/blockstore/splitstore/markset_map.go @@ -6,9 +6,7 @@ import ( cid "github.com/ipfs/go-cid" ) -type MapMarkSetEnv struct { - ts bool -} +type MapMarkSetEnv struct{} var _ MarkSetEnv = (*MapMarkSetEnv)(nil) @@ -21,14 +19,13 @@ type MapMarkSet struct { var _ MarkSet = (*MapMarkSet)(nil) -func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) { - return &MapMarkSetEnv{ts: ts}, nil +func NewMapMarkSetEnv() (*MapMarkSetEnv, error) { + return &MapMarkSetEnv{}, nil } func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) { return &MapMarkSet{ set: make(map[string]struct{}, sizeHint), - ts: e.ts, }, nil } 
@@ -72,3 +69,7 @@ func (s *MapMarkSet) Close() error { s.set = nil return nil } + +func (s *MapMarkSet) SetConcurrent() { + s.ts = true +} diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 98c63b45f6d..82494795d63 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -93,7 +93,7 @@ const ( type Config struct { // MarkSetType is the type of mark set to use. // - // Only current sane value is "mapts", but we may add an option for a disk-backed + // Only current sane value is "map", but we may add an option for a disk-backed // markset for memory-constrained situations. MarkSetType string @@ -1061,6 +1061,8 @@ func (s *SplitStore) beginTxnConcurrentMarking(markSet MarkSet) map[cid.Cid]stru s.txnLk.Lock() defer s.txnLk.Unlock() + markSet.SetConcurrent() + txnRefs := s.txnRefs s.txnRefs = nil s.txnMissing = make(map[cid.Cid]struct{}) diff --git a/node/config/def.go b/node/config/def.go index 81cd9929d2e..9bbf8375c51 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -304,7 +304,7 @@ func DefaultFullNode() *FullNode { Splitstore: Splitstore{ ColdStoreType: "universal", HotStoreType: "badger", - MarkSetType: "mapts", + MarkSetType: "map", }, }, } From 909f7039d47c351c2581de0c39bb1c9b5d8a721d Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 09:54:12 +0300 Subject: [PATCH 178/197] make badger Close-safe --- blockstore/badger/blockstore.go | 113 +++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/blockstore/badger/blockstore.go b/blockstore/badger/blockstore.go index f77691a6fd4..17ebbd7eab9 100644 --- a/blockstore/badger/blockstore.go +++ b/blockstore/badger/blockstore.go @@ -5,7 +5,7 @@ import ( "fmt" "io" "runtime" - "sync/atomic" + "sync" "github.com/dgraph-io/badger/v2" "github.com/dgraph-io/badger/v2/options" @@ -73,20 +73,16 @@ func (b *badgerLogger) Warningf(format string, args ...interface{}) { } const ( - stateOpen int64 = iota + stateOpen = iota stateClosing stateClosed ) // Blockstore is a badger-backed IPLD blockstore. -// -// NOTE: once Close() is called, methods will try their best to return -// ErrBlockstoreClosed. This will guaranteed to happen for all subsequent -// operation calls after Close() has returned, but it may not happen for -// operations in progress. Those are likely to fail with a different error. type Blockstore struct { - // state is accessed atomically - state int64 + stateLk sync.RWMutex + state int + viewers sync.WaitGroup DB *badger.DB @@ -125,19 +121,51 @@ func Open(opts Options) (*Blockstore, error) { // Close closes the store. If the store has already been closed, this noops and // returns an error, even if the first closure resulted in error. 
func (b *Blockstore) Close() error { - if !atomic.CompareAndSwapInt64(&b.state, stateOpen, stateClosing) { + b.stateLk.Lock() + if b.state != stateOpen { + b.stateLk.Unlock() return nil } + b.state = stateClosing + b.stateLk.Unlock() + + defer func() { + b.stateLk.Lock() + b.state = stateClosed + b.stateLk.Unlock() + }() + + // wait for all accesses to complete + b.viewers.Wait() - defer atomic.StoreInt64(&b.state, stateClosed) return b.DB.Close() } +func (b *Blockstore) access() error { + b.stateLk.RLock() + defer b.stateLk.RUnlock() + + if b.state != stateOpen { + return ErrBlockstoreClosed + } + + b.viewers.Add(1) + return nil +} + +func (b *Blockstore) isOpen() bool { + b.stateLk.RLock() + defer b.stateLk.RUnlock() + + return b.state == stateOpen +} + // CollectGarbage runs garbage collection on the value log func (b *Blockstore) CollectGarbage() error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() var err error for err == nil { @@ -154,9 +182,10 @@ func (b *Blockstore) CollectGarbage() error { // Compact runs a synchronous compaction func (b *Blockstore) Compact() error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() nworkers := runtime.NumCPU() / 2 if nworkers < 2 { @@ -169,9 +198,10 @@ func (b *Blockstore) Compact() error { // View implements blockstore.Viewer, which leverages zero-copy read-only // access to values. func (b *Blockstore) View(cid cid.Cid, fn func([]byte) error) error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() k, pooled := b.PooledStorageKey(cid) if pooled { @@ -192,9 +222,10 @@ func (b *Blockstore) View(cid cid.Cid, fn func([]byte) error) error { // Has implements Blockstore.Has. func (b *Blockstore) Has(cid cid.Cid) (bool, error) { - if atomic.LoadInt64(&b.state) != stateOpen { - return false, ErrBlockstoreClosed + if err := b.access(); err != nil { + return false, err } + defer b.viewers.Done() k, pooled := b.PooledStorageKey(cid) if pooled { @@ -222,9 +253,10 @@ func (b *Blockstore) Get(cid cid.Cid) (blocks.Block, error) { return nil, blockstore.ErrNotFound } - if atomic.LoadInt64(&b.state) != stateOpen { - return nil, ErrBlockstoreClosed + if err := b.access(); err != nil { + return nil, err } + defer b.viewers.Done() k, pooled := b.PooledStorageKey(cid) if pooled { @@ -251,9 +283,10 @@ func (b *Blockstore) Get(cid cid.Cid) (blocks.Block, error) { // GetSize implements Blockstore.GetSize. func (b *Blockstore) GetSize(cid cid.Cid) (int, error) { - if atomic.LoadInt64(&b.state) != stateOpen { - return -1, ErrBlockstoreClosed + if err := b.access(); err != nil { + return 0, err } + defer b.viewers.Done() k, pooled := b.PooledStorageKey(cid) if pooled { @@ -280,9 +313,10 @@ func (b *Blockstore) GetSize(cid cid.Cid) (int, error) { // Put implements Blockstore.Put. func (b *Blockstore) Put(block blocks.Block) error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() k, pooled := b.PooledStorageKey(block.Cid()) if pooled { @@ -300,9 +334,10 @@ func (b *Blockstore) Put(block blocks.Block) error { // PutMany implements Blockstore.PutMany. 
func (b *Blockstore) PutMany(blocks []blocks.Block) error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() // toReturn tracks the byte slices to return to the pool, if we're using key // prefixing. we can't return each slice to the pool after each Set, because @@ -339,9 +374,10 @@ func (b *Blockstore) PutMany(blocks []blocks.Block) error { // DeleteBlock implements Blockstore.DeleteBlock. func (b *Blockstore) DeleteBlock(cid cid.Cid) error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() k, pooled := b.PooledStorageKey(cid) if pooled { @@ -354,9 +390,10 @@ func (b *Blockstore) DeleteBlock(cid cid.Cid) error { } func (b *Blockstore) DeleteMany(cids []cid.Cid) error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() // toReturn tracks the byte slices to return to the pool, if we're using key // prefixing. we can't return each slice to the pool after each Set, because @@ -393,8 +430,8 @@ func (b *Blockstore) DeleteMany(cids []cid.Cid) error { // AllKeysChan implements Blockstore.AllKeysChan. func (b *Blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { - if atomic.LoadInt64(&b.state) != stateOpen { - return nil, ErrBlockstoreClosed + if err := b.access(); err != nil { + return nil, err } txn := b.DB.NewTransaction(false) @@ -406,6 +443,7 @@ func (b *Blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { ch := make(chan cid.Cid) go func() { + defer b.viewers.Done() defer close(ch) defer iter.Close() @@ -416,7 +454,7 @@ func (b *Blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { if ctx.Err() != nil { return // context has fired. } - if atomic.LoadInt64(&b.state) != stateOpen { + if !b.isOpen() { // open iterators will run even after the database is closed... return // closing, yield. 
} @@ -445,9 +483,10 @@ func (b *Blockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { // Implementation of BlockstoreIterator interface func (b *Blockstore) ForEachKey(f func(cid.Cid) error) error { - if atomic.LoadInt64(&b.state) != stateOpen { - return ErrBlockstoreClosed + if err := b.access(); err != nil { + return err } + defer b.viewers.Done() txn := b.DB.NewTransaction(false) defer txn.Discard() @@ -462,7 +501,7 @@ func (b *Blockstore) ForEachKey(f func(cid.Cid) error) error { var buf []byte for iter.Rewind(); iter.Valid(); iter.Next() { - if atomic.LoadInt64(&b.state) != stateOpen { + if !b.isOpen() { return ErrBlockstoreClosed } From de5e21bf1a86746166ee9cb637d4121ccde10bba Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 11:31:04 +0300 Subject: [PATCH 179/197] correctly handle identity cids --- blockstore/splitstore/splitstore.go | 98 ++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 82494795d63..46f9b5a3d7d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -18,6 +18,7 @@ import ( cid "github.com/ipfs/go-cid" dstore "github.com/ipfs/go-datastore" logging "github.com/ipfs/go-log/v2" + mh "github.com/multiformats/go-multihash" cbg "github.com/whyrusleeping/cbor-gen" "github.com/filecoin-project/go-state-types/abi" @@ -210,6 +211,10 @@ func (s *SplitStore) DeleteMany(_ []cid.Cid) error { } func (s *SplitStore) Has(cid cid.Cid) (bool, error) { + if isIdentiyCid(cid) { + return true, nil + } + s.txnLk.RLock() defer s.txnLk.RUnlock() @@ -232,6 +237,15 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { } func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { + if isIdentiyCid(cid) { + data, err := decodeIdentityCid(cid) + if err != nil { + return nil, err + } + + return blocks.NewBlockWithCid(data, cid) + } + s.txnLk.RLock() defer s.txnLk.RUnlock() @@ -269,6 +283,15 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { } func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { + if isIdentiyCid(cid) { + data, err := decodeIdentityCid(cid) + if err != nil { + return 0, err + } + + return len(data), nil + } + s.txnLk.RLock() defer s.txnLk.RUnlock() @@ -305,6 +328,10 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { } func (s *SplitStore) Put(blk blocks.Block) error { + if isIdentiyCid(blk.Cid()) { + return nil + } + s.txnLk.RLock() defer s.txnLk.RUnlock() @@ -324,6 +351,31 @@ func (s *SplitStore) Put(blk blocks.Block) error { } func (s *SplitStore) PutMany(blks []blocks.Block) error { + // filter identites + idcids := 0 + for _, blk := range blks { + if isIdentiyCid(blk.Cid()) { + idcids++ + } + } + + if idcids > 0 { + if idcids == len(blks) { + // it's all identities + return nil + } + + filtered := make([]blocks.Block, 0, len(blks)-idcids) + for _, blk := range blks { + if isIdentiyCid(blk.Cid()) { + continue + } + filtered = append(filtered, blk) + } + + blks = filtered + } + batch := make([]cid.Cid, 0, len(blks)) for _, blk := range blks { batch = append(batch, blk.Cid()) @@ -387,6 +439,15 @@ func (s *SplitStore) HashOnRead(enabled bool) { } func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { + if isIdentiyCid(cid) { + data, err := decodeIdentityCid(cid) + if err != nil { + return err + } + + return cb(data) + } + // optimistically protect the reference so that we can call the underlying View // without holding hte lock. 
// This allows the user callback to call into the blockstore without deadlocking. @@ -625,7 +686,7 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro // cannot be deleted before the object itself. err := s.walkObjectIncomplete(root, cid.NewSet(), func(c cid.Cid) error { - if isFilCommitment(c) { + if isUnitaryObject(c) { return errStopWalk } @@ -713,7 +774,7 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { missing := int64(0) err := s.walkChain(curTs, epoch, false, func(c cid.Cid) error { - if isFilCommitment(c) { + if isUnitaryObject(c) { return errStopWalk } @@ -830,7 +891,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { var count int64 err = s.walkChain(curTs, boundaryEpoch, true, func(c cid.Cid) error { - if isFilCommitment(c) { + if isUnitaryObject(c) { return errStopWalk } @@ -865,7 +926,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return err } - if isFilCommitment(c) { + if isUnitaryObject(c) { continue } @@ -880,7 +941,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { err = s.walkObjectIncomplete(c, walked, func(c cid.Cid) error { - if isFilCommitment(c) { + if isUnitaryObject(c) { return errStopWalk } @@ -1536,7 +1597,7 @@ func (s *SplitStore) waitForMissingRefs() { for c := range towalk { err := s.walkObjectIncomplete(c, walked, func(c cid.Cid) error { - if isFilCommitment(c) { + if isUnitaryObject(c) { return errStopWalk } @@ -1636,11 +1697,30 @@ func bytesToUint64(buf []byte) uint64 { return i } -func isFilCommitment(c cid.Cid) bool { - switch c.Prefix().Codec { +func isUnitaryObject(c cid.Cid) bool { + pre := c.Prefix() + switch pre.Codec { case cid.FilCommitmentSealed, cid.FilCommitmentUnsealed: return true default: - return false + return pre.MhType == mh.IDENTITY } } + +func isIdentiyCid(c cid.Cid) bool { + return c.Prefix().MhType == mh.IDENTITY +} + +func decodeIdentityCid(c cid.Cid) ([]byte, error) { + dmh, err := mh.Decode(c.Hash()) + if err != nil { + return nil, xerrors.Errorf("error decoding identity cid %s: %w", c, err) + } + + // sanity check + if dmh.Code != mh.IDENTITY { + return nil, xerrors.Errorf("error decoding identity cid %s: hash type is not identity", c) + } + + return dmh.Digest, nil +} From 4f89d260b05e7da546b82b7e0e2f42e50851c3a3 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 11:35:10 +0300 Subject: [PATCH 180/197] kill isOldBlockHeader; it's dangerous. --- blockstore/splitstore/splitstore.go | 52 ----------------------------- 1 file changed, 52 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 46f9b5a3d7d..1a864f7c03e 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -708,16 +708,6 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro return errStopWalk } - // old block reference -- see comment in doCompact about the necessity of this - isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %w", c, err) - } - - if isOldBlock { - return errStopWalk - } - return s.txnProtect.Mark(c) }, func(c cid.Cid) error { @@ -954,23 +944,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return errStopWalk } - // we also short-circuit on old blocks, as these can come from a network request - // and cause us to fail because we have purged its consistituents (or missing from - // the beginning in case of snapshot sync, e.g. 
parent message receipts or old messages) - // if these blocks are on our chain, they would have been marked but they might be - // from a fork. - // - // Ideally, we would have API options to preclude us from trcking references to such - // objects, but we don't so we have to do this check - isOldBlock, err := s.isOldBlockHeader(c, lookbackEpoch) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %w", c, err) - } - - if isOldBlock { - return errStopWalk - } - count++ return markSet.Mark(c) }, @@ -1343,22 +1316,6 @@ func (s *SplitStore) checkClosing() error { return nil } -func (s *SplitStore) isOldBlockHeader(c cid.Cid, epoch abi.ChainEpoch) (isOldBlock bool, err error) { - if c.Prefix().Codec != cid.DagCBOR { - return false, nil - } - - err = s.view(c, func(data []byte) error { - var hdr types.BlockHeader - if hdr.UnmarshalCBOR(bytes.NewBuffer(data)) == nil { - isOldBlock = hdr.Height < epoch - } - return nil - }) - - return isOldBlock, err -} - func (s *SplitStore) moveColdBlocks(cold []cid.Cid) error { batch := make([]blocks.Block, 0, batchSize) @@ -1610,15 +1567,6 @@ func (s *SplitStore) waitForMissingRefs() { return errStopWalk } - isOldBlock, err := s.isOldBlockHeader(c, s.txnLookbackEpoch) - if err != nil { - return xerrors.Errorf("error checking object type for %s: %w", c, err) - } - - if isOldBlock { - return errStopWalk - } - count++ return s.txnProtect.Mark(c) }, From 565faff75404ae598c9894fea3f93bc830b40943 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 11:38:09 +0300 Subject: [PATCH 181/197] fix test --- blockstore/splitstore/splitstore_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index a0af5fe9d20..38501699b2b 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -160,7 +160,7 @@ func testSplitStore(t *testing.T, cfg *Config) { } func TestSplitStoreCompaction(t *testing.T) { - testSplitStore(t, &Config{MarkSetType: "mapts"}) + testSplitStore(t, &Config{MarkSetType: "map"}) } type mockChain struct { From acc4c374ef4226f953cfedf93121ca711f70cc4c Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 13:20:18 +0300 Subject: [PATCH 182/197] properly handle protecting long-running views --- blockstore/splitstore/splitstore.go | 74 ++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 1a864f7c03e..2ccb2de1f21 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -143,6 +143,7 @@ type SplitStore struct { txnLk sync.RWMutex txnActive bool txnLookbackEpoch abi.ChainEpoch + txnViews *sync.WaitGroup txnProtect MarkSet txnRefsMx sync.Mutex txnRefs map[cid.Cid]struct{} @@ -184,6 +185,8 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co cold: cold, markSetEnv: markSetEnv, + txnViews: new(sync.WaitGroup), + coldPurgeSize: defaultColdPurgeSize, } @@ -448,18 +451,28 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { return cb(data) } - // optimistically protect the reference so that we can call the underlying View - // without holding hte lock. - // This allows the user callback to call into the blockstore without deadlocking. 
- s.txnLk.RLock() - err := s.trackTxnRef(cid) - s.txnLk.RUnlock() + err := s.hot.View(cid, + func(data []byte) error { + // views are protected two-fold: + // - if there is an active transaction, then the reference is protected. + // - if there is no active transaction, active views are tracked in a + // wait group and compaction is inhibited from starting until they + // have all completed. this is necessary to ensure that a (very) long-running + // view can't have its data pointer deleted, which would be catastrophic. + // Note that we can't just RLock for the duration of the view, as this could + // lead to deadlock with recursive views. + wg, err := s.protectView(cid) + if err != nil { + log.Warnf("error protecting view to %s: %s", cid, err) + } - if err != nil { - log.Warnf("error tracking reference to %s: %s", cid, err) - } + if wg != nil { + defer wg.Done() + } + + return cb(data) + }) - err = s.hot.View(cid, cb) switch err { case bstore.ErrNotFound: if s.debug != nil { @@ -583,7 +596,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { if epoch-s.baseEpoch > CompactionThreshold { // it's time to compact -- prepare the transaction and go! - s.beginTxnProtect(curTs) + wg := s.beginTxnProtect(curTs) go func() { defer atomic.StoreInt32(&s.compacting, 0) defer s.endTxnProtect() @@ -591,7 +604,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { log.Info("compacting splitstore") start := time.Now() - s.compact(curTs) + s.compact(curTs, wg) log.Infow("compaction done", "took", time.Since(start)) }() @@ -627,6 +640,20 @@ func (s *SplitStore) protectTipSets(apply []*types.TipSet) { }() } +// transactionally protect a view +func (s *SplitStore) protectView(c cid.Cid) (*sync.WaitGroup, error) { + s.txnLk.RLock() + defer s.txnLk.RUnlock() + + if !s.txnActive { + s.txnViews.Add(1) + return s.txnViews, nil + } + + err := s.trackTxnRef(c) + return nil, err +} + // transactionally protect a reference to an object func (s *SplitStore) trackTxnRef(c cid.Cid) error { if !s.txnActive { @@ -844,8 +871,13 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { // - We sort cold objects heaviest first, so as to never delete the consituents of a DAG before the DAG itself (which would leave dangling references) // - We delete in small batches taking a lock; each batch is checked again for marks, from the concurrent transactional mark, so as to never delete anything live // - We then end the transaction and compact/gc the hotstore. 
-func (s *SplitStore) compact(curTs *types.TipSet) { +func (s *SplitStore) compact(curTs *types.TipSet, wg *sync.WaitGroup) { + log.Info("waiting for active views to complete") start := time.Now() + wg.Wait() + log.Infow("waiting for active views done", "took", time.Since(start)) + + start = time.Now() err := s.doCompact(curTs) took := time.Since(start).Milliseconds() stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) @@ -1079,16 +1111,21 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) { +func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) *sync.WaitGroup { lookbackEpoch := curTs.Height() - CompactionLookback log.Info("preparing compaction transaction") s.txnLk.Lock() defer s.txnLk.Unlock() - s.txnRefs = make(map[cid.Cid]struct{}) s.txnActive = true s.txnLookbackEpoch = lookbackEpoch + s.txnRefs = make(map[cid.Cid]struct{}) + + wg := s.txnViews + s.txnViews = nil + + return wg } func (s *SplitStore) beginTxnConcurrentMarking(markSet MarkSet) map[cid.Cid]struct{} { @@ -1109,13 +1146,16 @@ func (s *SplitStore) endTxnProtect() { s.txnLk.Lock() defer s.txnLk.Unlock() - if s.txnProtect != nil { - _ = s.txnProtect.Close() + if !s.txnActive { + return } + + _ = s.txnProtect.Close() s.txnActive = false s.txnProtect = nil s.txnRefs = nil s.txnMissing = nil + s.txnViews = new(sync.WaitGroup) } func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool, From da0feb3fa43a78bd7be3d8aa49495cd31e1a87c2 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 15:10:02 +0300 Subject: [PATCH 183/197] don't mark references inline; instead rely on the main compaction thread to do concurrent marking The problem is that an inline marking might take minutes for some objects (infrequent, but still possible for state roots, and prohibitive if that happens during block validation). So we simply track references continuously and rely on the main compaction thread to trigger concurrent marking for all references at opportune moments. Assumption: we can mark references faster than they are created during purge, or else we'll never purge anything.
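The tracking/marking split described above can be sketched as follows; the names here are illustrative and do not appear in the diff below. Hot-path operations record references under a short lock, and the compaction thread drains the pending set for concurrent marking, repeating the drain before each purge batch so that nothing referenced in the meantime is deleted.

```go
package sketch

import (
	"sync"

	cid "github.com/ipfs/go-cid"
)

// refTracker sketches the pattern this commit adopts: hot-path blockstore
// operations only record references (cheap, short lock), while the
// compaction thread periodically drains the pending set and does the
// expensive walking and marking concurrently.
type refTracker struct {
	mx   sync.Mutex
	refs map[cid.Cid]struct{}
}

func newRefTracker() *refTracker {
	return &refTracker{refs: make(map[cid.Cid]struct{})}
}

// track is called inline from reads and writes; it never walks objects.
func (t *refTracker) track(c cid.Cid) {
	t.mx.Lock()
	t.refs[c] = struct{}{}
	t.mx.Unlock()
}

// drain is called by the compaction thread at opportune moments (and again
// before each purge batch), returning the pending references for marking.
func (t *refTracker) drain() map[cid.Cid]struct{} {
	t.mx.Lock()
	defer t.mx.Unlock()
	if len(t.refs) == 0 {
		return nil
	}
	pending := t.refs
	t.refs = make(map[cid.Cid]struct{})
	return pending
}
```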
--- blockstore/splitstore/splitstore.go | 363 ++++++++++++++-------------- 1 file changed, 188 insertions(+), 175 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 2ccb2de1f21..b1691762168 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -6,6 +6,7 @@ import ( "encoding/binary" "errors" "os" + "runtime" "sort" "sync" "sync/atomic" @@ -228,11 +229,7 @@ func (s *SplitStore) Has(cid cid.Cid) (bool, error) { } if has { - err = s.trackTxnRef(cid) - if err != nil { - log.Warnf("error tracking reference to %s: %s", cid, err) - } - + s.trackTxnRef(cid) return true, nil } @@ -256,11 +253,7 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) { switch err { case nil: - err = s.trackTxnRef(cid) - if err != nil { - log.Warnf("error tracking reference to %s: %s", cid, err) - } - + s.trackTxnRef(cid) return blk, nil case bstore.ErrNotFound: @@ -302,11 +295,7 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) { switch err { case nil: - err = s.trackTxnRef(cid) - if err != nil { - log.Warnf("error tracking reference to %s: %s", cid, err) - } - + s.trackTxnRef(cid) return size, nil case bstore.ErrNotFound: @@ -345,11 +334,7 @@ func (s *SplitStore) Put(blk blocks.Block) error { s.debug.LogWrite(blk) - err = s.trackTxnRef(blk.Cid()) - if err != nil { - log.Warnf("error tracking reference to %s: %s", blk.Cid(), err) - } - + s.trackTxnRef(blk.Cid()) return nil } @@ -394,11 +379,7 @@ func (s *SplitStore) PutMany(blks []blocks.Block) error { s.debug.LogWriteMany(blks) - err = s.trackTxnRefMany(batch) - if err != nil { - log.Warnf("error tracking reference to batch: %s", err) - } - + s.trackTxnRefMany(batch) return nil } @@ -461,11 +442,7 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { // view can't have its data pointer deleted, which would be catastrophic. // Note that we can't just RLock for the duration of the view, as this could // lead to deadlock with recursive views. - wg, err := s.protectView(cid) - if err != nil { - log.Warnf("error protecting view to %s: %s", cid, err) - } - + wg := s.protectView(cid) if wg != nil { defer wg.Done() } @@ -619,115 +596,191 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { // transactionally protect incoming tipsets func (s *SplitStore) protectTipSets(apply []*types.TipSet) { s.txnLk.RLock() + defer s.txnLk.RUnlock() + if !s.txnActive { - s.txnLk.RUnlock() return } - // do this in a goroutine to avoid blocking the notifier - go func() { - defer s.txnLk.RUnlock() - - var cids []cid.Cid - for _, ts := range apply { - cids = append(cids, ts.Cids()...) - } + var cids []cid.Cid + for _, ts := range apply { + cids = append(cids, ts.Cids()...) 
+ } - err := s.trackTxnRefMany(cids) - if err != nil { - log.Errorf("error protecting newly applied tipsets: %s", err) - } - }() + s.trackTxnRefMany(cids) } // transactionally protect a view -func (s *SplitStore) protectView(c cid.Cid) (*sync.WaitGroup, error) { +func (s *SplitStore) protectView(c cid.Cid) *sync.WaitGroup { s.txnLk.RLock() defer s.txnLk.RUnlock() if !s.txnActive { s.txnViews.Add(1) - return s.txnViews, nil + return s.txnViews } - err := s.trackTxnRef(c) - return nil, err + s.trackTxnRef(c) + return nil } // transactionally protect a reference to an object -func (s *SplitStore) trackTxnRef(c cid.Cid) error { +func (s *SplitStore) trackTxnRef(c cid.Cid) { if !s.txnActive { // not compacting - return nil + return } - if s.txnRefs != nil { - // we haven't finished marking yet, so track the reference - s.txnRefsMx.Lock() - s.txnRefs[c] = struct{}{} - s.txnRefsMx.Unlock() - return nil + if isUnitaryObject(c) { + return + } + + if s.txnProtect != nil { + mark, err := s.txnProtect.Has(c) + if err != nil { + log.Warnf("error checking markset: %s", err) + goto track + } + + if mark { + return + } } - // we have finished marking, protect the reference - return s.doTxnProtect(c, nil) +track: + s.txnRefsMx.Lock() + s.txnRefs[c] = struct{}{} + s.txnRefsMx.Unlock() + return } // transactionally protect a batch of references -func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) error { +func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) { if !s.txnActive { // not compacting - return nil + return } - if s.txnRefs != nil { - // we haven't finished marking yet, so track the references + s.txnRefsMx.Lock() + quiet := false + for _, c := range cids { + if isUnitaryObject(c) { + continue + } + + if s.txnProtect != nil { + mark, err := s.txnProtect.Has(c) + if err != nil { + if !quiet { + quiet = true + log.Warnf("error checking markset: %s", err) + } + continue + } + + if mark { + continue + } + } + + s.txnRefs[c] = struct{}{} + } + s.txnRefsMx.Unlock() + return +} + +// protect all pending transactional references +func (s *SplitStore) protectTxnRefs(markSet MarkSet) error { + for { + var txnRefs map[cid.Cid]struct{} + s.txnRefsMx.Lock() - for _, c := range cids { - s.txnRefs[c] = struct{}{} + if len(s.txnRefs) > 0 { + txnRefs = s.txnRefs + s.txnRefs = make(map[cid.Cid]struct{}) } s.txnRefsMx.Unlock() - return nil - } - // we have finished marking, protect the refs - batch := make(map[cid.Cid]struct{}, len(cids)) - for _, c := range cids { - batch[c] = struct{}{} - } + if len(txnRefs) == 0 { + return nil + } - for _, c := range cids { - err := s.doTxnProtect(c, batch) - if err != nil { - return err + log.Infow("protecting transactional references", "refs", len(txnRefs)) + count := 0 + workch := make(chan cid.Cid, len(txnRefs)) + startProtect := time.Now() + + for c := range txnRefs { + mark, err := markSet.Has(c) + if err != nil { + return xerrors.Errorf("error checking markset: %w", err) + } + + if mark { + continue + } + + workch <- c + count++ } - } - return nil + if count == 0 { + return nil + } + + workers := runtime.NumCPU() / 2 + if workers < 2 { + workers = 2 + } + if workers > count { + workers = count + } + + close(workch) + + worker := func(wg *sync.WaitGroup) { + if wg != nil { + defer wg.Done() + } + + for c := range workch { + err := s.doTxnProtect(c, markSet) + if err != nil { + log.Warnf("error protecting transactional references: %s", err) + return + } + } + } + + if workers > 1 { + wg := new(sync.WaitGroup) + for i := 0; i < workers; i++ { + wg.Add(1) + go worker(wg) + } + 
wg.Wait() + } else { + worker(nil) + } + + log.Infow("protecting transactional refs done", "took", time.Since(startProtect), "protected", count) + } } // transactionally protect a reference by walking the object and marking. // concurrent markings are short circuited by checking the markset. -func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) error { +func (s *SplitStore) doTxnProtect(root cid.Cid, markSet MarkSet) error { // Note: cold objects are deleted heaviest first, so the consituents of an object // cannot be deleted before the object itself. - err := s.walkObjectIncomplete(root, cid.NewSet(), + return s.walkObjectIncomplete(root, cid.NewSet(), func(c cid.Cid) error { if isUnitaryObject(c) { return errStopWalk } - if c != root { - _, ok := batch[c] - if ok { - // it's on the same batch, stop walk - return errStopWalk - } - } - - mark, err := s.txnProtect.Has(c) + mark, err := markSet.Has(c) if err != nil { - return xerrors.Errorf("error checking mark set for %s: %w", c, err) + return xerrors.Errorf("error checking markset: %w", err) } // it's marked, nothing to do @@ -735,23 +788,17 @@ func (s *SplitStore) doTxnProtect(root cid.Cid, batch map[cid.Cid]struct{}) erro return errStopWalk } - return s.txnProtect.Mark(c) + return markSet.Mark(c) }, func(c cid.Cid) error { - log.Warnf("missing object reference %s in %s", c, root) if s.txnMissing != nil { + log.Warnf("missing object reference %s in %s", c, root) s.txnRefsMx.Lock() s.txnMissing[c] = struct{}{} s.txnRefsMx.Unlock() } return errStopWalk }) - - if err != nil { - log.Warnf("error protecting object (cid: %s): %s", root, err) - } - - return err } // warmup acuiqres the compaction lock and spawns a goroutine to warm up the hotstore; @@ -905,6 +952,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return err } + // we are ready for concurrent marking + s.beginTxnMarking(markSet) + // 1. mark reachable objects by walking the chain from the current epoch; we keep state roots // and messages until the boundary epoch. 
log.Info("marking reachable objects") @@ -933,66 +983,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return err } - // begin transactional protection with concurrent marking and fetch references created while marking - txnRefs := s.beginTxnConcurrentMarking(markSet) - - // 1.1 Update markset for references created during marking - if len(txnRefs) > 0 { - log.Infow("updating mark set for live references", "refs", len(txnRefs)) - startMark = time.Now() - walked := cid.NewSet() - count = 0 - - for c := range txnRefs { - if err := s.checkClosing(); err != nil { - return err - } - - if isUnitaryObject(c) { - continue - } - - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } - - if mark { - continue - } - - err = s.walkObjectIncomplete(c, walked, - func(c cid.Cid) error { - if isUnitaryObject(c) { - return errStopWalk - } - - mark, err := markSet.Has(c) - if err != nil { - return xerrors.Errorf("error checking markset for %s: %w", c, err) - } - - if mark { - return errStopWalk - } - - count++ - return markSet.Mark(c) - }, - func(cm cid.Cid) error { - log.Warnf("missing object reference %s in %s", cm, c) //nolint - s.txnRefsMx.Lock() - s.txnMissing[cm] = struct{}{} - s.txnRefsMx.Unlock() - return errStopWalk - }) - - if err != nil { - return xerrors.Errorf("error walking %s for marking: %w", c, err) - } - } - - log.Infow("update mark set done", "took", time.Since(startMark), "marked", count) + // 1.1 protect transactional refs + err = s.protectTxnRefs(markSet) + if err != nil { + return xerrors.Errorf("error protecting transactional refs: %w", err) } if err := s.checkClosing(); err != nil { @@ -1047,7 +1041,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // now that we have collected cold objects, check for missing references from transactional i/o // and disable further collection of such references (they will not be acted upon as we can't // possibly delete objects we didn't have when we were collecting cold objects) - s.waitForMissingRefs() + s.waitForMissingRefs(markSet) if err := s.checkClosing(); err != nil { return err @@ -1062,6 +1056,10 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return xerrors.Errorf("error moving cold objects: %w", err) } log.Infow("moving done", "took", time.Since(startMove)) + + if err := s.checkClosing(); err != nil { + return err + } } // 4. sort cold objects so that the dags with most references are deleted first @@ -1075,6 +1073,16 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { } log.Infow("sorting done", "took", time.Since(startSort)) + // 4.1 protect transactional refs once more + err = s.protectTxnRefs(markSet) + if err != nil { + return xerrors.Errorf("error protecting transactional refs: %w", err) + } + + if err := s.checkClosing(); err != nil { + return err + } + // Enter critical section log.Info("entering critical section") atomic.StoreInt32(&s.critsection, 1) @@ -1088,7 +1096,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { // 5. 
purge cold objects from the hotstore, taking protected references into account log.Info("purging cold objects from the hotstore") startPurge := time.Now() - err = s.purge(cold) + err = s.purge(cold, markSet) if err != nil { return xerrors.Errorf("error purging cold blocks: %w", err) } @@ -1121,6 +1129,7 @@ func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) *sync.WaitGroup { s.txnActive = true s.txnLookbackEpoch = lookbackEpoch s.txnRefs = make(map[cid.Cid]struct{}) + s.txnMissing = make(map[cid.Cid]struct{}) wg := s.txnViews s.txnViews = nil @@ -1128,18 +1137,12 @@ func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) *sync.WaitGroup { return wg } -func (s *SplitStore) beginTxnConcurrentMarking(markSet MarkSet) map[cid.Cid]struct{} { - s.txnLk.Lock() - defer s.txnLk.Unlock() - +func (s *SplitStore) beginTxnMarking(markSet MarkSet) { markSet.SetConcurrent() - txnRefs := s.txnRefs - s.txnRefs = nil - s.txnMissing = make(map[cid.Cid]struct{}) + s.txnLk.Lock() s.txnProtect = markSet - - return txnRefs + s.txnLk.Unlock() } func (s *SplitStore) endTxnProtect() { @@ -1150,7 +1153,6 @@ func (s *SplitStore) endTxnProtect() { return } - _ = s.txnProtect.Close() s.txnActive = false s.txnProtect = nil s.txnRefs = nil @@ -1508,7 +1510,7 @@ func (s *SplitStore) purgeBatch(cids []cid.Cid, deleteBatch func([]cid.Cid) erro return nil } -func (s *SplitStore) purge(cids []cid.Cid) error { +func (s *SplitStore) purge(cids []cid.Cid, markSet MarkSet) error { deadCids := make([]cid.Cid, 0, batchSize) var purgeCnt, liveCnt int defer func() { @@ -1519,15 +1521,26 @@ func (s *SplitStore) purge(cids []cid.Cid) error { func(cids []cid.Cid) error { deadCids := deadCids[:0] + again: if err := s.checkClosing(); err != nil { return err } s.txnLk.Lock() - defer s.txnLk.Unlock() + if len(s.txnRefs) > 0 { + s.txnLk.Unlock() + + err := s.protectTxnRefs(markSet) + if err != nil { + return xerrors.Errorf("error protecting transactional refs: %w", err) + } + + goto again + } + defer s.txnLk.Unlock() for _, c := range cids { - live, err := s.txnProtect.Has(c) + live, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking for liveness: %w", err) } @@ -1559,7 +1572,7 @@ func (s *SplitStore) purge(cids []cid.Cid) error { // have this gem[TM]. // My best guess is that they are parent message receipts or yet to be computed state roots; magik // thinks the cause may be block validation. 
-func (s *SplitStore) waitForMissingRefs() { +func (s *SplitStore) waitForMissingRefs(markSet MarkSet) { s.txnLk.Lock() missing := s.txnMissing s.txnMissing = nil @@ -1598,7 +1611,7 @@ func (s *SplitStore) waitForMissingRefs() { return errStopWalk } - mark, err := s.txnProtect.Has(c) + mark, err := markSet.Has(c) if err != nil { return xerrors.Errorf("error checking markset for %s: %w", c, err) } @@ -1608,7 +1621,7 @@ func (s *SplitStore) waitForMissingRefs() { } count++ - return s.txnProtect.Mark(c) + return markSet.Mark(c) }, func(c cid.Cid) error { missing[c] = struct{}{} From 095d7427bab6063aba1f31f6cb44acb2e5ade6e9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 15:41:10 +0300 Subject: [PATCH 184/197] make view protection optimistic again, as there is a race window --- blockstore/splitstore/splitstore.go | 30 +++++++++++++---------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index b1691762168..e822a60c2e7 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -432,24 +432,20 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error { return cb(data) } - err := s.hot.View(cid, - func(data []byte) error { - // views are protected two-fold: - // - if there is an active transaction, then the reference is protected. - // - if there is no active transaction, active views are tracked in a - // wait group and compaction is inhibited from starting until they - // have all completed. this is necessary to ensure that a (very) long-running - // view can't have its data pointer deleted, which would be catastrophic. - // Note that we can't just RLock for the duration of the view, as this could - // lead to deadlock with recursive views. - wg := s.protectView(cid) - if wg != nil { - defer wg.Done() - } - - return cb(data) - }) + // views are (optimistically) protected two-fold: + // - if there is an active transaction, then the reference is protected. + // - if there is no active transaction, active views are tracked in a + // wait group and compaction is inhibited from starting until they + // have all completed. this is necessary to ensure that a (very) long-running + // view can't have its data pointer deleted, which would be catastrophic. + // Note that we can't just RLock for the duration of the view, as this could + // lead to deadlock with recursive views. + wg := s.protectView(cid) + if wg != nil { + defer wg.Done() + } + err := s.hot.View(cid, cb) switch err { case bstore.ErrNotFound: if s.debug != nil { From 18161fee389db4244d294748a76453372f104ca5 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 19:12:58 +0300 Subject: [PATCH 185/197] remove unused lookback constructs --- blockstore/splitstore/splitstore.go | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index e822a60c2e7..d6995e6811d 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -52,10 +52,6 @@ var ( // we will walk the chain for live objects. CompactionBoundary = 4 * build.Finality - // CompactionLookback is the number of epochs from the current epoch at which - // we will consider marking an old block reference. 
- CompactionLookback = 2 * build.Finality - // SyncGapTime is the time delay from a tipset's min timestamp before we decide // there is a sync gap SyncGapTime = time.Minute @@ -141,14 +137,13 @@ type SplitStore struct { debug *debugLog // transactional protection for concurrent read/writes during compaction - txnLk sync.RWMutex - txnActive bool - txnLookbackEpoch abi.ChainEpoch - txnViews *sync.WaitGroup - txnProtect MarkSet - txnRefsMx sync.Mutex - txnRefs map[cid.Cid]struct{} - txnMissing map[cid.Cid]struct{} + txnLk sync.RWMutex + txnActive bool + txnViews *sync.WaitGroup + txnProtect MarkSet + txnRefsMx sync.Mutex + txnRefs map[cid.Cid]struct{} + txnMissing map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -569,7 +564,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { if epoch-s.baseEpoch > CompactionThreshold { // it's time to compact -- prepare the transaction and go! - wg := s.beginTxnProtect(curTs) + wg := s.beginTxnProtect() go func() { defer atomic.StoreInt32(&s.compacting, 0) defer s.endTxnProtect() @@ -933,9 +928,8 @@ func (s *SplitStore) compact(curTs *types.TipSet, wg *sync.WaitGroup) { func (s *SplitStore) doCompact(curTs *types.TipSet) error { currentEpoch := curTs.Height() boundaryEpoch := currentEpoch - CompactionBoundary - lookbackEpoch := currentEpoch - CompactionLookback - log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "boundaryEpoch", boundaryEpoch, "lookbackEpoch", lookbackEpoch) + log.Infow("running compaction", "currentEpoch", currentEpoch, "baseEpoch", s.baseEpoch, "boundaryEpoch", boundaryEpoch) markSet, err := s.markSetEnv.Create("live", s.markSetSize) if err != nil { @@ -1115,15 +1109,13 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) beginTxnProtect(curTs *types.TipSet) *sync.WaitGroup { - lookbackEpoch := curTs.Height() - CompactionLookback +func (s *SplitStore) beginTxnProtect() *sync.WaitGroup { log.Info("preparing compaction transaction") s.txnLk.Lock() defer s.txnLk.Unlock() s.txnActive = true - s.txnLookbackEpoch = lookbackEpoch s.txnRefs = make(map[cid.Cid]struct{}) s.txnMissing = make(map[cid.Cid]struct{}) From c0a1cfffa1f1fbea767af4f214fbefde14187622 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 19:19:37 +0300 Subject: [PATCH 186/197] rename noopstore to discardstore --- blockstore/discard.go | 66 ++++++++++++++++++++++++++++++++++++++ blockstore/noop.go | 66 -------------------------------------- node/builder.go | 4 +-- node/modules/blockstore.go | 6 ++-- 4 files changed, 71 insertions(+), 71 deletions(-) create mode 100644 blockstore/discard.go delete mode 100644 blockstore/noop.go diff --git a/blockstore/discard.go b/blockstore/discard.go new file mode 100644 index 00000000000..afd0651bc07 --- /dev/null +++ b/blockstore/discard.go @@ -0,0 +1,66 @@ +package blockstore + +import ( + "context" + "io" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" +) + +var _ Blockstore = (*discardstore)(nil) + +type discardstore struct { + bs Blockstore +} + +func NewDiscardStore(bs Blockstore) Blockstore { + return &discardstore{bs: bs} +} + +func (b *discardstore) Has(cid cid.Cid) (bool, error) { + return b.bs.Has(cid) +} + +func (b *discardstore) HashOnRead(hor bool) { + b.bs.HashOnRead(hor) +} + +func (b *discardstore) Get(cid cid.Cid) (blocks.Block, error) { + return b.bs.Get(cid) +} + +func (b *discardstore) GetSize(cid cid.Cid) (int, error) { + return b.bs.GetSize(cid) +} + +func (b 
*discardstore) View(cid cid.Cid, f func([]byte) error) error { + return b.bs.View(cid, f) +} + +func (b *discardstore) Put(blk blocks.Block) error { + return nil +} + +func (b *discardstore) PutMany(blks []blocks.Block) error { + return nil +} + +func (b *discardstore) DeleteBlock(cid cid.Cid) error { + return nil +} + +func (b *discardstore) DeleteMany(cids []cid.Cid) error { + return nil +} + +func (b *discardstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + return b.bs.AllKeysChan(ctx) +} + +func (b *discardstore) Close() error { + if c, ok := b.bs.(io.Closer); ok { + return c.Close() + } + return nil +} diff --git a/blockstore/noop.go b/blockstore/noop.go deleted file mode 100644 index f2658cbd739..00000000000 --- a/blockstore/noop.go +++ /dev/null @@ -1,66 +0,0 @@ -package blockstore - -import ( - "context" - "io" - - blocks "github.com/ipfs/go-block-format" - cid "github.com/ipfs/go-cid" -) - -var _ Blockstore = (*noopstore)(nil) - -type noopstore struct { - bs Blockstore -} - -func NewNoopStore(bs Blockstore) Blockstore { - return &noopstore{bs: bs} -} - -func (b *noopstore) Has(cid cid.Cid) (bool, error) { - return b.bs.Has(cid) -} - -func (b *noopstore) HashOnRead(hor bool) { - b.bs.HashOnRead(hor) -} - -func (b *noopstore) Get(cid cid.Cid) (blocks.Block, error) { - return b.bs.Get(cid) -} - -func (b *noopstore) GetSize(cid cid.Cid) (int, error) { - return b.bs.GetSize(cid) -} - -func (b *noopstore) View(cid cid.Cid, f func([]byte) error) error { - return b.bs.View(cid, f) -} - -func (b *noopstore) Put(blk blocks.Block) error { - return nil -} - -func (b *noopstore) PutMany(blks []blocks.Block) error { - return nil -} - -func (b *noopstore) DeleteBlock(cid cid.Cid) error { - return nil -} - -func (b *noopstore) DeleteMany(cids []cid.Cid) error { - return nil -} - -func (b *noopstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { - return b.bs.AllKeysChan(ctx) -} - -func (b *noopstore) Close() error { - if c, ok := b.bs.(io.Closer); ok { - return c.Close() - } - return nil -} diff --git a/node/builder.go b/node/builder.go index 4a678f37f8a..45957c4dd49 100644 --- a/node/builder.go +++ b/node/builder.go @@ -645,8 +645,8 @@ func Repo(r repo.Repo) Option { If(cfg.EnableSplitstore, If(cfg.Splitstore.ColdStoreType == "universal", Override(new(dtypes.ColdBlockstore), From(new(dtypes.UniversalBlockstore)))), - If(cfg.Splitstore.ColdStoreType == "noop", - Override(new(dtypes.ColdBlockstore), modules.NoopColdBlockstore)), + If(cfg.Splitstore.ColdStoreType == "discard", + Override(new(dtypes.ColdBlockstore), modules.DiscardColdBlockstore)), If(cfg.Splitstore.HotStoreType == "badger", Override(new(dtypes.HotBlockstore), modules.BadgerHotBlockstore)), Override(new(dtypes.SplitBlockstore), modules.SplitBlockstore(cfg)), diff --git a/node/modules/blockstore.go b/node/modules/blockstore.go index 59a037d8dd6..9e1293c8e08 100644 --- a/node/modules/blockstore.go +++ b/node/modules/blockstore.go @@ -37,8 +37,8 @@ func UniversalBlockstore(lc fx.Lifecycle, mctx helpers.MetricsCtx, r repo.Locked return bs, err } -func NoopColdBlockstore(lc fx.Lifecycle, bs dtypes.UniversalBlockstore) (dtypes.ColdBlockstore, error) { - return blockstore.NewNoopStore(bs), nil +func DiscardColdBlockstore(lc fx.Lifecycle, bs dtypes.UniversalBlockstore) (dtypes.ColdBlockstore, error) { + return blockstore.NewDiscardStore(bs), nil } func BadgerHotBlockstore(lc fx.Lifecycle, r repo.LockedRepo) (dtypes.HotBlockstore, error) { @@ -79,7 +79,7 @@ func SplitBlockstore(cfg *config.Chainstore) func(lc 
fx.Lifecycle, r repo.Locked
 	cfg := &splitstore.Config{
 		MarkSetType:       cfg.Splitstore.MarkSetType,
-		DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "noop",
+		DiscardColdBlocks: cfg.Splitstore.ColdStoreType == "discard",
 	}
 	ss, err := splitstore.Open(path, ds, hot, cold, cfg)
 	if err != nil {

From b9a5ea8f7bed48942e85dbf0fae0976402bb4f6b Mon Sep 17 00:00:00 2001
From: vyzo
Date: Fri, 9 Jul 2021 19:23:55 +0300
Subject: [PATCH 187/197] update wording around discard store

---
 blockstore/splitstore/README.md | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/blockstore/splitstore/README.md b/blockstore/splitstore/README.md
index da07e17142a..1c6569a34e7 100644
--- a/blockstore/splitstore/README.md
+++ b/blockstore/splitstore/README.md
@@ -9,10 +9,11 @@ implementation, which we call SplitStore v1.
 
 The new design (see [#6474](https://github.com/filecoin-project/lotus/pull/6474)
 evolves the splitstore to be a freestanding compacting blockstore that
 allows us to keep a small (60-100GB) working set in a hot blockstore
-and reliably archive out of scope objects in a coldstore. The coldstore
-can be a noop store, whereby out of scope objects are discarded or a
-regular badger blockstore (the default), which can be periodically
-garbage collected according to configurable user retention policies.
+and reliably archive out of scope objects in a coldstore. The
+coldstore can also be a discard store, whereby out of scope objects
+are discarded or a regular badger blockstore (the default), which can
+be periodically garbage collected according to configurable user
+retention policies.
 
 To enable the splitstore, edit `.lotus/config.toml` and add the following:
 ```
@@ -20,11 +21,16 @@ To enable the splitstore, edit `.lotus/config.toml` and add the following:
   EnableSplitstore = true
 ```
 
-If you want to use the noop coldstore, also add the following:
+If you intend to use the discard coldstore, you also need to add the following:
 ```
   [Chainstore.Splitstore]
-    ColdStoreType = "noop"
+    ColdStoreType = "discard"
 ```
+In general you _should not_ have to use the discard store, unless you
+are running a network booster or have very constrained hardware with
+not enough disk space to maintain a coldstore, even with garbage
+collection.
+
 
 ## Operation
 
@@ -42,7 +48,7 @@ hotstore, with fallback to the coldstore.
 
 Once 5 finalities have ellapsed, and every finality henceforth, the
 blockstore _compacts_.  Compaction is the process of moving all
 unreachable objects within the last 4 finalities from the hotstore to
-the coldstore. If the system is configured with a noop coldstore,
+the coldstore. If the system is configured with a discard coldstore,
 these objects are discarded. Note that chain headers, all the way to
 genesis, are considered reachable.
Stateroots and messages are considered reachable only within the last 4 finalities, unless there From 41290383e27caa4ddfc3a342fa6b660ed329a520 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 19:24:44 +0300 Subject: [PATCH 188/197] fix test --- blockstore/splitstore/splitstore_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 38501699b2b..423a765368c 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -24,7 +24,6 @@ import ( func init() { CompactionThreshold = 5 CompactionBoundary = 2 - CompactionLookback = 2 logging.SetLogLevel("splitstore", "DEBUG") } From f5ae10e3d1b8447f814c7c44082a390cbc4a094d Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 19:53:51 +0300 Subject: [PATCH 189/197] refactor debug log code to eliminate duplication --- blockstore/splitstore/debug.go | 291 ++++++++++++--------------------- 1 file changed, 102 insertions(+), 189 deletions(-) diff --git a/blockstore/splitstore/debug.go b/blockstore/splitstore/debug.go index 4c788a28b5b..2be85ebfe8d 100644 --- a/blockstore/splitstore/debug.go +++ b/blockstore/splitstore/debug.go @@ -20,60 +20,58 @@ import ( ) type debugLog struct { - readPath, writePath, deletePath, stackPath string - readMx, writeMx, deleteMx, stackMx sync.Mutex - readLog, writeLog, deleteLog, stackLog *os.File - readCnt, writeCnt, deleteCnt, stackCnt int - stackMap map[string]string + readLog, writeLog, deleteLog, stackLog *debugLogOp + + stackMx sync.Mutex + stackMap map[string]string +} + +type debugLogOp struct { + path string + mx sync.Mutex + log *os.File + count int } func openDebugLog(path string) (*debugLog, error) { basePath := filepath.Join(path, "debug") err := os.MkdirAll(basePath, 0755) if err != nil { - return nil, xerrors.Errorf("error creating debug log directory: %w", err) + return nil, err } - readPath := filepath.Join(basePath, "read.log") - readFile, err := os.OpenFile(readPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + readLog, err := openDebugLogOp(basePath, "read.log") if err != nil { - return nil, xerrors.Errorf("error opening read log: %w", err) + return nil, err } - writePath := filepath.Join(basePath, "write.log") - writeFile, err := os.OpenFile(writePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + writeLog, err := openDebugLogOp(basePath, "write.log") if err != nil { - _ = readFile.Close() - return nil, xerrors.Errorf("error opening write log: %w", err) + _ = readLog.Close() + return nil, err } - deletePath := filepath.Join(basePath, "delete.log") - deleteFile, err := os.OpenFile(deletePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + deleteLog, err := openDebugLogOp(basePath, "delete.log") if err != nil { - _ = readFile.Close() - _ = writeFile.Close() - return nil, xerrors.Errorf("error opening delete log: %w", err) + _ = readLog.Close() + _ = writeLog.Close() + return nil, err } - stackPath := filepath.Join(basePath, "stack.log") - stackFile, err := os.OpenFile(stackPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + stackLog, err := openDebugLogOp(basePath, "stack.log") if err != nil { - _ = readFile.Close() - _ = writeFile.Close() - _ = deleteFile.Close() + _ = readLog.Close() + _ = writeLog.Close() + _ = deleteLog.Close() return nil, xerrors.Errorf("error opening stack log: %w", err) } return &debugLog{ - readPath: readPath, - writePath: writePath, - deletePath: deletePath, - stackPath: stackPath, - readLog: readFile, - writeLog: writeFile, - deleteLog: 
deleteFile, - stackLog: stackFile, - stackMap: make(map[string]string), + readLog: readLog, + writeLog: writeLog, + deleteLog: deleteLog, + stackLog: stackLog, + stackMap: make(map[string]string), }, nil } @@ -83,13 +81,7 @@ func (d *debugLog) LogReadMiss(cid cid.Cid) { } stack := d.getStack() - - d.readMx.Lock() - defer d.readMx.Unlock() - - d.readCnt++ - - _, err := fmt.Fprintf(d.readLog, "%s %s %s\n", d.timestamp(), cid, stack) + err := d.readLog.Log("%s %s %s\n", d.timestamp(), cid, stack) if err != nil { log.Warnf("error writing read log: %s", err) } @@ -105,12 +97,7 @@ func (d *debugLog) LogWrite(blk blocks.Block) { stack = " " + d.getStack() } - d.writeMx.Lock() - defer d.writeMx.Unlock() - - d.writeCnt++ - - _, err := fmt.Fprintf(d.writeLog, "%s %s%s\n", d.timestamp(), blk.Cid(), stack) + err := d.writeLog.Log("%s %s%s\n", d.timestamp(), blk.Cid(), stack) if err != nil { log.Warnf("error writing write log: %s", err) } @@ -126,14 +113,9 @@ func (d *debugLog) LogWriteMany(blks []blocks.Block) { stack = " " + d.getStack() } - d.writeMx.Lock() - defer d.writeMx.Unlock() - - d.writeCnt += len(blks) - now := d.timestamp() for _, blk := range blks { - _, err := fmt.Fprintf(d.writeLog, "%s %s%s\n", now, blk.Cid(), stack) + err := d.writeLog.Log("%s %s%s\n", now, blk.Cid(), stack) if err != nil { log.Warnf("error writing write log: %s", err) break @@ -146,14 +128,9 @@ func (d *debugLog) LogDelete(cids []cid.Cid) { return } - d.deleteMx.Lock() - defer d.deleteMx.Unlock() - - d.deleteCnt += len(cids) - now := d.timestamp() for _, c := range cids { - _, err := fmt.Fprintf(d.deleteLog, "%s %s\n", now, c) + err := d.deleteLog.Log("%s %s\n", now, c) if err != nil { log.Warnf("error writing delete log: %s", err) break @@ -167,125 +144,10 @@ func (d *debugLog) Flush() { } // rotate non-empty logs - d.rotateReadLog() - d.rotateWriteLog() - d.rotateDeleteLog() - d.rotateStackLog() -} - -func (d *debugLog) rotateReadLog() { - d.readMx.Lock() - defer d.readMx.Unlock() - - if d.readCnt == 0 { - return - } - - err := d.rotate(d.readLog, d.readPath) - if err != nil { - log.Warnf("error rotating read log: %s", err) - return - } - - d.readLog, err = os.OpenFile(d.readPath, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - log.Warnf("error opening log file: %s", err) - return - } - - d.readCnt = 0 -} - -func (d *debugLog) rotateWriteLog() { - d.writeMx.Lock() - defer d.writeMx.Unlock() - - if d.writeCnt == 0 { - return - } - - err := d.rotate(d.writeLog, d.writePath) - if err != nil { - log.Warnf("error rotating write log: %s", err) - return - } - - d.writeLog, err = os.OpenFile(d.writePath, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - log.Warnf("error opening write log file: %s", err) - return - } - - d.writeCnt = 0 -} - -func (d *debugLog) rotateDeleteLog() { - d.deleteMx.Lock() - defer d.deleteMx.Unlock() - - if d.deleteCnt == 0 { - return - } - - err := d.rotate(d.deleteLog, d.deletePath) - if err != nil { - log.Warnf("error rotating delete log: %s", err) - return - } - - d.deleteLog, err = os.OpenFile(d.deletePath, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - log.Warnf("error opening delete log file: %s", err) - return - } - - d.deleteCnt = 0 -} - -func (d *debugLog) rotateStackLog() { - d.stackMx.Lock() - defer d.stackMx.Unlock() - - if d.stackCnt == 0 { - return - } - - err := d.rotate(d.stackLog, d.stackPath) - if err != nil { - log.Warnf("error rotating stack log: %s", err) - return - } - - d.stackLog, err = os.OpenFile(d.stackPath, os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - 
log.Warnf("error opening stack log file: %s", err) - return - } - - d.stackCnt = 0 -} - -func (d *debugLog) rotate(f *os.File, path string) error { - err := f.Close() - if err != nil { - return xerrors.Errorf("error closing file: %w", err) - } - - arxivPath := fmt.Sprintf("%s-%d", path, time.Now().Unix()) - err = os.Rename(path, arxivPath) - if err != nil { - return xerrors.Errorf("error moving file: %w", err) - } - - go func() { - cmd := exec.Command("gzip", arxivPath) - err := cmd.Run() - if err != nil { - log.Warnf("error compressing log: %s", err) - } - }() - - return nil + d.readLog.Rotate() + d.writeLog.Rotate() + d.deleteLog.Rotate() + d.stackLog.Rotate() } func (d *debugLog) Close() error { @@ -293,21 +155,10 @@ func (d *debugLog) Close() error { return nil } - d.readMx.Lock() err1 := d.readLog.Close() - d.readMx.Unlock() - - d.writeMx.Lock() err2 := d.writeLog.Close() - d.writeMx.Unlock() - - d.deleteMx.Lock() err3 := d.deleteLog.Close() - d.deleteMx.Unlock() - - d.stackMx.Lock() err4 := d.stackLog.Close() - d.stackMx.Unlock() return multierr.Combine(err1, err2, err3, err4) } @@ -322,9 +173,8 @@ func (d *debugLog) getStack() string { if !ok { repr = hex.EncodeToString(hash[:]) d.stackMap[key] = repr - d.stackCnt++ - _, err := fmt.Fprintf(d.stackLog, "%s\n%s\n", repr, sk) + err := d.stackLog.Log("%s\n%s\n", repr, sk) if err != nil { log.Warnf("error writing stack trace for %s: %s", repr, err) } @@ -358,3 +208,66 @@ func (d *debugLog) timestamp() string { ts, _ := time.Now().MarshalText() return string(ts) } + +func openDebugLogOp(basePath, name string) (*debugLogOp, error) { + path := filepath.Join(basePath, name) + file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644) + if err != nil { + return nil, xerrors.Errorf("error opening %s: %w", name, err) + } + + return &debugLogOp{path: path, log: file}, nil +} + +func (d *debugLogOp) Close() error { + d.mx.Lock() + defer d.mx.Unlock() + + return d.log.Close() +} + +func (d *debugLogOp) Log(template string, arg ...interface{}) error { + d.mx.Lock() + defer d.mx.Unlock() + + d.count++ + _, err := fmt.Fprintf(d.log, template, arg...) 
+ return err +} + +func (d *debugLogOp) Rotate() { + d.mx.Lock() + defer d.mx.Unlock() + + if d.count == 0 { + return + } + + err := d.log.Close() + if err != nil { + log.Warnf("error closing log (file: %s): %s", d.path, err) + return + } + + arxivPath := fmt.Sprintf("%s-%d", d.path, time.Now().Unix()) + err = os.Rename(d.path, arxivPath) + if err != nil { + log.Warnf("error moving log (file: %s): %s", d.path, err) + return + } + + go func() { + cmd := exec.Command("gzip", arxivPath) + err := cmd.Run() + if err != nil { + log.Warnf("error compressing log (file: %s): %s", arxivPath, err) + } + }() + + d.count = 0 + d.log, err = os.OpenFile(d.path, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + log.Warnf("error opening log (file: %s): %s", d.path, err) + return + } +} From 870a47f55d25dd0be3b16a0e94d2878fc7e8956b Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 9 Jul 2021 20:07:17 +0300 Subject: [PATCH 190/197] handle id cids in internal versions of view/get --- blockstore/splitstore/splitstore.go | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index d6995e6811d..6a13be19cc5 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -1316,11 +1316,20 @@ func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing } // internal version used by walk -func (s *SplitStore) view(cid cid.Cid, cb func([]byte) error) error { - err := s.hot.View(cid, cb) +func (s *SplitStore) view(c cid.Cid, cb func([]byte) error) error { + if isIdentiyCid(c) { + data, err := decodeIdentityCid(c) + if err != nil { + return err + } + + return cb(data) + } + + err := s.hot.View(c, cb) switch err { case bstore.ErrNotFound: - return s.cold.View(cid, cb) + return s.cold.View(c, cb) default: return err @@ -1328,6 +1337,10 @@ func (s *SplitStore) view(cid cid.Cid, cb func([]byte) error) error { } func (s *SplitStore) has(c cid.Cid) (bool, error) { + if isIdentiyCid(c) { + return true, nil + } + has, err := s.hot.Has(c) if has || err != nil { From 0c5e336ff19c0530cf48459151c5e6d2ef55ad6b Mon Sep 17 00:00:00 2001 From: vyzo Date: Sat, 10 Jul 2021 16:30:27 +0300 Subject: [PATCH 191/197] address review comments --- blockstore/splitstore/splitstore.go | 206 ++++++++++++++++------------ 1 file changed, 117 insertions(+), 89 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 6a13be19cc5..16b922ab2dd 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -13,6 +13,7 @@ import ( "time" "go.uber.org/multierr" + "golang.org/x/sync/errgroup" "golang.org/x/xerrors" blocks "github.com/ipfs/go-block-format" @@ -110,10 +111,16 @@ type ChainAccessor interface { SubscribeHeadChanges(change func(revert []*types.TipSet, apply []*types.TipSet) error) } +// hotstore is the interface that must be satisfied by the hot blockstore; it is an extension +// of the Blockstore interface with the traits we need for compaction. 
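+// In particular, compaction needs to enumerate every key in the hotstore, which
+// it does through the BlockstoreIterator's ForEachKey (see doCompact below).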
+type hotstore interface {
+	bstore.Blockstore
+	bstore.BlockstoreIterator
+}
+
 type SplitStore struct {
-	compacting  int32 // compaction (or warmp up) in progress
-	critsection int32 // compaction critical section
-	closing     int32 // the splitstore is closing
+	compacting int32 // compaction (or warm up) in progress
+	closing    int32 // the splitstore is closing
 
 	cfg *Config
@@ -125,8 +132,8 @@ type SplitStore struct {
 	chain ChainAccessor
 	ds    dstore.Datastore
-	hot   bstore.Blockstore
 	cold  bstore.Blockstore
+	hot   hotstore
 
 	markSetEnv  MarkSetEnv
 	markSetSize int64
@@ -139,7 +146,7 @@ type SplitStore struct {
 	// transactional protection for concurrent read/writes during compaction
 	txnLk      sync.RWMutex
 	txnActive  bool
-	txnViews   *sync.WaitGroup
+	txnViews   sync.WaitGroup
 	txnProtect MarkSet
 	txnRefsMx  sync.Mutex
 	txnRefs    map[cid.Cid]struct{}
 	txnMissing map[cid.Cid]struct{}
 }
 
 var _ bstore.Blockstore = (*SplitStore)(nil)
@@ -162,9 +169,15 @@ func init() {
 // is backed by the provided hot and cold stores. The returned SplitStore MUST be
 // attached to the ChainStore with Start in order to trigger compaction.
 func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Config) (*SplitStore, error) {
-	// hot blockstore must support BlockstoreIterator
-	if _, ok := hot.(bstore.BlockstoreIterator); !ok {
-		return nil, xerrors.Errorf("hot blockstore does not support efficient iteration: %T", hot)
+	// hot blockstore must support the hotstore interface
+	hots, ok := hot.(hotstore)
+	if !ok {
+		// be specific about what is missing
+		if _, ok := hot.(bstore.BlockstoreIterator); !ok {
+			return nil, xerrors.Errorf("hot blockstore does not support efficient iteration: %T", hot)
+		}
+
+		return nil, xerrors.Errorf("hot blockstore does not support the necessary traits: %T", hot)
 	}
 
 	// the markset env
@@ -177,12 +190,10 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co
 	ss := &SplitStore{
 		cfg:        cfg,
 		ds:         ds,
-		hot:        hot,
 		cold:       cold,
+		hot:        hots,
 		markSetEnv: markSetEnv,
 
-		txnViews: new(sync.WaitGroup),
-
 		coldPurgeSize: defaultColdPurgeSize,
 	}
 
@@ -252,18 +263,13 @@ func (s *SplitStore) Get(cid cid.Cid) (blocks.Block, error) {
 		return blk, nil
 
 	case bstore.ErrNotFound:
-		if s.debug != nil {
-			s.mx.Lock()
-			warm := s.warmupEpoch > 0
-			s.mx.Unlock()
-			if warm {
-				s.debug.LogReadMiss(cid)
-			}
+		if s.isWarm() {
+			s.debug.LogReadMiss(cid)
 		}
 
 		blk, err = s.cold.Get(cid)
 		if err == nil {
-			stats.Record(context.Background(), metrics.SplitstoreMiss.M(1))
+			stats.Record(s.ctx, metrics.SplitstoreMiss.M(1))
 		}
 		return blk, err
@@ -294,18 +300,13 @@ func (s *SplitStore) GetSize(cid cid.Cid) (int, error) {
 		return size, nil
 
 	case bstore.ErrNotFound:
-		if s.debug != nil {
-			s.mx.Lock()
-			warm := s.warmupEpoch > 0
-			s.mx.Unlock()
-			if warm {
-				s.debug.LogReadMiss(cid)
-			}
+		if s.isWarm() {
+			s.debug.LogReadMiss(cid)
 		}
 
 		size, err = s.cold.GetSize(cid)
 		if err == nil {
-			stats.Record(context.Background(), metrics.SplitstoreMiss.M(1))
+			stats.Record(s.ctx, metrics.SplitstoreMiss.M(1))
 		}
 		return size, err
@@ -393,15 +394,21 @@ func (s *SplitStore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
 		return nil, err
 	}
 
-	ch := make(chan cid.Cid)
+	seen := cid.NewSet()
+	ch := make(chan cid.Cid, 8) // buffer is arbitrary, just enough to avoid context switches
 	go func() {
 		defer cancel()
 		defer close(ch)
 
 		for _, in := range []<-chan cid.Cid{chHot, chCold} {
-			for cid := range in {
+			for c := range in {
+				// ensure we only emit each key once
+				if !seen.Visit(c) {
+					continue
+				}
+
 				select {
-				case ch <- cid:
+				case ch <- c:
 				case <-ctx.Done():
 					return
 				}
@@ -443,18 +450,13 @@ func (s *SplitStore)
View(cid cid.Cid, cb func([]byte) error) error {
 	err := s.hot.View(cid, cb)
 	switch err {
 	case bstore.ErrNotFound:
-		if s.debug != nil {
-			s.mx.Lock()
-			warm := s.warmupEpoch > 0
-			s.mx.Unlock()
-			if warm {
-				s.debug.LogReadMiss(cid)
-			}
+		if s.isWarm() {
+			s.debug.LogReadMiss(cid)
 		}
 
 		err = s.cold.View(cid, cb)
 		if err == nil {
-			stats.Record(context.Background(), metrics.SplitstoreMiss.M(1))
+			stats.Record(s.ctx, metrics.SplitstoreMiss.M(1))
 		}
 		return err
 
@@ -463,6 +465,12 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error {
 	}
 }
 
+func (s *SplitStore) isWarm() bool {
+	s.mx.Lock()
+	defer s.mx.Unlock()
+	return s.warmupEpoch > 0
+}
+
 // State tracking
 func (s *SplitStore) Start(chain ChainAccessor) error {
 	s.chain = chain
@@ -527,11 +535,14 @@ func (s *SplitStore) Start(chain ChainAccessor) error {
 }
 
 func (s *SplitStore) Close() error {
-	atomic.StoreInt32(&s.closing, 1)
+	if !atomic.CompareAndSwapInt32(&s.closing, 0, 1) {
+		// already closing
+		return nil
+	}
 
-	if atomic.LoadInt32(&s.critsection) == 1 {
-		log.Warn("ongoing compaction in critical section; waiting for it to finish...")
-		for atomic.LoadInt32(&s.critsection) == 1 {
+	if atomic.LoadInt32(&s.compacting) == 1 {
+		log.Warn("close with ongoing compaction in progress; waiting for it to finish...")
+		for atomic.LoadInt32(&s.compacting) == 1 {
 			time.Sleep(time.Second)
 		}
 	}
@@ -549,12 +560,24 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
 	curTs := apply[len(apply)-1]
 	epoch := curTs.Height()
 
+	// NOTE: there is an implicit invariant assumption that HeadChange is invoked
+	//       synchronously and no other HeadChange can be invoked while one is in
+	//       progress.
+	//       this is guaranteed by the chainstore, and it is pervasive in all lotus
+	//       -- if that ever changes then all hell will break loose in general and
+	//       we will have a race to protectTipSets here.
if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) { // we are currently compacting -- protect the new tipset(s) s.protectTipSets(apply) return nil } + // check if we are actually closing first + if atomic.LoadInt32(&s.closing) == 1 { + atomic.StoreInt32(&s.compacting, 0) + return nil + } + timestamp := time.Unix(int64(curTs.MinTimestamp()), 0) if time.Since(timestamp) > SyncGapTime { // don't attempt compaction before we have caught up syncing @@ -608,7 +631,7 @@ func (s *SplitStore) protectView(c cid.Cid) *sync.WaitGroup { if !s.txnActive { s.txnViews.Add(1) - return s.txnViews + return &s.txnViews } s.trackTxnRef(c) @@ -653,6 +676,8 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) { } s.txnRefsMx.Lock() + defer s.txnRefsMx.Unlock() + quiet := false for _, c := range cids { if isUnitaryObject(c) { @@ -676,7 +701,7 @@ func (s *SplitStore) trackTxnRefMany(cids []cid.Cid) { s.txnRefs[c] = struct{}{} } - s.txnRefsMx.Unlock() + return } @@ -714,6 +739,7 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error { workch <- c count++ } + close(workch) if count == 0 { return nil @@ -727,31 +753,23 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error { workers = count } - close(workch) - - worker := func(wg *sync.WaitGroup) { - if wg != nil { - defer wg.Done() - } - + worker := func() error { for c := range workch { err := s.doTxnProtect(c, markSet) if err != nil { - log.Warnf("error protecting transactional references: %s", err) - return + return xerrors.Errorf("error protecting transactional references to %s: %w", c, err) } } + return nil } - if workers > 1 { - wg := new(sync.WaitGroup) - for i := 0; i < workers; i++ { - wg.Add(1) - go worker(wg) - } - wg.Wait() - } else { - worker(nil) + g := new(errgroup.Group) + for i := 0; i < workers; i++ { + g.Go(worker) + } + + if err := g.Wait(); err != nil { + return err } log.Infow("protecting transactional refs done", "took", time.Since(startProtect), "protected", count) @@ -761,6 +779,10 @@ func (s *SplitStore) protectTxnRefs(markSet MarkSet) error { // transactionally protect a reference by walking the object and marking. // concurrent markings are short circuited by checking the markset. func (s *SplitStore) doTxnProtect(root cid.Cid, markSet MarkSet) error { + if err := s.checkClosing(); err != nil { + return err + } + // Note: cold objects are deleted heaviest first, so the consituents of an object // cannot be deleted before the object itself. return s.walkObjectIncomplete(root, cid.NewSet(), @@ -918,7 +940,7 @@ func (s *SplitStore) compact(curTs *types.TipSet, wg *sync.WaitGroup) { start = time.Now() err := s.doCompact(curTs) took := time.Since(start).Milliseconds() - stats.Record(context.Background(), metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) + stats.Record(s.ctx, metrics.SplitstoreCompactionTimeSeconds.M(float64(took)/1e3)) if err != nil { log.Errorf("COMPACTION ERROR: %s", err) @@ -991,7 +1013,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { var hotCnt, coldCnt int cold := make([]cid.Cid, 0, s.coldPurgeSize) - err = s.hot.(bstore.BlockstoreIterator).ForEachKey(func(c cid.Cid) error { + err = s.hot.ForEachKey(func(c cid.Cid) error { // was it marked? 
mark, err := markSet.Has(c)
 		if err != nil {

@@ -1021,8 +1043,8 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 	}
 
 	log.Infow("compaction stats", "hot", hotCnt, "cold", coldCnt)
-	stats.Record(context.Background(), metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
-	stats.Record(context.Background(), metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
+	stats.Record(s.ctx, metrics.SplitstoreCompactionHot.M(int64(hotCnt)))
+	stats.Record(s.ctx, metrics.SplitstoreCompactionCold.M(int64(coldCnt)))
 
 	if err := s.checkClosing(); err != nil {
 		return err
 	}
@@ -1064,6 +1086,9 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 	log.Infow("sorting done", "took", time.Since(startSort))
 
 	// 4.1 protect transactional refs once more
+	// strictly speaking, this is not necessary as purge will do it before deleting each
+	// batch. however, there is likely a largish number of references accumulated during
+	// the sort and this protects before entering purge context.
 	err = s.protectTxnRefs(markSet)
 	if err != nil {
 		return xerrors.Errorf("error protecting transactional refs: %w", err)
 	}
@@ -1073,16 +1098,6 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 		return err
 	}
 
-	// Enter critical section
-	log.Info("entering critical section")
-	atomic.StoreInt32(&s.critsection, 1)
-	defer atomic.StoreInt32(&s.critsection, 0)
-
-	// check to see if we are closing first; if that's the case just return
-	if err := s.checkClosing(); err != nil {
-		return err
-	}
-
 	// 5. purge cold objects from the hotstore, taking protected references into account
 	log.Info("purging cold objects from the hotstore")
 	startPurge := time.Now()
@@ -1119,10 +1134,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error {
 	return nil
 }
 
 func (s *SplitStore) beginTxnProtect() *sync.WaitGroup {
 	log.Info("preparing compaction transaction")
 
 	s.txnLk.Lock()
 	defer s.txnLk.Unlock()
 
 	s.txnActive = true
 	s.txnRefs = make(map[cid.Cid]struct{})
 	s.txnMissing = make(map[cid.Cid]struct{})
 
-	wg := s.txnViews
-	s.txnViews = nil
-
-	return wg
+	return &s.txnViews
 }
 
 func (s *SplitStore) beginTxnMarking(markSet MarkSet) {
@@ -1141,11 +1153,13 @@ func (s *SplitStore) endTxnProtect() {
 		return
 	}
 
+	// release markset memory
+	s.txnProtect.Close()
+
 	s.txnActive = false
 	s.txnProtect = nil
 	s.txnRefs = nil
 	s.txnMissing = nil
-	s.txnViews = new(sync.WaitGroup)
 }
 
 func (s *SplitStore) walkChain(ts *types.TipSet, boundary abi.ChainEpoch, inclMsgs bool,
@@ -1238,6 +1252,11 @@ func (s *SplitStore) walkObject(c cid.Cid, walked *cid.Set, f func(cid.Cid) erro
 		return nil
 	}
 
+	// check this before recursing
+	if err := s.checkClosing(); err != nil {
+		return err
+	}
+
 	var links []cid.Cid
 	err := s.view(c, func(data []byte) error {
 		return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) {
@@ -1294,6 +1313,11 @@ func (s *SplitStore) walkObjectIncomplete(c cid.Cid, walked *cid.Set, f, missing
 		return nil
 	}
 
+	// check this before recursing
+	if err := s.checkClosing(); err != nil {
+		return err
+	}
+
 	var links []cid.Cid
 	err := s.view(c, func(data []byte) error {
 		return cbg.ScanForLinks(bytes.NewReader(data), func(c cid.Cid) {
@@ -1522,24 +1546,28 @@ func (s *SplitStore) purge(cids []cid.Cid, markSet MarkSet) error {
 		func(cids []cid.Cid) error {
 			deadCids := deadCids[:0]
 
-		again:
-			if err := s.checkClosing(); err != nil {
-				return err
-			}
+			for {
+				if err := s.checkClosing(); err != nil {
+					return err
+				}
 
-			s.txnLk.Lock()
-			if len(s.txnRefs) > 0 {
+				s.txnLk.Lock()
+				if len(s.txnRefs) == 0 {
+					// keep the lock!
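+					// it is released by the deferred Unlock below, once this batch has been purged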
+					break
+				}
+
+				// unlock and protect
 				s.txnLk.Unlock()
 
 				err := s.protectTxnRefs(markSet)
 				if err != nil {
 					return xerrors.Errorf("error protecting transactional refs: %w", err)
 				}
-
-				goto again
 			}
 			defer s.txnLk.Unlock()
+
 			for _, c := range cids {
 				live, err := markSet.Has(c)
 				if err != nil {

From df9670c58d1229586fd119367fc34964a19b55c5 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Sat, 10 Jul 2021 16:38:40 +0300
Subject: [PATCH 192/197] fix lint

---
 blockstore/splitstore/splitstore.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index 16b922ab2dd..f089b39874a 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -1154,7 +1154,9 @@ func (s *SplitStore) endTxnProtect() {
 	}
 
 	// release markset memory
-	s.txnProtect.Close()
+	if s.txnProtect != nil {
+		_ = s.txnProtect.Close()
+	}
 
 	s.txnActive = false
 	s.txnProtect = nil

From 759594d01c22a63cc920138f7c67285eea3ff051 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Tue, 13 Jul 2021 03:11:40 +0300
Subject: [PATCH 193/197] always return the waitgroup in protectView

so that we preclude the following scenario:
Start compaction.
Start view.
Finish compaction.
Start compaction.

which would not wait for the view to complete.

---
 blockstore/splitstore/splitstore.go | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index f089b39874a..90ebbffcd76 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -629,13 +629,12 @@ func (s *SplitStore) protectView(c cid.Cid) *sync.WaitGroup {
 	s.txnLk.RLock()
 	defer s.txnLk.RUnlock()
 
-	if !s.txnActive {
-		s.txnViews.Add(1)
-		return &s.txnViews
+	s.txnViews.Add(1)
+	if s.txnActive {
+		s.trackTxnRef(c)
 	}
 
-	s.trackTxnRef(c)
-	return nil
+	return &s.txnViews
 }

From 60212c86cbd855ba531fb9e7a8303cf57eb25b67 Mon Sep 17 00:00:00 2001
From: vyzo
Date: Tue, 13 Jul 2021 03:14:13 +0300
Subject: [PATCH 194/197] put a mutex around HeadChange

---
 blockstore/splitstore/splitstore.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index 90ebbffcd76..d45c79d93ac 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -128,6 +128,8 @@ type SplitStore struct {
 	warmupEpoch abi.ChainEpoch // protected by mx
 	baseEpoch   abi.ChainEpoch // protected by compaction lock
 
+	headChangeMx sync.Mutex
+
 	coldPurgeSize int
 
 	chain ChainAccessor
@@ -552,6 +554,9 @@ func (s *SplitStore) Close() error {
 }
 
 func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
+	s.headChangeMx.Lock()
+	defer s.headChangeMx.Unlock()
+
 	// Revert only.
 	if len(apply) == 0 {
 		return nil
 	}
@@ -566,6 +571,8 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
 	//       this is guaranteed by the chainstore, and it is pervasive in all lotus
 	//       -- if that ever changes then all hell will break loose in general and
 	//       we will have a race to protectTipSets here.
+	// Regardless, we put a mutex in HeadChange just to be safe
+
 	if !atomic.CompareAndSwapInt32(&s.compacting, 0, 1) {
 		// we are currently compacting -- protect the new tipset(s)
 		s.protectTipSets(apply)
 		return nil

From 04abd190ab55af075f19abf805734c9884c017bd Mon Sep 17 00:00:00 2001
From: Steven Allen
Date: Mon, 12 Jul 2021 21:46:18 -0700
Subject: [PATCH 195/197] nit: remove useless goto

Because stebalien has allergies.

---
 blockstore/splitstore/splitstore.go | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index d45c79d93ac..3adc02fa9c1 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -659,19 +659,15 @@ func (s *SplitStore) trackTxnRef(c cid.Cid) {
 		mark, err := s.txnProtect.Has(c)
 		if err != nil {
 			log.Warnf("error checking markset: %s", err)
-			goto track
-		}
-
-		if mark {
+			// track it anyways
+		} else if mark {
 			return
 		}
 	}
 
-track:
 	s.txnRefsMx.Lock()
 	s.txnRefs[c] = struct{}{}
 	s.txnRefsMx.Unlock()
-	return
 }

From 257423e917ffb929907f2309df77c2ed3f62f32a Mon Sep 17 00:00:00 2001
From: vyzo
Date: Tue, 13 Jul 2021 09:01:50 +0300
Subject: [PATCH 196/197] fix view waiting issues with the WaitGroup

We can Add after Wait is called, which is problematic with WaitGroups.
This instead uses a mx/cond combo and waits while the count is > 0.
The only downside is that we might needlessly wait for (a bunch of)
views that started while the txn is active, but we can live with that.

---
 blockstore/splitstore/splitstore.go | 61 +++++++++++++++++++----------
 1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go
index 3adc02fa9c1..563b14bc4fe 100644
--- a/blockstore/splitstore/splitstore.go
+++ b/blockstore/splitstore/splitstore.go
@@ -146,13 +146,15 @@ type SplitStore struct {
 	debug *debugLog
 
 	// transactional protection for concurrent read/writes during compaction
-	txnLk      sync.RWMutex
-	txnActive  bool
-	txnViews   sync.WaitGroup
-	txnProtect MarkSet
-	txnRefsMx  sync.Mutex
-	txnRefs    map[cid.Cid]struct{}
-	txnMissing map[cid.Cid]struct{}
+	txnLk        sync.RWMutex
+	txnViewsMx   sync.Mutex
+	txnViewsCond sync.Cond
+	txnViews     int
+	txnActive    bool
+	txnProtect   MarkSet
+	txnRefsMx    sync.Mutex
+	txnRefs      map[cid.Cid]struct{}
+	txnMissing   map[cid.Cid]struct{}
 }
 
 var _ bstore.Blockstore = (*SplitStore)(nil)
@@ -199,6 +201,7 @@ func Open(path string, ds dstore.Datastore, hot, cold bstore.Blockstore, cfg *Co
 		coldPurgeSize: defaultColdPurgeSize,
 	}
 
+	ss.txnViewsCond.L = &ss.txnViewsMx
 	ss.ctx, ss.cancel = context.WithCancel(context.Background())
 
 	if enableDebugLog {
@@ -444,10 +447,8 @@ func (s *SplitStore) View(cid cid.Cid, cb func([]byte) error) error {
 	// view can't have its data pointer deleted, which would be catastrophic.
 	// Note that we can't just RLock for the duration of the view, as this could
 	// lead to deadlock with recursive views.
-	wg := s.protectView(cid)
-	if wg != nil {
-		defer wg.Done()
-	}
+	s.protectView(cid)
+	defer s.viewDone()
 
 	err := s.hot.View(cid, cb)
 	switch err {
 	case bstore.ErrNotFound:
@@ -594,7 +595,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error {
 
 	if epoch-s.baseEpoch > CompactionThreshold {
 		// it's time to compact -- prepare the transaction and go!
- wg := s.beginTxnProtect() + s.beginTxnProtect() go func() { defer atomic.StoreInt32(&s.compacting, 0) defer s.endTxnProtect() @@ -602,7 +603,7 @@ func (s *SplitStore) HeadChange(_, apply []*types.TipSet) error { log.Info("compacting splitstore") start := time.Now() - s.compact(curTs, wg) + s.compact(curTs) log.Infow("compaction done", "took", time.Since(start)) }() @@ -632,16 +633,36 @@ func (s *SplitStore) protectTipSets(apply []*types.TipSet) { } // transactionally protect a view -func (s *SplitStore) protectView(c cid.Cid) *sync.WaitGroup { +func (s *SplitStore) protectView(c cid.Cid) { s.txnLk.RLock() defer s.txnLk.RUnlock() - s.txnViews.Add(1) if s.txnActive { s.trackTxnRef(c) } - return &s.txnViews + s.txnViewsMx.Lock() + s.txnViews++ + s.txnViewsMx.Unlock() +} + +func (s *SplitStore) viewDone() { + s.txnViewsMx.Lock() + defer s.txnViewsMx.Unlock() + + s.txnViews-- + if s.txnViews == 0 { + s.txnViewsCond.Signal() + } +} + +func (s *SplitStore) viewWait() { + s.txnViewsMx.Lock() + defer s.txnViewsMx.Unlock() + + for s.txnViews > 0 { + s.txnViewsCond.Wait() + } } // transactionally protect a reference to an object @@ -933,10 +954,10 @@ func (s *SplitStore) doWarmup(curTs *types.TipSet) error { // - We sort cold objects heaviest first, so as to never delete the consituents of a DAG before the DAG itself (which would leave dangling references) // - We delete in small batches taking a lock; each batch is checked again for marks, from the concurrent transactional mark, so as to never delete anything live // - We then end the transaction and compact/gc the hotstore. -func (s *SplitStore) compact(curTs *types.TipSet, wg *sync.WaitGroup) { +func (s *SplitStore) compact(curTs *types.TipSet) { log.Info("waiting for active views to complete") start := time.Now() - wg.Wait() + s.viewWait() log.Infow("waiting for active views done", "took", time.Since(start)) start = time.Now() @@ -1126,7 +1147,7 @@ func (s *SplitStore) doCompact(curTs *types.TipSet) error { return nil } -func (s *SplitStore) beginTxnProtect() *sync.WaitGroup { +func (s *SplitStore) beginTxnProtect() { log.Info("preparing compaction transaction") s.txnLk.Lock() @@ -1135,8 +1156,6 @@ func (s *SplitStore) beginTxnProtect() *sync.WaitGroup { s.txnActive = true s.txnRefs = make(map[cid.Cid]struct{}) s.txnMissing = make(map[cid.Cid]struct{}) - - return &s.txnViews } func (s *SplitStore) beginTxnMarking(markSet MarkSet) { From af399529ecca4c6e082474cada76c9b96a3d8f86 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 13 Jul 2021 09:06:40 +0300 Subject: [PATCH 197/197] finetune view waiting --- blockstore/splitstore/splitstore.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/blockstore/splitstore/splitstore.go b/blockstore/splitstore/splitstore.go index 563b14bc4fe..75989b53c7e 100644 --- a/blockstore/splitstore/splitstore.go +++ b/blockstore/splitstore/splitstore.go @@ -146,15 +146,16 @@ type SplitStore struct { debug *debugLog // transactional protection for concurrent read/writes during compaction - txnLk sync.RWMutex - txnViewsMx sync.Mutex - txnViewsCond sync.Cond - txnViews int - txnActive bool - txnProtect MarkSet - txnRefsMx sync.Mutex - txnRefs map[cid.Cid]struct{} - txnMissing map[cid.Cid]struct{} + txnLk sync.RWMutex + txnViewsMx sync.Mutex + txnViewsCond sync.Cond + txnViews int + txnViewsWaiting bool + txnActive bool + txnProtect MarkSet + txnRefsMx sync.Mutex + txnRefs map[cid.Cid]struct{} + txnMissing map[cid.Cid]struct{} } var _ bstore.Blockstore = (*SplitStore)(nil) @@ -651,7 
+652,7 @@ func (s *SplitStore) viewDone() { defer s.txnViewsMx.Unlock() s.txnViews-- - if s.txnViews == 0 { + if s.txnViews == 0 && s.txnViewsWaiting { s.txnViewsCond.Signal() } } @@ -660,9 +661,11 @@ func (s *SplitStore) viewWait() { s.txnViewsMx.Lock() defer s.txnViewsMx.Unlock() + s.txnViewsWaiting = true for s.txnViews > 0 { s.txnViewsCond.Wait() } + s.txnViewsWaiting = false } // transactionally protect a reference to an object
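
The mx/cond pattern in these last two patches is worth seeing in isolation. Below is a
short, self-contained Go sketch of the idea -- the names (viewTracker, begin, done,
wait) are invented for illustration and are not the lotus API. sync.WaitGroup requires
that Add calls with a positive delta happen before Wait once the counter may have
reached zero, which is exactly the compaction-vs-view race described above; a counter
guarded by a mutex and condition variable tolerates new views starting while a waiter
is blocked, and the waiter simply sleeps until the count next drops to zero, as
viewWait does.

package main

import (
	"fmt"
	"sync"
	"time"
)

// viewTracker counts in-flight views. Unlike sync.WaitGroup, it remains safe
// when new views begin while a waiter is blocked: the waiter just waits for
// the next time the count drops to zero.
type viewTracker struct {
	mx      sync.Mutex
	cond    sync.Cond
	views   int
	waiting bool
}

func newViewTracker() *viewTracker {
	t := &viewTracker{}
	t.cond.L = &t.mx
	return t
}

// begin registers an in-flight view.
func (t *viewTracker) begin() {
	t.mx.Lock()
	t.views++
	t.mx.Unlock()
}

// done unregisters a view, waking the waiter when the count hits zero.
func (t *viewTracker) done() {
	t.mx.Lock()
	t.views--
	if t.views == 0 && t.waiting {
		t.cond.Signal()
	}
	t.mx.Unlock()
}

// wait blocks until no views are in flight.
func (t *viewTracker) wait() {
	t.mx.Lock()
	t.waiting = true
	for t.views > 0 {
		t.cond.Wait()
	}
	t.waiting = false
	t.mx.Unlock()
}

func main() {
	t := newViewTracker()
	for i := 1; i <= 3; i++ {
		t.begin()
		go func(d time.Duration) {
			defer t.done()
			time.Sleep(d) // stand-in for a long-running blockstore view
		}(time.Duration(i) * 10 * time.Millisecond)
	}
	t.wait() // blocks until all in-flight views have completed
	fmt.Println("all views done; safe to start compaction")
}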