-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbackend_provider.go
483 lines (405 loc) · 15.4 KB
/
backend_provider.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
package zikade
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"io"
"strings"
"sync"
"time"
"github.com/benbjohnson/clock"
lru "github.com/hashicorp/golang-lru/v2"
ds "github.com/ipfs/go-datastore"
dsq "github.com/ipfs/go-datastore/query"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/peerstore"
"github.com/multiformats/go-base32"
"go.opentelemetry.io/otel/metric"
"golang.org/x/exp/slog"
"github.com/probe-lab/zikade/tele"
)
// ProvidersBackend implements the [Backend] interface and handles provider
// record requests for the "/providers/" namespace. It also implements
// [io.Closer]; closing the backend stops the garbage collection loop.
type ProvidersBackend struct {
// namespace holds the namespace string - usually
// this is set to namespaceProviders ("providers")
namespace string
// cfg is set to DefaultProviderBackendConfig by default
cfg *ProvidersBackendConfig
// log is a convenience accessor of cfg.Logger
log *slog.Logger
// cache is an LRU cache for frequently requested records. It is populated
// when peers request a record and purged during garbage collection.
// TODO: is that really so effective? The cache size is quite low, too.
cache *lru.Cache[string, providerSet]
// addrBook holds a reference to the peerstore's address book to store and
// fetch peer multiaddresses from (we don't save them in the datastore).
addrBook peerstore.AddrBook
// datastore is where we save the peer IDs providing a certain multihash.
// The datastore must be thread-safe.
datastore ds.Datastore
// gcSkip is a sync map that marks records as to-be-skipped by the garbage
// collection process. Keys are datastore key strings. The map is emptied
// at the start of every garbage collection run.
// TODO: this is a sub-optimal pattern.
gcSkip sync.Map
// gcCancelMu guards gcCancel and gcDone, which together track the state of
// the garbage collection loop.
gcCancelMu sync.RWMutex
// gcCancel cancels the context of the running garbage collection loop. It
// is nil while the loop is not running.
gcCancel context.CancelFunc
// gcDone is closed by the garbage collection goroutine when it exits.
gcDone chan struct{}
}
// Compile-time checks that *ProvidersBackend satisfies the interfaces it is
// expected to implement.
var (
_ Backend = (*ProvidersBackend)(nil)
_ io.Closer = (*ProvidersBackend)(nil)
)
// ProvidersBackendConfig is used to construct a [ProvidersBackend]. Use
// [DefaultProviderBackendConfig] to get a default configuration struct and then
// modify it to your liking.
type ProvidersBackendConfig struct {
// clk is an unexported field that's used for testing time related methods.
// The backend reads the current time and creates its garbage collection
// ticker through this clock.
clk clock.Clock
// ProvideValidity specifies for how long provider records are valid. A
// record is treated as expired once more than this duration has passed
// since it was stored.
ProvideValidity time.Duration
// AddressTTL specifies for how long we will keep around provider multi
// addresses in the peerstore's address book. If such multiaddresses are
// present we send them alongside the peer ID to the requesting peer. This
// prevents the necessity for a second lookup for the multiaddresses on the
// requesting peers' side.
AddressTTL time.Duration
// CacheSize specifies the LRU cache size
CacheSize int
// GCInterval defines how frequently garbage collection should run
GCInterval time.Duration
// Logger is the logger to use
Logger *slog.Logger
// Tele holds a reference to the telemetry struct to capture metrics and
// traces.
Tele *Telemetry
// AddressFilter is a filter function that any addresses that we attempt to
// store or fetch from the peerstore's address book need to pass through.
// If you're manually configuring this backend, make sure to align the
// filter with the one configured in [Config.AddressFilter].
AddressFilter AddressFilter
}
// DefaultProviderBackendConfig builds the configuration that is used when a
// nil configuration is passed to [NewBackendProvider]. Treat the returned
// struct as a starting point and adjust individual fields as needed. An error
// is returned only if the telemetry struct backed by the global providers
// cannot be created.
func DefaultProviderBackendConfig() (*ProvidersBackendConfig, error) {
	tel, err := NewWithGlobalProviders()
	if err != nil {
		return nil, fmt.Errorf("new telemetry: %w", err)
	}

	cfg := new(ProvidersBackendConfig)
	cfg.clk = clock.New()

	// empirically measured in: https://github.com/probe-lab/network-measurements/blob/master/results/rfm17-provider-record-liveness.md
	cfg.ProvideValidity = 48 * time.Hour

	// The following three values are MAGIC numbers.
	cfg.AddressTTL = 24 * time.Hour
	cfg.CacheSize = 256
	cfg.GCInterval = time.Hour

	cfg.Logger = slog.Default()
	cfg.Tele = tel

	// verify alignment with [Config.AddressFilter]
	cfg.AddressFilter = AddrFilterIdentity

	return cfg, nil
}
// Store implements the [Backend] interface. In the case of a [ProvidersBackend]
// this method accepts a [peer.AddrInfo] as a value and records it as a provider
// for the given key:
//
//   - the provider's peer ID is written to the datastore under a key derived
//     from the namespace, the given key and the peer ID,
//   - the provider's multiaddresses (after passing the configured address
//     filter) are added to the address book with the configured AddressTTL,
//   - if the key is currently cached, the cached provider set is updated.
//
// On success the unmodified [peer.AddrInfo] is returned. An error is returned
// if value has the wrong type or if the datastore write fails.
func (p *ProvidersBackend) Store(ctx context.Context, key string, value any) (any, error) {
	addrInfo, ok := value.(peer.AddrInfo)
	if !ok {
		return nil, fmt.Errorf("expected peer.AddrInfo value type, got: %T", value)
	}

	// The record carries the time of insertion; readers compare it against
	// ProvideValidity to decide whether the record has expired.
	rec := expiryRecord{
		expiry: p.cfg.clk.Now(),
	}

	cacheKey := newDatastoreKey(p.namespace, key).String()
	dsKey := newDatastoreKey(p.namespace, key, string(addrInfo.ID))

	if provs, ok := p.cache.Get(cacheKey); ok {
		provs.addProvider(addrInfo, rec.expiry)
		// The cache stores providerSet by value, so Get hands out a copy. The
		// copy shares the map but not the slice header, which means a newly
		// appended provider is lost unless the updated set is written back.
		p.cache.Add(cacheKey, provs)
	}

	filtered := p.cfg.AddressFilter(addrInfo.Addrs)
	p.addrBook.AddAddrs(addrInfo.ID, filtered, p.cfg.AddressTTL)

	// Mark the record so that a concurrently running garbage collection
	// doesn't touch it.
	_, found := p.gcSkip.LoadOrStore(dsKey.String(), struct{}{})

	if err := p.datastore.Put(ctx, dsKey, rec.MarshalBinary()); err != nil {
		// The cache may now contain a provider that isn't persisted.
		p.cache.Remove(cacheKey)

		// if we have just added the key to the collectGarbage skip list, delete it again
		// if we have added it in a previous Store invocation, keep it around
		if !found {
			p.gcSkip.Delete(dsKey.String())
		}

		return nil, fmt.Errorf("datastore put: %w", err)
	}

	return addrInfo, nil
}
// Fetch implements the [Backend] interface. In the case of a [ProvidersBackend]
// this method returns a *providerSet (unexported) that contains all peer IDs
// and known multiaddresses for the given key. The key is base32-encoded and
// placed under the backend's namespace to form the datastore query prefix.
//
// The result is served from the LRU cache if possible. Otherwise the datastore
// is queried; entries that cannot be decoded or that are older than the
// configured ProvideValidity are deleted and left out of the result. If no
// valid provider remains, [ds.ErrNotFound] is returned. Both the cache-hit and
// the cache-miss path return the same dynamic type (*providerSet).
func (p *ProvidersBackend) Fetch(ctx context.Context, key string) (any, error) {
	qKey := newDatastoreKey(p.namespace, key)

	if cached, ok := p.cache.Get(qKey.String()); ok {
		p.trackCacheQuery(ctx, true)
		// The cache holds values; hand out a pointer so that callers see the
		// same type as on a cache miss.
		return &cached, nil
	}
	p.trackCacheQuery(ctx, false)

	q, err := p.datastore.Query(ctx, dsq.Query{Prefix: qKey.String()})
	if err != nil {
		return nil, err
	}
	defer func() {
		if err := q.Close(); err != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "failed closing fetch query", slog.String("err", err.Error()))
		}
	}()

	now := p.cfg.clk.Now()
	out := &providerSet{
		providers: []peer.AddrInfo{},
		set:       make(map[peer.ID]time.Time),
	}

	for e := range q.Next() {
		if e.Error != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "Fetch datastore entry contains error", slog.String("key", e.Key), slog.String("err", e.Error.Error()))
			continue
		}

		rec := expiryRecord{}
		if err := rec.UnmarshalBinary(e.Value); err != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "Fetch provider record unmarshalling failed", slog.String("key", e.Key), slog.String("err", err.Error()))
			p.delete(ctx, ds.RawKey(e.Key))
			continue
		}
		if now.Sub(rec.expiry) > p.cfg.ProvideValidity {
			// record is expired
			p.delete(ctx, ds.RawKey(e.Key))
			continue
		}

		// The last path component of the datastore key is the base32-encoded
		// peer ID of the provider.
		idx := strings.LastIndex(e.Key, "/")
		binPeerID, err := base32.RawStdEncoding.DecodeString(e.Key[idx+1:])
		if err != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "base32 key decoding error", slog.String("key", e.Key), slog.String("err", err.Error()))
			p.delete(ctx, ds.RawKey(e.Key))
			continue
		}

		// Multiaddresses aren't persisted in the datastore, look them up in
		// the address book instead.
		maddrs := p.addrBook.Addrs(peer.ID(binPeerID))
		addrInfo := peer.AddrInfo{
			ID:    peer.ID(binPeerID),
			Addrs: p.cfg.AddressFilter(maddrs),
		}

		out.addProvider(addrInfo, rec.expiry)
	}

	if len(out.providers) == 0 {
		return nil, ds.ErrNotFound
	}

	p.cache.Add(qKey.String(), *out)

	return out, nil
}
// Validate picks the "best" value out of the given ones and returns its
// index. Only values of type [peer.AddrInfo] are considered; among those, the
// one carrying the most multiaddresses wins and ties are resolved in favour of
// the earliest occurrence. An index of -1 together with an error is returned
// if a single value of the wrong type was given or if none of the values is a
// [peer.AddrInfo].
func (p *ProvidersBackend) Validate(ctx context.Context, key string, values ...any) (int, error) {
	// A lone value only needs a type check.
	if len(values) == 1 {
		if _, isAddrInfo := values[0].(peer.AddrInfo); !isAddrInfo {
			return -1, fmt.Errorf("invalid type %T", values[0])
		}
		return 0, nil
	}

	winner, winnerAddrs := -1, 0
	for idx := range values {
		candidate, isAddrInfo := values[idx].(peer.AddrInfo)
		if !isAddrInfo {
			continue
		}

		// The first valid candidate always wins; later ones need strictly
		// more addresses to take over.
		if n := len(candidate.Addrs); winner < 0 || n > winnerAddrs {
			winner, winnerAddrs = idx, n
		}
	}

	if winner < 0 {
		return -1, fmt.Errorf("no value of correct type")
	}

	return winner, nil
}
// Close is here to implement the [io.Closer] interface. This will get called
// when the [DHT] "shuts down"/closes. It stops the garbage collection loop
// (a no-op if the loop isn't running) and always returns nil.
func (p *ProvidersBackend) Close() error {
p.StopGarbageCollection()
return nil
}
// StartGarbageCollection launches the background goroutine that periodically
// garbage collects expired provider records. The period is taken from
// [ProvidersBackendConfig.GCInterval]. If the loop is already running, this
// method only logs a message and returns. Use [StopGarbageCollection] to stop
// the loop again.
func (p *ProvidersBackend) StartGarbageCollection() {
	p.gcCancelMu.Lock()
	defer p.gcCancelMu.Unlock()

	if p.gcCancel != nil {
		p.log.Info("Provider backend's garbage collection is already running")
		return
	}

	gcCtx, stop := context.WithCancel(context.Background())
	done := make(chan struct{})
	p.gcCancel = stop
	p.gcDone = done

	// init ticker outside the goroutine to prevent race condition with
	// clock mock in garbage collection test.
	ticker := p.cfg.clk.Ticker(p.cfg.GCInterval)

	go func() {
		// Deferred calls run in reverse order: stop the ticker first, then
		// signal that the goroutine has exited.
		defer close(done)
		defer ticker.Stop()

		p.log.Info("Provider backend started garbage collection schedule")
		for {
			select {
			case <-gcCtx.Done():
				return
			case <-ticker.C:
				p.collectGarbage(gcCtx)
			}
		}
	}()
}
// StopGarbageCollection cancels the garbage collection loop that was started
// with [StartGarbageCollection] and blocks until its goroutine has exited.
// Calling it while no loop is running only logs a message.
func (p *ProvidersBackend) StopGarbageCollection() {
	p.gcCancelMu.Lock()
	defer p.gcCancelMu.Unlock()

	stop := p.gcCancel
	if stop == nil {
		p.log.Info("Provider backend's garbage collection isn't running")
		return
	}

	// Signal the goroutine to exit and wait for it to do so.
	stop()
	<-p.gcDone

	// Reset the state so that the loop can be started again.
	p.gcDone, p.gcCancel = nil, nil

	p.log.Info("Provider backend's garbage collection stopped")
}
// collectGarbage sweeps through the datastore and deletes all provider records
// that have expired. A record is expired if the
// [ProvidersBackendConfig].ProvideValidity is exceeded. Records that cannot be
// unmarshalled are deleted as well.
//
// Before sweeping, the LRU cache is purged and the gcSkip map is emptied.
// Records that are marked in gcSkip while the sweep is in progress (see Store)
// are left untouched. Errors are logged, never returned.
func (p *ProvidersBackend) collectGarbage(ctx context.Context) {
	p.log.Info("Provider backend starting garbage collection...")
	defer p.log.Info("Provider backend finished garbage collection!")

	// Faster to purge than garbage collecting
	p.cache.Purge()

	// erase map
	p.gcSkip.Range(func(key, _ any) bool {
		p.gcSkip.Delete(key)
		return true
	})

	// Now, kick off a GC of the datastore.
	q, err := p.datastore.Query(ctx, dsq.Query{Prefix: p.namespace})
	if err != nil {
		p.log.LogAttrs(ctx, slog.LevelWarn, "provider record garbage collection query failed", slog.String("err", err.Error()))
		return
	}
	defer func() {
		if err := q.Close(); err != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "failed closing garbage collection query", slog.String("err", err.Error()))
		}
	}()

	for e := range q.Next() {
		if e.Error != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "Garbage collection datastore entry contains error", slog.String("key", e.Key), slog.String("err", e.Error.Error()))
			continue
		}

		// The record was (re-)stored since this run started.
		if _, found := p.gcSkip.Load(e.Key); found {
			continue
		}

		rec := expiryRecord{}
		if err := rec.UnmarshalBinary(e.Value); err != nil {
			p.log.LogAttrs(ctx, slog.LevelWarn, "Garbage collection provider record unmarshalling failed", slog.String("key", e.Key), slog.String("err", err.Error()))
			// Corrupt record: remove it once and move on, instead of falling
			// through to the delete for expired records.
			p.delete(ctx, ds.RawKey(e.Key))
			continue
		}

		if p.cfg.clk.Now().Sub(rec.expiry) <= p.cfg.ProvideValidity {
			// record is still valid
			continue
		}

		// record expired -> garbage collect
		p.delete(ctx, ds.RawKey(e.Key))
	}
}
// trackCacheQuery increments the LRU cache telemetry counter by one. The
// measurement is annotated with whether the lookup was a cache hit and with
// the "provider" record type.
func (p *ProvidersBackend) trackCacheQuery(ctx context.Context, hit bool) {
	attrs := tele.FromContext(ctx, tele.AttrCacheHit(hit), tele.AttrRecordType("provider"))
	opt := metric.WithAttributeSet(attrs)

	p.cfg.Tele.LRUCache.Add(ctx, 1, opt)
}
// delete removes the record stored under the given datastore key. A failure
// isn't propagated to the caller; it is logged as a warning instead.
func (p *ProvidersBackend) delete(ctx context.Context, dsKey ds.Key) {
	err := p.datastore.Delete(ctx, dsKey)
	if err == nil {
		return
	}

	p.log.LogAttrs(ctx, slog.LevelWarn, "failed to remove provider record from disk",
		slog.String("key", dsKey.String()),
		slog.String("err", err.Error()),
	)
}
// expiryRecord captures the information that gets written to the datastore
// for any provider record. This record doesn't include any peer IDs or
// multiaddresses because peer IDs are part of the key that this record gets
// stored under and multiaddresses are stored in the addrBook. This record
// just tracks a single timestamp. It implements binary marshalling and
// unmarshalling methods for easy (de)serialization into the datastore.
type expiryRecord struct {
	expiry time.Time
}

// MarshalBinary returns the byte slice that should be stored in the datastore:
// the timestamp as nanoseconds since the Unix epoch, varint-encoded.
// This method doesn't comply to the [encoding.BinaryMarshaler] interface
// because it doesn't return an error. We don't need the conformance here
// though.
func (e *expiryRecord) MarshalBinary() (data []byte) {
	buf := make([]byte, binary.MaxVarintLen64)
	n := binary.PutVarint(buf, e.expiry.UnixNano())
	return buf[:n]
}

// UnmarshalBinary is the inverse operation to the above MarshalBinary and is
// used to deserialize any blob of bytes that was previously stored in the
// datastore. It returns an error, and leaves the record untouched, if data
// doesn't start with a valid 64-bit varint.
func (e *expiryRecord) UnmarshalBinary(data []byte) error {
	nsec, n := binary.Varint(data)
	// n == 0 means the buffer was too small, n < 0 means the encoded value
	// overflows 64 bits. Neither yields a usable timestamp.
	if n <= 0 {
		return fmt.Errorf("failed to parse time")
	}

	e.expiry = time.Unix(0, nsec)

	return nil
}
// A providerSet is used to gather provider information in a single struct. It
// also makes sure that the user doesn't add any duplicate peers.
type providerSet struct {
// providers lists every provider once, in the order in which they were
// first added.
providers []peer.AddrInfo
// set maps a provider's peer ID to the time given in the most recent
// addProvider call for that peer. It is also used to detect duplicates.
set map[peer.ID]time.Time
}
// addProvider registers the given provider with the set and remembers t as
// its timestamp. A provider that is already part of the set isn't added a
// second time; only its timestamp is refreshed.
func (ps *providerSet) addProvider(addrInfo peer.AddrInfo, t time.Time) {
	id := addrInfo.ID

	if _, known := ps.set[id]; !known {
		ps.providers = append(ps.providers, addrInfo)
	}

	ps.set[id] = t
}
// newDatastoreKey builds a datastore key from the given namespace and a list
// of binary strings. For example, the IPNS record keys have the format:
// "/ipns/$binary_id" (see [Routing Record]). Each binary string is base32
// encoded (unpadded standard alphabet) and the namespace and the encoded
// components are joined with forward slashes, with a leading slash in front.
//
// [Routing Record]: https://specs.ipfs.tech/ipns/ipns-record/#routing-record
func newDatastoreKey(namespace string, binStrs ...string) ds.Key {
	parts := make([]string, 0, len(binStrs)+1)
	parts = append(parts, namespace)

	for _, raw := range binStrs {
		encoded := base32.RawStdEncoding.EncodeToString([]byte(raw))
		parts = append(parts, encoded)
	}

	path := "/" + strings.Join(parts, "/")

	return ds.NewKey(path)
}
// newRoutingKey builds the routing key for the given namespace and binary
// string key. The result has the format: /$namespace/$binStr. The binary
// string is used verbatim, i.e. it isn't encoded in any way.
func newRoutingKey(namespace string, binStr string) string {
	var buffer bytes.Buffer

	// Two slashes plus both components.
	buffer.Grow(2 + len(namespace) + len(binStr))

	buffer.WriteByte('/')
	buffer.WriteString(namespace)
	buffer.WriteByte('/')
	buffer.WriteString(binStr)

	return buffer.String()
}