-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvshard.go
421 lines (343 loc) · 12.8 KB
/
vshard.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
package vshard_router //nolint:revive
import (
"context"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
"github.com/snksoft/crc"
"github.com/vmihailenco/msgpack/v5"
tarantool "github.com/tarantool/go-tarantool/v2"
)
// Sentinel errors exposed by this package.
var (
// ErrInvalidConfig is reported when the Config passed to NewRouter fails validation.
ErrInvalidConfig = fmt.Errorf("config invalid")
// ErrInvalidInstanceInfo is wrapped by InstanceInfo.Validate when a required field is empty.
ErrInvalidInstanceInfo = fmt.Errorf("invalid instance info")
// ErrInvalidReplicasetInfo is returned when the replicaset information is invalid.
ErrInvalidReplicasetInfo = fmt.Errorf("invalid replicaset info")
// ErrTopologyProvider is wrapped by NewRouter when initializing the topology provider fails.
ErrTopologyProvider = fmt.Errorf("got error from topology provider")
)
// consistentView was introduced by https://github.com/tarantool/go-vshard-router/issues/39.
// We use an array of atomics to handle the elements of routeMap lock-free.
// knownBucketCount reflects a statistic over routeMap.
// knownBucketCount might be inconsistent for a few microseconds, because we first change routeMap[bucketID]
// and only after that change knownBucketCount: this is not an atomic change of the complex state.
// This is not a problem at all.
//
// While changing `knownBucketCount` we heavily rely on the commutative property of the algebraic sum operation ("+");
// due to this property we are not afraid of any amount of concurrent modifications.
// See: https://en.wikipedia.org/wiki/Commutative_property
//
// Since RouteMapClean creates a new routeMap, we have to assign knownBucketCount := 0.
// But assignment is not a commutative operation, therefore we have to create a completely new atomic variable
// that reflects a statistic over the newly created routeMap.
type consistentView struct {
// routeMap is indexed by bucket id; a nil pointer means the bucket's replicaset is not known.
routeMap []atomic.Pointer[Replicaset]
// knownBucketCount counts the non-nil entries of routeMap (eventually consistent, see above).
knownBucketCount atomic.Int32
}
// Router holds the prepared configuration, the known replicasets and the bucket route map.
type Router struct {
// cfg is the configuration after defaults were applied by prepareCfg.
cfg Config
// nameToReplicasetMutex guards not the map itself, but the variable nameToReplicaset.
// nameToReplicaset is an immutable object by our convention.
// Whenever we add or remove a replicaset, we create a new map object.
// nameToReplicaset can be modified only by TopologyController methods.
// Assuming that we rarely add or remove a replicaset,
// this should be the simplest and most efficient way of handling concurrent access.
// Additionally, we can safely iterate over the map because it never changes.
nameToReplicasetMutex sync.RWMutex
nameToReplicaset map[string]*Replicaset
// viewMutex guards the view pointer (see getConsistentView and setConsistentView);
// the pointed-to consistentView is modified through its own atomics.
viewMutex sync.RWMutex
view *consistentView
// ----------------------- Map-Reduce -----------------------
// Storage Ref ID. It must be unique for each ref request
// and therefore is global and monotonically growing.
refID atomic.Int64
// cancelDiscovery cancels the context of the background discovery loop;
// NewRouter sets it only when DiscoveryMode is DiscoveryModeOn, otherwise it stays nil.
cancelDiscovery func()
}
// metrics returns the MetricsProvider stored in the router configuration.
func (r *Router) metrics() MetricsProvider {
	provider := r.cfg.Metrics
	return provider
}
// log returns the LogfProvider stored in the router configuration.
func (r *Router) log() LogfProvider {
	logger := r.cfg.Loggerf
	return logger
}
// getConsistentView returns the current view pointer, read under the
// viewMutex read lock.
func (r *Router) getConsistentView() *consistentView {
	r.viewMutex.RLock()
	defer r.viewMutex.RUnlock()

	return r.view
}
// setConsistentView replaces the current view pointer with view while
// holding the viewMutex write lock.
func (r *Router) setConsistentView(view *consistentView) {
	r.viewMutex.Lock()
	defer r.viewMutex.Unlock()

	r.view = view
}
// Config is the set of options accepted by NewRouter.
type Config struct {
// Providers
// Loggerf injects a custom logger. When nil, a no-op logger (emptyLogfProvider) is used.
Loggerf LogfProvider // Loggerf is not required
Metrics MetricsProvider // Metrics is not required; when nil, emptyMetricsProvider is used
TopologyProvider TopologyProvider // TopologyProvider is required; a nil value fails validation
// Discovery
// DiscoveryTimeout is the timeout between cron discovery jobs; when zero, it defaults to 1 minute.
DiscoveryTimeout time.Duration
DiscoveryMode DiscoveryMode
// DiscoveryWorkStep is a pause between calls of buckets_discovery on a storage
// in the bucket discovery logic. Default is 10ms.
DiscoveryWorkStep time.Duration
// BucketsSearchMode defines the policy for the Router.Route method.
// Default value is BucketsSearchLegacy.
// See BucketsSearchMode constants for more detail.
BucketsSearchMode BucketsSearchMode
// TotalBucketCount is required and must be greater than 0.
TotalBucketCount uint64
User string
Password string
// PoolOpts are the tarantool connection options.
// Note that PoolOpts.Logger is always overwritten so that tarantool internal events
// are logged through Loggerf.
PoolOpts tarantool.Opts
// BucketGetter is an optional argument.
// You can specify a function that will receive the bucket id from the context.
// This is useful if you use middleware that inserts the calculated bucket id into the request context.
BucketGetter func(ctx context.Context) uint64
// RequestTimeout is the timeout for requests to Tarantool.
// Don't rely on using this timeout.
// It is distinct from the timeout of the library itself,
// that is, our retry timeout applied when, for example, buckets move.
// Currently, it only works for sugar implementations.
RequestTimeout time.Duration
}
// BucketStatInfo carries the id and status of a single bucket.
// It is decoded from msgpack by the custom DecodeMsgpack method.
type BucketStatInfo struct {
// BucketID is read from the "id" key.
BucketID uint64 `msgpack:"id"`
// Status is read from the "status" key.
Status string `msgpack:"status"`
}
// DecodeMsgpack decodes a BucketStatInfo from a msgpack map by hand.
//
// The tnt vshard storage returns a map that also contains 'int' keys,
// example: map[id:48 status:active 1:48 2:active].
// msgpack v5 supports only string keys when decoding maps into structs,
// see issue: https://github.com/vmihailenco/msgpack/issues/372
// As a workaround, only the "id" and "status" string keys are read here and
// every other entry is skipped.
// Once the issue above is resolved, this method can be (and should be) deleted.
func (bsi *BucketStatInfo) DecodeMsgpack(d *msgpack.Decoder) error {
	entryCount, err := d.DecodeMapLen()
	if err != nil {
		return err
	}

	for remaining := entryCount; remaining > 0; remaining-- {
		rawKey, err := d.DecodeInterface()
		if err != nil {
			return err
		}

		// A non-string key yields the empty name and falls into the skip branch.
		name, _ := rawKey.(string)

		var valueErr error
		switch name {
		case "id":
			valueErr = d.Decode(&bsi.BucketID)
		case "status":
			valueErr = d.Decode(&bsi.Status)
		default:
			// skip unused value
			valueErr = d.Skip()
		}

		if valueErr != nil {
			return valueErr
		}
	}

	return nil
}
// InstanceInfo represents the information about an instance.
// This struct holds the necessary details such as the name, address, and UUID of the instance.
type InstanceInfo struct {
// Name is a required field for the instance; Validate rejects an empty Name.
// Starting with Tarantool 3.0, this definition is made into a human-readable name,
// and it is now mandatory for use in the library.
// The Name should be a unique identifier for the instance.
Name string
// Addr specifies the address of the instance; Validate rejects an empty Addr.
// This can be an IP address or a domain name, depending on how the instance is accessed.
// It is necessary for connecting to the instance or identifying its location.
Addr string
// UUID is an optional field that provides a globally unique identifier (UUID) for the instance.
// While this information is not mandatory, it can be useful for internal management or tracking purposes.
// The UUID ensures that each instance can be identified uniquely, but it is not required for basic operations.
// When left as uuid.Nil, String omits it from the output.
UUID uuid.UUID
}
// String renders the instance as "{name: ..., addr: ...}"; the uuid is
// included between name and addr only when it is not uuid.Nil.
func (ii InstanceInfo) String() string {
	if ii.UUID != uuid.Nil {
		return fmt.Sprintf("{name: %s, uuid: %s, addr: %s}", ii.Name, ii.UUID, ii.Addr)
	}

	return fmt.Sprintf("{name: %s, addr: %s}", ii.Name, ii.Addr)
}
// Validate checks the required fields of the instance info.
// It returns an error wrapping ErrInvalidInstanceInfo when Name or Addr is
// empty (Name is checked first), and nil otherwise.
func (ii InstanceInfo) Validate() error {
	switch {
	case ii.Name == "":
		return fmt.Errorf("%w: empty name", ErrInvalidInstanceInfo)
	case ii.Addr == "":
		return fmt.Errorf("%w: empty addr", ErrInvalidInstanceInfo)
	default:
		return nil
	}
}
// --------------------------------------------------------------------------------
// -- Configuration
// --------------------------------------------------------------------------------
// NewRouter validates cfg, applies defaults, initializes the topology provider
// and performs an initial bucket discovery.
//
// A configuration or topology-provider failure is returned as an error.
// A failure of the initial DiscoveryAllBuckets call is only logged.
// When cfg.DiscoveryMode is DiscoveryModeOn, a background discovery loop is
// started on a context derived from ctx.
func NewRouter(ctx context.Context, cfg Config) (*Router, error) {
	preparedCfg, err := prepareCfg(ctx, cfg)
	if err != nil {
		return nil, err
	}

	// One slot per bucket id plus slot 0, hence TotalBucketCount+1.
	initialView := &consistentView{
		routeMap: make([]atomic.Pointer[Replicaset], preparedCfg.TotalBucketCount+1),
	}

	router := &Router{
		cfg:              preparedCfg,
		nameToReplicaset: make(map[string]*Replicaset),
		view:             initialView,
	}

	if err = preparedCfg.TopologyProvider.Init(router.Topology()); err != nil {
		router.log().Errorf(ctx, "Can't create new topology provider with err: %s", err)

		return nil, fmt.Errorf("%w; cant init topology with err: %w", ErrTopologyProvider, err)
	}

	// The initial discovery is best-effort: its error does not fail construction.
	if err = router.DiscoveryAllBuckets(ctx); err != nil {
		router.log().Errorf(ctx, "router.DiscoveryAllBuckets failed: %v", err)
	}

	if preparedCfg.DiscoveryMode != DiscoveryModeOn {
		return router, nil
	}

	discoveryCronCtx, cancelFunc := context.WithCancel(ctx)

	// run background cron discovery loop
	// suppress linter warning: Non-inherited new context, use function like `context.WithXXX` instead (contextcheck)
	//nolint:contextcheck
	go router.cronDiscovery(discoveryCronCtx)

	router.cancelDiscovery = cancelFunc

	return router, nil
}
// BucketSet records that bucketID is served by the replicaset named rsName
// and returns that replicaset.
//
// An error is returned when rsName is not a known replicaset, or when
// bucketID is greater than the configured TotalBucketCount. The route map
// has no slot for such an id, so indexing it would panic.
func (r *Router) BucketSet(bucketID uint64, rsName string) (*Replicaset, error) {
	rs := r.getNameToReplicaset()[rsName]
	if rs == nil {
		return nil, newVShardErrorNoRouteToBucket(bucketID)
	}

	// Same upper bound as BucketReset: ids above TotalBucketCount are not
	// present in the route map.
	if bucketID > r.cfg.TotalBucketCount {
		return nil, fmt.Errorf("bucket id %d is out of range: total bucket count is %d",
			bucketID, r.cfg.TotalBucketCount)
	}

	view := r.getConsistentView()

	// Only a transition from "unknown" (nil) to a replicaset adds a known bucket.
	if prev := view.routeMap[bucketID].Swap(rs); prev == nil {
		view.knownBucketCount.Add(1)
	}

	return rs, nil
}
// BucketReset forgets which replicaset serves bucketID.
// Ids greater than the configured TotalBucketCount are ignored.
func (r *Router) BucketReset(bucketID uint64) {
	view := r.getConsistentView()

	if bucketID > r.cfg.TotalBucketCount {
		return
	}

	slot := &view.routeMap[bucketID]

	// Decrement the counter only if the slot actually held a replicaset.
	if prev := slot.Swap(nil); prev != nil {
		view.knownBucketCount.Add(-1)
	}
}
// RouteMapClean drops all known bucket routes by installing a fresh, empty
// consistentView (new route map and a zeroed known-bucket counter).
func (r *Router) RouteMapClean() {
	slotCount := r.cfg.TotalBucketCount + 1

	r.setConsistentView(&consistentView{
		routeMap: make([]atomic.Pointer[Replicaset], slotCount),
	})
}
// prepareCfg validates cfg and returns a copy with defaults filled in.
//
// Defaults applied: DiscoveryTimeout = 1 minute, DiscoveryWorkStep = 10ms,
// a no-op logger and a no-op metrics provider when those are nil.
// PoolOpts.Logger is always replaced so that tarantool internal events go
// through the router's logger.
//
// On a validation failure it returns an empty Config and an error that wraps
// both ErrInvalidConfig and the validation error, so callers can match it
// with errors.Is(err, ErrInvalidConfig).
func prepareCfg(ctx context.Context, cfg Config) (Config, error) {
	const (
		discoveryTimeoutDefault  = 1 * time.Minute
		discoveryWorkStepDefault = 10 * time.Millisecond
	)

	if err := validateCfg(cfg); err != nil {
		// %w (not %v) keeps the sentinel matchable; the message text is unchanged.
		return Config{}, fmt.Errorf("%w: %w", ErrInvalidConfig, err)
	}

	if cfg.DiscoveryTimeout == 0 {
		cfg.DiscoveryTimeout = discoveryTimeoutDefault
	}

	if cfg.Loggerf == nil {
		cfg.Loggerf = emptyLogfProvider
	}

	// Log tarantool internal events using the same logger as router uses.
	cfg.PoolOpts.Logger = tarantoolOptsLogger{
		loggerf: cfg.Loggerf,
		ctx:     ctx,
	}

	if cfg.Metrics == nil {
		cfg.Metrics = emptyMetricsProvider
	}

	if cfg.DiscoveryWorkStep == 0 {
		cfg.DiscoveryWorkStep = discoveryWorkStepDefault
	}

	return cfg, nil
}
// validateCfg checks the mandatory Config fields: TopologyProvider must be
// set and TotalBucketCount must be non-zero. The provider is checked first.
func validateCfg(cfg Config) error {
	switch {
	case cfg.TopologyProvider == nil:
		return fmt.Errorf("topology provider is nil")
	case cfg.TotalBucketCount == 0:
		return fmt.Errorf("bucket count must be greater than 0")
	default:
		return nil
	}
}
// --------------------------------------------------------------------------------
// -- Other
// --------------------------------------------------------------------------------
// BucketIDStrCRC32 maps shardKey to a bucket id in [1, totalBucketCount].
// The id is a 32-bit CRC of the key bytes (polynomial 0x1EDC6F41, reflected
// input/output, init 0xFFFFFFFF, no final xor) taken modulo totalBucketCount,
// plus one.
// totalBucketCount must be non-zero: the modulo would otherwise panic.
func BucketIDStrCRC32(shardKey string, totalBucketCount uint64) uint64 {
	params := crc.Parameters{
		Width:      32,
		Polynomial: 0x1EDC6F41,
		FinalXor:   0x0,
		ReflectIn:  true,
		ReflectOut: true,
		Init:       0xFFFFFFFF,
	}

	checksum := crc.CalculateCRC(&params, []byte(shardKey))

	return checksum%totalBucketCount + 1
}
// BucketIDStrCRC32 returns the bucket identifier for shardKey, computed by the
// package-level BucketIDStrCRC32 with the router's configured TotalBucketCount.
func (r *Router) BucketIDStrCRC32(shardKey string) uint64 {
	totalBuckets := r.cfg.TotalBucketCount

	return BucketIDStrCRC32(shardKey, totalBuckets)
}
// BucketCount reports the total number of buckets, as given by
// TotalBucketCount in the router configuration.
func (r *Router) BucketCount() uint64 {
	totalBuckets := r.cfg.TotalBucketCount

	return totalBuckets
}
// --------------------------------------------------------------------------------
// -- Bootstrap
// --------------------------------------------------------------------------------
// ClusterBootstrap initializes the cluster by creating buckets across the
// known replicasets. It computes the etalon (target) bucket count of every
// replicaset with CalculateEtalonBalance and then calls BucketForceCreate on
// each replicaset whose target is non-zero, assigning consecutive bucket id
// ranges starting from 1.
//
// ctx is passed to the storage calls and to the logger.
//
// ifNotBootstrapped controls error handling of BucketForceCreate:
//   - true: the error is remembered and the loop continues with the next
//     replicaset; the last such error is returned at the end. The bucket id
//     cursor is not advanced for a failed replicaset, so the next replicaset
//     starts from the same bucket id.
//   - false: the first error is returned immediately.
//
// An error from CalculateEtalonBalance is always returned immediately.
func (r *Router) ClusterBootstrap(ctx context.Context, ifNotBootstrapped bool) error {
	nameToReplicasetRef := r.getNameToReplicaset()

	// rsNames[i] is the name of rssToBootstrap[i]; it is kept only for logging.
	rsNames := make([]string, 0, len(nameToReplicasetRef))
	rssToBootstrap := make([]Replicaset, 0, len(nameToReplicasetRef))

	for name, rs := range nameToReplicasetRef {
		rsNames = append(rsNames, name)
		rssToBootstrap = append(rssToBootstrap, *rs)
	}

	if err := CalculateEtalonBalance(rssToBootstrap, r.cfg.TotalBucketCount); err != nil {
		return err
	}

	var lastErr error

	bucketID := uint64(1)

	for i, rs := range rssToBootstrap {
		if rs.EtalonBucketCount == 0 {
			continue
		}

		if err := rs.BucketForceCreate(ctx, bucketID, rs.EtalonBucketCount); err != nil {
			if !ifNotBootstrapped {
				return err
			}

			lastErr = err

			continue
		}

		nextBucketID := bucketID + rs.EtalonBucketCount
		// Log the replicaset name, not the slice index.
		r.log().Infof(ctx, "Buckets from %d to %d are bootstrapped on \"%s\"", bucketID, nextBucketID-1, rsNames[i])
		bucketID = nextBucketID
	}

	return lastErr
}