Skip to content

Commit

Permalink
Rax size tracking (valkey-io#688)
Browse files Browse the repository at this point in the history
Introduce a `size_t` field into the rax struct to track allocation size.
Update the allocation size on rax inserts and deletes.
Return the allocation size when `raxAllocSize` is called.

This size tracking is now used in MEMORY USAGE and MEMORY STATS in place
of the previous method based on sampling.

The module API allows creating sorted dictionaries, which are backed by
rax. Users now also get precise memory allocation sizes for them (through
`ValkeyModule_MallocSizeDict`).

Fixes valkey-io#677.

For the release notes:

* MEMORY USAGE and MEMORY STATS are now exact for streams, rather than
based on sampling.

---------

Signed-off-by: Guillaume Koenig <[email protected]>
Signed-off-by: Guillaume Koenig <[email protected]>
Co-authored-by: Joey <[email protected]>
Co-authored-by: Viktor Söderqvist <[email protected]>
  • Loading branch information
3 people authored and SoftlyRaining committed Oct 11, 2024
1 parent d1ba700 commit 76bee0f
Show file tree
Hide file tree
Showing 8 changed files with 1,078 additions and 34 deletions.
1 change: 1 addition & 0 deletions .config/typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ extend-ignore-re = [
"D4C4DAA4", # sha1.c
"Georg Nees",
"\\[l\\]ist", # eval.c
"LKE", # test_rax.c
]

[type.tcl]
Expand Down
6 changes: 2 additions & 4 deletions src/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -10840,10 +10840,8 @@ size_t VM_MallocSizeString(ValkeyModuleString *str) {
* it does not include the allocation size of the keys and values.
*/
size_t VM_MallocSizeDict(ValkeyModuleDict *dict) {
    /* Start with the module-level wrapper structure itself. */
    size_t size = sizeof(ValkeyModuleDict);
    /* Add the allocation size tracked by the underlying rax. raxNew() seeds
     * that counter with the rax header and the head node, so sizeof(rax)
     * must not be added a second time here, and no per-node estimate is
     * needed anymore. */
    size += raxAllocSize(dict->rax);
    return size;
}

Expand Down
29 changes: 3 additions & 26 deletions src/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -952,29 +952,6 @@ char *strEncoding(int encoding) {
/* =========================== Memory introspection ========================= */


/* Estimate the memory footprint, in bytes, of a radix tree that is used to
 * store Stream IDs.
 *
 * Note: guessing the size of a radix tree is not trivial, so the result is
 * a heuristic made of four terms: the rax header, sizeof(streamID) for each
 * key (the ID), sizeof(raxNode) for each bare node, and a fixed per-node
 * overhead of 30 longs standing in for the data and child pointers. This
 * recipe was obtained by checking the average radix tree created by real
 * workloads, and then adjusting the constants to get numbers that more or
 * less match the real memory usage.
 *
 * The number of nodes and keys may differ depending on the insertion speed
 * and thus on the ability of the radix tree to compress prefixes. */
size_t streamRadixTreeMemoryUsage(rax *rax) {
    size_t size = sizeof(*rax);
    /* Accumulate instead of assigning, so that the header size computed
     * above is not silently discarded. */
    size += rax->numele * sizeof(streamID);
    size += rax->numnodes * sizeof(raxNode);
    /* Add a fixed overhead due to the aux data pointer, children, ... */
    size += rax->numnodes * sizeof(long) * 30;
    return size;
}

/* Returns the size in bytes consumed by the key's value in RAM.
* Note that the returned value is just an approximation, especially in the
* case of aggregated data types where only "sample_size" elements
Expand Down Expand Up @@ -1072,7 +1049,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
} else if (o->type == OBJ_STREAM) {
stream *s = o->ptr;
asize = sizeof(*o) + sizeof(*s);
asize += streamRadixTreeMemoryUsage(s->rax);
asize += raxAllocSize(s->rax);

/* Now we have to add the listpacks. The last listpack is often non
* complete, so we estimate the size of the first N listpacks, and
Expand Down Expand Up @@ -1112,7 +1089,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
while (raxNext(&ri)) {
streamCG *cg = ri.data;
asize += sizeof(*cg);
asize += streamRadixTreeMemoryUsage(cg->pel);
asize += raxAllocSize(cg->pel);
asize += sizeof(streamNACK) * raxSize(cg->pel);

/* For each consumer we also need to add the basic data
Expand All @@ -1124,7 +1101,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
streamConsumer *consumer = cri.data;
asize += sizeof(*consumer);
asize += sdslen(consumer->name);
asize += streamRadixTreeMemoryUsage(consumer->pel);
asize += raxAllocSize(consumer->pel);
/* Don't count NACKs again, they are shared with the
* consumer group PEL. */
}
Expand Down
28 changes: 27 additions & 1 deletion src/rax.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ rax *raxNew(void) {
rax->numele = 0;
rax->numnodes = 1;
rax->head = raxNewNode(0, 0);
rax->alloc_size = rax_ptr_alloc_size(rax) + rax_ptr_alloc_size(rax->head);
if (rax->head == NULL) {
rax_free(rax);
return NULL;
Expand Down Expand Up @@ -510,8 +511,12 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
debugf("### Insert: node representing key exists\n");
/* Make space for the value pointer if needed. */
if (!h->iskey || (h->isnull && overwrite)) {
size_t oldalloc = rax_ptr_alloc_size(h);
h = raxReallocForData(h, data);
if (h) memcpy(parentlink, &h, sizeof(h));
if (h) {
memcpy(parentlink, &h, sizeof(h));
rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h);
}
}
if (h == NULL) {
errno = ENOMEM;
Expand Down Expand Up @@ -706,6 +711,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
return 0;
}
splitnode->data[0] = h->data[j];
rax->alloc_size += rax_ptr_alloc_size(splitnode);

if (j == 0) {
/* 3a: Replace the old node with the split node. */
Expand All @@ -730,6 +736,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
memcpy(parentlink, &trimmed, sizeof(trimmed));
parentlink = cp; /* Set parentlink to splitnode parent. */
rax->numnodes++;
rax->alloc_size += rax_ptr_alloc_size(trimmed);
}

/* 4: Create the postfix node: what remains of the original
Expand All @@ -744,6 +751,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
raxNode **cp = raxNodeLastChildPtr(postfix);
memcpy(cp, &next, sizeof(next));
rax->numnodes++;
rax->alloc_size += rax_ptr_alloc_size(postfix);
} else {
/* 4b: just use next as postfix node. */
postfix = next;
Expand All @@ -756,6 +764,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
/* 6. Continue insertion: this will cause the splitnode to
* get a new child (the non common character at the currently
* inserted key). */
rax->alloc_size -= rax_ptr_alloc_size(h);
rax_free(h);
h = splitnode;
} else if (h->iscompr && i == len) {
Expand Down Expand Up @@ -794,6 +803,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
raxNode **cp = raxNodeLastChildPtr(postfix);
memcpy(cp, &next, sizeof(next));
rax->numnodes++;
rax->alloc_size += rax_ptr_alloc_size(postfix);

/* 3: Trim the compressed node. */
trimmed->size = j;
Expand All @@ -806,6 +816,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
void *aux = raxGetData(h);
raxSetData(trimmed, aux);
}
rax->alloc_size += rax_ptr_alloc_size(trimmed);

/* Fix the trimmed node child pointer to point to
* the postfix node. */
Expand All @@ -815,6 +826,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
/* Finish! We don't need to continue with the insertion
* algorithm for ALGO 2. The key is already inserted. */
rax->numele++;
rax->alloc_size -= rax_ptr_alloc_size(h);
rax_free(h);
return 1; /* Key inserted. */
}
Expand All @@ -823,6 +835,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
* chars in our string. We need to insert the missing nodes. */
while (i < len) {
raxNode *child;
size_t oldalloc = rax_ptr_alloc_size(h);

/* If this node is going to have a single child, and there
* are other characters, so that that would result in a chain
Expand All @@ -848,14 +861,17 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
i++;
}
rax->numnodes++;
rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h) + rax_ptr_alloc_size(child);
h = child;
}
size_t oldalloc = rax_ptr_alloc_size(h);
raxNode *newh = raxReallocForData(h, data);
if (newh == NULL) goto oom;
h = newh;
if (!h->iskey) rax->numele++;
raxSetData(h, data);
memcpy(parentlink, &h, sizeof(h));
rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h);
return 1; /* Element inserted. */

oom:
Expand Down Expand Up @@ -1025,6 +1041,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
child = h;
debugf("Freeing child %p [%.*s] key:%d\n", (void *)child, (int)child->size, (char *)child->data,
child->iskey);
rax->alloc_size -= rax_ptr_alloc_size(child);
rax_free(child);
rax->numnodes--;
h = raxStackPop(&ts);
Expand All @@ -1034,7 +1051,9 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
}
if (child) {
debugf("Unlinking child %p from parent %p\n", (void *)child, (void *)h);
size_t oldalloc = rax_ptr_alloc_size(h);
raxNode *new = raxRemoveChild(h, child);
rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(new);
if (new != h) {
raxNode *parent = raxStackPeek(&ts);
raxNode **parentlink;
Expand Down Expand Up @@ -1151,6 +1170,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
new->iscompr = 1;
new->size = comprsize;
rax->numnodes++;
rax->alloc_size += rax_ptr_alloc_size(new);

/* Scan again, this time to populate the new node content and
* to fix the new node child pointer. At the same time we free
Expand All @@ -1163,6 +1183,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
raxNode **cp = raxNodeLastChildPtr(h);
raxNode *tofree = h;
memcpy(&h, cp, sizeof(h));
rax->alloc_size -= rax_ptr_alloc_size(tofree);
rax_free(tofree);
rax->numnodes--;
if (h->iskey || (!h->iscompr && h->size != 1)) break;
Expand Down Expand Up @@ -1764,6 +1785,11 @@ uint64_t raxSize(rax *rax) {
return rax->numele;
}

/* Report how many bytes are currently accounted to this radix tree, as
 * recorded in its alloc_size field. */
size_t raxAllocSize(rax *rax) {
    const size_t tracked_bytes = rax->alloc_size;
    return tracked_bytes;
}

/* ----------------------------- Introspection ------------------------------ */

/* This function is mostly used for debugging and learning purposes.
Expand Down
8 changes: 5 additions & 3 deletions src/rax.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,10 @@ typedef struct raxNode {
} raxNode;

/* Radix tree handle: the root pointer plus bookkeeping counters. Each
 * member is declared exactly once. */
typedef struct rax {
    raxNode *head;     /* Pointer to root node of tree */
    uint64_t numele;   /* Number of keys in the tree */
    uint64_t numnodes; /* Number of rax nodes in the tree */
    size_t alloc_size; /* Total allocation size of the tree in bytes */
} rax;

/* Stack data structure used by raxLowWalk() in order to, optionally, return
Expand Down Expand Up @@ -203,6 +204,7 @@ void raxStop(raxIterator *it);
int raxEOF(raxIterator *it);
void raxShow(rax *rax);
uint64_t raxSize(rax *rax);
size_t raxAllocSize(rax *rax);
unsigned long raxTouch(raxNode *n);
void raxSetDebugMsg(int onoff);

Expand Down
1 change: 1 addition & 0 deletions src/rax_malloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@
#define rax_malloc zmalloc
#define rax_realloc zrealloc
#define rax_free zfree
#define rax_ptr_alloc_size zmalloc_size
#endif
14 changes: 14 additions & 0 deletions src/unit/test_files.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ int test_listpackBenchmarkLpValidateIntegrity(int argc, char **argv, int flags);
int test_listpackBenchmarkLpCompareWithString(int argc, char **argv, int flags);
int test_listpackBenchmarkLpCompareWithNumber(int argc, char **argv, int flags);
int test_listpackBenchmarkFree(int argc, char **argv, int flags);
int test_raxRandomWalk(int argc, char **argv, int flags);
int test_raxIteratorUnitTests(int argc, char **argv, int flags);
int test_raxTryInsertUnitTests(int argc, char **argv, int flags);
int test_raxRegressionTest1(int argc, char **argv, int flags);
int test_raxRegressionTest2(int argc, char **argv, int flags);
int test_raxRegressionTest3(int argc, char **argv, int flags);
int test_raxRegressionTest4(int argc, char **argv, int flags);
int test_raxRegressionTest5(int argc, char **argv, int flags);
int test_raxRegressionTest6(int argc, char **argv, int flags);
int test_raxBenchmark(int argc, char **argv, int flags);
int test_raxHugeKey(int argc, char **argv, int flags);
int test_raxFuzz(int argc, char **argv, int flags);
int test_sds(int argc, char **argv, int flags);
int test_typesAndAllocSize(int argc, char **argv, int flags);
int test_sdsHeaderSizes(int argc, char **argv, int flags);
Expand Down Expand Up @@ -156,6 +168,7 @@ unitTest __test_hashset_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_fun
unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}};
unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}};
unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", 
test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_raxIteratorUnitTests", test_raxIteratorUnitTests}, {"test_raxTryInsertUnitTests", test_raxTryInsertUnitTests}, {"test_raxRegressionTest1", test_raxRegressionTest1}, {"test_raxRegressionTest2", test_raxRegressionTest2}, {"test_raxRegressionTest3", test_raxRegressionTest3}, {"test_raxRegressionTest4", test_raxRegressionTest4}, {"test_raxRegressionTest5", test_raxRegressionTest5}, {"test_raxRegressionTest6", test_raxRegressionTest6}, {"test_raxBenchmark", test_raxBenchmark}, {"test_raxHugeKey", test_raxHugeKey}, {"test_raxFuzz", test_raxFuzz}, {NULL, NULL}};
unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {NULL, NULL}};
unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}};
unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}};
Expand All @@ -175,6 +188,7 @@ struct unitTestSuite {
{"test_intset.c", __test_intset_c},
{"test_kvstore.c", __test_kvstore_c},
{"test_listpack.c", __test_listpack_c},
{"test_rax.c", __test_rax_c},
{"test_sds.c", __test_sds_c},
{"test_sha1.c", __test_sha1_c},
{"test_util.c", __test_util_c},
Expand Down
Loading

0 comments on commit 76bee0f

Please sign in to comment.