diff --git a/.config/typos.toml b/.config/typos.toml index d378b5655a..1dc44ea0e9 100644 --- a/.config/typos.toml +++ b/.config/typos.toml @@ -20,6 +20,7 @@ extend-ignore-re = [ "D4C4DAA4", # sha1.c "Georg Nees", "\\[l\\]ist", # eval.c + "LKE", # test_rax.c ] [type.tcl] diff --git a/src/module.c b/src/module.c index 15a7fb91f4..38d0c2d968 100644 --- a/src/module.c +++ b/src/module.c @@ -10840,10 +10840,8 @@ size_t VM_MallocSizeString(ValkeyModuleString *str) { * it does not include the allocation size of the keys and values. */ size_t VM_MallocSizeDict(ValkeyModuleDict *dict) { - size_t size = sizeof(ValkeyModuleDict) + sizeof(rax); - size += dict->rax->numnodes * sizeof(raxNode); - /* For more info about this weird line, see streamRadixTreeMemoryUsage */ - size += dict->rax->numnodes * sizeof(long) * 30; + size_t size = sizeof(ValkeyModuleDict); + size += raxAllocSize(dict->rax); return size; } diff --git a/src/object.c b/src/object.c index d409fa8d5c..2508f20ab6 100644 --- a/src/object.c +++ b/src/object.c @@ -952,29 +952,6 @@ char *strEncoding(int encoding) { /* =========================== Memory introspection ========================= */ -/* This is a helper function with the goal of estimating the memory - * size of a radix tree that is used to store Stream IDs. - * - * Note: to guess the size of the radix tree is not trivial, so we - * approximate it considering 16 bytes of data overhead for each - * key (the ID), and then adding the number of bare nodes, plus some - * overhead due by the data and child pointers. This secret recipe - * was obtained by checking the average radix tree created by real - * workloads, and then adjusting the constants to get numbers that - * more or less match the real memory usage. - * - * Actually the number of nodes and keys may be different depending - * on the insertion speed and thus the ability of the radix tree - * to compress prefixes. 
*/ -size_t streamRadixTreeMemoryUsage(rax *rax) { - size_t size = sizeof(*rax); - size = rax->numele * sizeof(streamID); - size += rax->numnodes * sizeof(raxNode); - /* Add a fixed overhead due to the aux data pointer, children, ... */ - size += rax->numnodes * sizeof(long) * 30; - return size; -} - /* Returns the size in bytes consumed by the key's value in RAM. * Note that the returned value is just an approximation, especially in the * case of aggregated data types where only "sample_size" elements @@ -1072,7 +1049,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { } else if (o->type == OBJ_STREAM) { stream *s = o->ptr; asize = sizeof(*o) + sizeof(*s); - asize += streamRadixTreeMemoryUsage(s->rax); + asize += raxAllocSize(s->rax); /* Now we have to add the listpacks. The last listpack is often non * complete, so we estimate the size of the first N listpacks, and @@ -1112,7 +1089,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { while (raxNext(&ri)) { streamCG *cg = ri.data; asize += sizeof(*cg); - asize += streamRadixTreeMemoryUsage(cg->pel); + asize += raxAllocSize(cg->pel); asize += sizeof(streamNACK) * raxSize(cg->pel); /* For each consumer we also need to add the basic data @@ -1124,7 +1101,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { streamConsumer *consumer = cri.data; asize += sizeof(*consumer); asize += sdslen(consumer->name); - asize += streamRadixTreeMemoryUsage(consumer->pel); + asize += raxAllocSize(consumer->pel); /* Don't count NACKs again, they are shared with the * consumer group PEL. 
*/ } diff --git a/src/rax.c b/src/rax.c index 319d89a2dc..ed17f3735d 100644 --- a/src/rax.c +++ b/src/rax.c @@ -192,6 +192,7 @@ rax *raxNew(void) { rax->numele = 0; rax->numnodes = 1; rax->head = raxNewNode(0, 0); + rax->alloc_size = rax_ptr_alloc_size(rax) + rax_ptr_alloc_size(rax->head); if (rax->head == NULL) { rax_free(rax); return NULL; @@ -510,8 +511,12 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** debugf("### Insert: node representing key exists\n"); /* Make space for the value pointer if needed. */ if (!h->iskey || (h->isnull && overwrite)) { + size_t oldalloc = rax_ptr_alloc_size(h); h = raxReallocForData(h, data); - if (h) memcpy(parentlink, &h, sizeof(h)); + if (h) { + memcpy(parentlink, &h, sizeof(h)); + rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h); + } } if (h == NULL) { errno = ENOMEM; @@ -706,6 +711,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** return 0; } splitnode->data[0] = h->data[j]; + rax->alloc_size += rax_ptr_alloc_size(splitnode); if (j == 0) { /* 3a: Replace the old node with the split node. */ @@ -730,6 +736,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** memcpy(parentlink, &trimmed, sizeof(trimmed)); parentlink = cp; /* Set parentlink to splitnode parent. */ rax->numnodes++; + rax->alloc_size += rax_ptr_alloc_size(trimmed); } /* 4: Create the postfix node: what remains of the original @@ -744,6 +751,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** raxNode **cp = raxNodeLastChildPtr(postfix); memcpy(cp, &next, sizeof(next)); rax->numnodes++; + rax->alloc_size += rax_ptr_alloc_size(postfix); } else { /* 4b: just use next as postfix node. */ postfix = next; @@ -756,6 +764,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** /* 6. 
Continue insertion: this will cause the splitnode to * get a new child (the non common character at the currently * inserted key). */ + rax->alloc_size -= rax_ptr_alloc_size(h); rax_free(h); h = splitnode; } else if (h->iscompr && i == len) { @@ -794,6 +803,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** raxNode **cp = raxNodeLastChildPtr(postfix); memcpy(cp, &next, sizeof(next)); rax->numnodes++; + rax->alloc_size += rax_ptr_alloc_size(postfix); /* 3: Trim the compressed node. */ trimmed->size = j; @@ -806,6 +816,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** void *aux = raxGetData(h); raxSetData(trimmed, aux); } + rax->alloc_size += rax_ptr_alloc_size(trimmed); /* Fix the trimmed node child pointer to point to * the postfix node. */ @@ -815,6 +826,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** /* Finish! We don't need to continue with the insertion * algorithm for ALGO 2. The key is already inserted. */ rax->numele++; + rax->alloc_size -= rax_ptr_alloc_size(h); rax_free(h); return 1; /* Key inserted. */ } @@ -823,6 +835,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** * chars in our string. We need to insert the missing nodes. 
*/ while (i < len) { raxNode *child; + size_t oldalloc = rax_ptr_alloc_size(h); /* If this node is going to have a single child, and there * are other characters, so that that would result in a chain @@ -848,14 +861,17 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void ** i++; } rax->numnodes++; + rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h) + rax_ptr_alloc_size(child); h = child; } + size_t oldalloc = rax_ptr_alloc_size(h); raxNode *newh = raxReallocForData(h, data); if (newh == NULL) goto oom; h = newh; if (!h->iskey) rax->numele++; raxSetData(h, data); memcpy(parentlink, &h, sizeof(h)); + rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h); return 1; /* Element inserted. */ oom: @@ -1025,6 +1041,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { child = h; debugf("Freeing child %p [%.*s] key:%d\n", (void *)child, (int)child->size, (char *)child->data, child->iskey); + rax->alloc_size -= rax_ptr_alloc_size(child); rax_free(child); rax->numnodes--; h = raxStackPop(&ts); @@ -1034,7 +1051,9 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { } if (child) { debugf("Unlinking child %p from parent %p\n", (void *)child, (void *)h); + size_t oldalloc = rax_ptr_alloc_size(h); raxNode *new = raxRemoveChild(h, child); + rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(new); if (new != h) { raxNode *parent = raxStackPeek(&ts); raxNode **parentlink; @@ -1151,6 +1170,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { new->iscompr = 1; new->size = comprsize; rax->numnodes++; + rax->alloc_size += rax_ptr_alloc_size(new); /* Scan again, this time to populate the new node content and * to fix the new node child pointer. 
/* Return the rax tree allocation size in bytes.
 *
 * This simply reads the 'alloc_size' counter stored in the rax header; the
 * tree is not walked. The counter is seeded in raxNew() with the allocation
 * sizes of the rax structure and of the head node, and is adjusted with
 * rax_ptr_alloc_size() wherever the insert/remove code allocates, reallocates
 * or frees a node. */
size_t raxAllocSize(rax *rax) {
    return rax->alloc_size;
}
test_listpackBenchmarkLpCompareWithString(int argc, char **argv, int flags); int test_listpackBenchmarkLpCompareWithNumber(int argc, char **argv, int flags); int test_listpackBenchmarkFree(int argc, char **argv, int flags); +int test_raxRandomWalk(int argc, char **argv, int flags); +int test_raxIteratorUnitTests(int argc, char **argv, int flags); +int test_raxTryInsertUnitTests(int argc, char **argv, int flags); +int test_raxRegressionTest1(int argc, char **argv, int flags); +int test_raxRegressionTest2(int argc, char **argv, int flags); +int test_raxRegressionTest3(int argc, char **argv, int flags); +int test_raxRegressionTest4(int argc, char **argv, int flags); +int test_raxRegressionTest5(int argc, char **argv, int flags); +int test_raxRegressionTest6(int argc, char **argv, int flags); +int test_raxBenchmark(int argc, char **argv, int flags); +int test_raxHugeKey(int argc, char **argv, int flags); +int test_raxFuzz(int argc, char **argv, int flags); int test_sds(int argc, char **argv, int flags); int test_typesAndAllocSize(int argc, char **argv, int flags); int test_sdsHeaderSizes(int argc, char **argv, int flags); @@ -144,6 +156,7 @@ unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, N unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, 
{"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}}; unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, 
{"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, 
{"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}}; +unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_raxIteratorUnitTests", test_raxIteratorUnitTests}, {"test_raxTryInsertUnitTests", test_raxTryInsertUnitTests}, {"test_raxRegressionTest1", test_raxRegressionTest1}, {"test_raxRegressionTest2", test_raxRegressionTest2}, {"test_raxRegressionTest3", test_raxRegressionTest3}, {"test_raxRegressionTest4", test_raxRegressionTest4}, {"test_raxRegressionTest5", test_raxRegressionTest5}, {"test_raxRegressionTest6", test_raxRegressionTest6}, {"test_raxBenchmark", test_raxBenchmark}, {"test_raxHugeKey", test_raxHugeKey}, {"test_raxFuzz", test_raxFuzz}, {NULL, NULL}}; unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {NULL, NULL}}; unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; @@ -162,6 +175,7 @@ struct unitTestSuite { {"test_intset.c", __test_intset_c}, {"test_kvstore.c", __test_kvstore_c}, {"test_listpack.c", __test_listpack_c}, + {"test_rax.c", __test_rax_c}, {"test_sds.c", __test_sds_c}, {"test_sha1.c", __test_sha1_c}, {"test_util.c", __test_util_c}, diff --git a/src/unit/test_rax.c b/src/unit/test_rax.c new file mode 100644 index 0000000000..5f346b4115 --- /dev/null +++ b/src/unit/test_rax.c @@ -0,0 +1,1025 @@ +/* Rax -- A radix tree implementation. 
+ * + * Copyright (c) 2017-2018, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include "../rax.c" +#include "../mt19937-64.c" +#include "test_help.h" + +uint16_t crc16(const char *buf, int len); /* From crc16.c */ +long long _ustime(void); /* From test_crc64combine.c */ + +/* --------------------------------------------------------------------------- + * Simple hash table implementation, no rehashing, just chaining. 
/* Number of buckets of the reference hash table. It is intentionally huge:
 * the table is never rehashed, so it must stay fast with chaining alone. */
#define HT_TABLE_SIZE 100000

/* One entry of a bucket chain: a private copy of the key bytes, the value
 * pointer supplied by the caller and the link to the next entry. */
typedef struct htNode {
    uint64_t keylen;
    unsigned char *key;
    void *data;
    struct htNode *next;
} htNode;

/* The table itself: element counter plus the fixed array of chain heads. */
typedef struct ht {
    uint64_t numele;
    htNode *table[HT_TABLE_SIZE];
} hashtable;

/* Allocate a zeroed hash table holding no elements. */
hashtable *htNew(void) {
    hashtable *t = zcalloc(sizeof(*t));
    t->numele = 0;
    return t;
}

/* djb2 hash of the 'len' bytes at 's', reduced to a bucket index. */
uint32_t htHash(unsigned char *s, size_t len) {
    uint32_t acc = 5381;
    size_t pos = 0;
    while (pos < len) {
        acc = (acc << 5) + acc + s[pos]; /* acc * 33 + byte */
        pos++;
    }
    return acc % HT_TABLE_SIZE;
}

/* Low level lookup. Returns the node holding the key, or NULL.
 *
 * If 'hash' is not NULL it receives the bucket index of the key. If
 * 'parentlink' is not NULL it receives the address of the pointer that
 * references the returned node or, when the key is missing, the address of
 * the NULL pointer terminating the bucket chain. */
htNode *htRawLookup(hashtable *t, unsigned char *s, size_t len, uint32_t *hash, htNode ***parentlink) {
    uint32_t bucket = htHash(s, len);
    htNode **link = &t->table[bucket];

    if (hash != NULL) *hash = bucket;
    for (; *link != NULL; link = &(*link)->next) {
        htNode *cur = *link;
        if (cur->keylen == len && memcmp(cur->key, s, len) == 0) {
            if (parentlink != NULL) *parentlink = link;
            return cur;
        }
    }
    if (parentlink != NULL) *parentlink = link;
    return NULL;
}

/* Store 'data' under the given key. Returns 1 when a new element was
 * created, 0 when the key was already there and only its value changed. */
int htAdd(hashtable *t, unsigned char *s, size_t len, void *data) {
    uint32_t bucket;
    htNode *hit = htRawLookup(t, s, len, &bucket, NULL);

    if (hit != NULL) {
        hit->data = data;
        return 0;
    }

    /* New key: copy it and push the node on the head of its bucket. */
    htNode *fresh = zmalloc(sizeof(*fresh));
    fresh->key = zmalloc(len);
    memcpy(fresh->key, s, len);
    fresh->keylen = len;
    fresh->data = data;
    fresh->next = t->table[bucket];
    t->table[bucket] = fresh;
    t->numele++;
    return 1;
}
/* This is a simple Feistel network in order to turn every possible
 * uint32_t input into another "randomly" looking uint32_t. It is a
 * one to one map so there are no repetitions.
 *
 * The input is split in two 16 bit halves that go through 8 rounds; in each
 * round the right half is mixed by the round function F and XORed into the
 * left half, then the halves are swapped. The result is the final right half
 * in the upper 16 bits and the final left half in the lower 16 bits. */
static uint32_t int2int(uint32_t input) {
    uint16_t l = input & 0xffff;
    uint16_t r = input >> 16;
    for (int i = 0; i < 8; i++) {
        uint16_t nl = r;
        uint16_t F = (((r * 31) + (r >> 5) + 7 * 371) ^ r) & 0xffff;
        r = l ^ F;
        l = nl;
    }
    /* 'r' would be promoted to a signed int by the shift: for r >= 0x8000
     * the shifted value does not fit in an int, which is undefined behavior.
     * Widen it to uint32_t first so the shift is well defined. */
    return ((uint32_t)r << 16) | l;
}
/* Write 'i' into 's' as an alphanumerical, null terminated key and return
 * the key length (terminator excluded). The number is emitted in base 62,
 * least significant digit first, using the symbols A-Z, a-z, 0-9. At most
 * maxlen-1 symbols are produced; with maxlen == 0 nothing is written and 0
 * is returned. The large charset lets the radix tree be tested with many
 * children per node. */
static size_t int2alphakey(char *s, size_t maxlen, uint32_t i) {
    static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                   "abcdefghijklmnopqrstuvwxyz"
                                   "0123456789";
    const size_t base = sizeof(alphabet) - 1; /* 62 symbols. */

    if (maxlen == 0) return 0;

    const size_t room = maxlen - 1; /* One byte is kept for the null term. */
    size_t used = 0;
    while (used < room) {
        s[used++] = alphabet[i % base];
        if (i < base) break; /* That was the most significant digit. */
        i /= base;
    }
    s[used] = '\0';
    return used;
}
maxlen) i = maxlen; + memset(s, 'A', i); + return i; + } else { + return 0; + } +} + +/* -------------------------------------------------------------------------- */ + +/* Perform a fuzz test, returns 0 on success, 1 on error. */ +int fuzzTest(int keymode, size_t count, double addprob, double remprob) { + hashtable *ht = htNew(); + rax *rax = raxNew(); + + printf("Fuzz test in mode %d [%zu]: ", keymode, count); + fflush(stdout); + + /* Perform random operations on both the dictionaries. */ + for (size_t i = 0; i < count; i++) { + unsigned char key[1024]; + uint32_t keylen; + + /* Insert element. */ + if ((double)genrand64_int64() / RAND_MAX < addprob) { + keylen = int2key((char *)key, sizeof(key), i, keymode); + void *val = (void *)(unsigned long)genrand64_int64(); + /* Stress NULL values more often, they use a special encoding. */ + if (!(genrand64_int64() % 100)) val = NULL; + int retval1 = htAdd(ht, key, keylen, val); + int retval2 = raxInsert(rax, key, keylen, val, NULL); + if (retval1 != retval2) { + printf("Fuzz: key insertion reported mismatching value in HT/RAX\n"); + return 1; + } + } + + /* Remove element. */ + if ((double)genrand64_int64() / RAND_MAX < remprob) { + keylen = int2key((char *)key, sizeof(key), i, keymode); + int retval1 = htRem(ht, key, keylen); + int retval2 = raxRemove(rax, key, keylen, NULL); + if (retval1 != retval2) { + printf("Fuzz: key deletion of '%.*s' reported mismatching " + "value in HT=%d RAX=%d\n", + (int)keylen, (char *)key, retval1, retval2); + return 1; + } + } + } + + /* Check that count matches. */ + if (ht->numele != raxSize(rax)) { + printf("Fuzz: HT / RAX keys count mismatch: %lu vs %lu\n", (unsigned long)ht->numele, + (unsigned long)raxSize(rax)); + return 1; + } + printf("%lu elements inserted\n", (unsigned long)ht->numele); + + /* Check that elements match. 
*/ + raxIterator iter; + raxStart(&iter, rax); + raxSeek(&iter, "^", NULL, 0); + + size_t numkeys = 0; + while (raxNext(&iter)) { + void *val1 = htFind(ht, iter.key, iter.key_len); + void *val2 = NULL; + raxFind(rax, iter.key, iter.key_len, &val2); + if (val1 != val2) { + printf("Fuzz: HT=%p, RAX=%p value do not match " + "for key %.*s\n", + val1, val2, (int)iter.key_len, (char *)iter.key); + return 1; + } + numkeys++; + } + + /* Check that the iterator reported all the elements. */ + if (ht->numele != numkeys) { + printf("Fuzz: the iterator reported %lu keys instead of %lu\n", (unsigned long)numkeys, + (unsigned long)ht->numele); + return 1; + } + + raxStop(&iter); + raxFree(rax); + htFree(ht); + return 0; +} + +/* Redis Cluster alike fuzz testing. + * + * This test simulates the radix tree usage made by Redis Cluster in order + * to maintain the hash slot -> keys mappig. The keys are alphanumerical + * but the first two bytes that are binary (and are the key hashed). + * + * In this test there is no comparison with the hash table, the only goal + * is to crash the radix tree implementation, or to trigger Valgrind + * warnings. */ +int fuzzTestCluster(size_t count, double addprob, double remprob) { + unsigned char key[128]; + int keylen = 0; + + printf("Cluster Fuzz test [keys:%zu keylen:%d]: ", count, keylen); + fflush(stdout); + + rax *rax = raxNew(); + + /* This is our template to generate keys. The first two bytes will + * be replaced with the binary redis cluster hash slot. */ + keylen = snprintf((char *)key, sizeof(key), "__geocode:2e68e5df3624"); + char *cset = "0123456789abcdef"; + + for (unsigned long j = 0; j < count; j++) { + /* Generate a random key by altering our template key. */ + + /* With a given probability, let's use a common prefix so that there + * is a subset of keys that have an higher percentage of probability + * of being hit again and again. 
/* Iterator fuzz testing. Compares the items returned by the Rax iterator
 * with a C implementation obtained by sorting the inserted strings in a
 * linear array. This is one element of that array. */
typedef struct arrayItem {
    unsigned char *key;
    size_t key_len;
} arrayItem;

/* Utility functions used with qsort() in order to sort the array of strings
 * in the same way Rax sorts keys (which is, lexicographically, considering
 * every byte an unsigned integer).
 *
 * The common prefix is compared with memcmp() and its result is returned
 * when the two keys differ there. Otherwise 0 is returned for keys of the
 * same length, 1 if the first key is longer, -1 if it is shorter. */
int compareAB(const unsigned char *keya, size_t lena, const unsigned char *keyb, size_t lenb) {
    size_t shared = (lenb < lena) ? lenb : lena;
    int order = memcmp(keya, keyb, shared);

    if (order != 0) return order;
    if (lena == lenb) return 0;
    return (lena < lenb) ? -1 : 1;
}

/* qsort() callback ordering two arrayItem elements by key via compareAB(). */
int compareArrayItems(const void *aptr, const void *bptr) {
    const arrayItem *lhs = aptr;
    const arrayItem *rhs = bptr;
    return compareAB(lhs->key, lhs->key_len, rhs->key, rhs->key_len);
}
If the seek is not possible (== operator and key not found or empty
+ * array) -1 is returned.
+ *
+ * 'array' must already be sorted with compareArrayItems(). 'op' uses the
+ * same operators passed to raxSeek() in this file:
+ *   "^"  -> index 0, "$" -> index count-1 (the key is ignored);
+ *   ">"  -> first element greater than the key;
+ *   "<"  -> last element smaller than the key;
+ *   a second character '=' additionally accepts an exact match, which is
+ *   returned as soon as it is found. */
+int arraySeek(arrayItem *array, int count, unsigned char *key, size_t len, char *op) {
+    if (count == 0) return -1;
+    if (op[0] == '^') return 0;
+    if (op[0] == '$') return count - 1;
+
+    int eq = 0, lt = 0, gt = 0;
+    if (op[1] == '=') eq = 1;
+    if (op[0] == '<') lt = 1;
+    if (op[0] == '>') gt = 1;
+
+    int i;
+    for (i = 0; i < count; i++) {
+        int cmp = compareAB(array[i].key, array[i].key_len, key, len);
+        if (eq && !cmp) return i;
+        if (cmp > 0 && gt) return i;
+        if (cmp >= 0 && lt) {
+            /* First element not smaller than the key: the answer for '<'
+             * is the one just before it (or -1 when there is none). */
+            i--;
+            break;
+        }
+    }
+    /* '<' with every element smaller than the key: the last one wins. */
+    if (lt && i == count) return count - 1;
+    if (i < 0 || i >= count) return -1;
+    return i;
+}
+
+/* Build a radix tree and a parallel array from generated keys (the number of
+ * insertion attempts is drawn at random below 'count'), sort the array,
+ * perform the same random seek on both, then walk forward or backward
+ * comparing every element reported by the rax iterator with the array.
+ *
+ * Returns 0 when both agree up to EOF and 1 on the first disagreement. In
+ * every key mode except KEY_RANDOM a key mismatch also dumps the state and
+ * terminates the process with exit(1).
+ *
+ * The array, the copied keys, the iterator and the tree are released on the
+ * error returns too: other tests in this file assert that
+ * raxAllocSize() == zmalloc_used_memory(), so memory left behind by a failed
+ * run would make those assertions fail as well. */
+int iteratorFuzzTest(int keymode, size_t count) {
+    count = genrand64_int64() % count;
+    rax *rax = raxNew();
+    arrayItem *array = zmalloc(sizeof(arrayItem) * count);
+    int result = 0;
+
+    /* Fill a radix tree and a linear array with some data. Only keys that
+     * raxInsert() reports as newly added are copied into the array. */
+    unsigned char key[1024];
+    size_t j = 0;
+    for (size_t i = 0; i < count; i++) {
+        uint32_t keylen = int2key((char *)key, sizeof(key), i, keymode);
+        void *val = (void *)(unsigned long)htHash(key, keylen);
+
+        if (raxInsert(rax, key, keylen, val, NULL)) {
+            array[j].key = zmalloc(keylen);
+            array[j].key_len = keylen;
+            memcpy(array[j].key, key, keylen);
+            j++;
+        }
+    }
+    count = raxSize(rax);
+
+    /* Sort the array. */
+    qsort(array, count, sizeof(arrayItem), compareArrayItems);
+
+    /* Perform a random seek operation. */
+    uint32_t keylen = int2key((char *)key, sizeof(key), genrand64_int64() % (count ? count : 1), keymode);
+    raxIterator iter;
+    raxStart(&iter, rax);
+    char *seekops[] = {"==", ">=", "<=", ">", "<", "^", "$"};
+    char *seekop = seekops[genrand64_int64() % 7];
+    raxSeek(&iter, seekop, key, keylen);
+    int seekidx = arraySeek(array, count, key, keylen, seekop);
+
+    int next = genrand64_int64() % 2;
+    int iteration = 0;
+    while (1) {
+        int rax_res;
+        int array_res;
+        unsigned char *array_key = NULL;
+        size_t array_key_len = 0;
+
+        /* array_res mirrors the return value of raxNext()/raxPrev():
+         * 1 if the array still has an element at seekidx, 0 on EOF. */
+        array_res = (seekidx == -1) ? 0 : 1;
+        if (array_res) {
+            if (next && seekidx == (signed)count) array_res = 0;
+            if (!next && seekidx == -1) array_res = 0;
+            if (array_res != 0) {
+                array_key = array[seekidx].key;
+                array_key_len = array[seekidx].key_len;
+            }
+        }
+
+        if (next) {
+            rax_res = raxNext(&iter);
+            if (array_res) seekidx++;
+        } else {
+            rax_res = raxPrev(&iter);
+            if (array_res) seekidx--;
+        }
+
+        /* Both the iterators should agree about EOF. */
+        if (array_res != rax_res) {
+            printf("Iter fuzz: iterators do not agree about EOF "
+                   "at iteration %d: "
+                   "array_more=%d rax_more=%d next=%d\n",
+                   iteration, array_res, rax_res, next);
+            result = 1;
+            break;
+        }
+        if (array_res == 0) break; /* End of iteration reached. */
+
+        /* Check that the returned keys are the same. */
+        if (iter.key_len != array_key_len || memcmp(iter.key, array_key, iter.key_len)) {
+            printf("Iter fuzz: returned element %d mismatch\n", iteration);
+            printf("SEEKOP was %s\n", seekop);
+            if (keymode != KEY_RANDOM) {
+                printf("\n");
+                /* The precision of %.*s must be an int. */
+                printf("BUG SEEKING: %s %.*s\n", seekop, (int)keylen, key);
+                printf("%.*s (iter) VS %.*s (array) next=%d idx=%d "
+                       "count=%lu keymode=%d\n",
+                       (int)iter.key_len, (char *)iter.key, (int)array_key_len, (char *)array_key, next, seekidx,
+                       (unsigned long)count, keymode);
+                if (count < 500) {
+                    printf("\n");
+                    for (size_t k = 0; k < count; k++) {
+                        printf("%d) '%.*s'\n", (int)k, (int)array[k].key_len, array[k].key);
+                    }
+                }
+                exit(1);
+            }
+            result = 1;
+            break;
+        }
+        iteration++;
+    }
+
+    for (size_t i = 0; i < count; i++) zfree(array[i].key);
+    zfree(array);
+    raxStop(&iter);
+    raxFree(rax);
+    return result;
+}
+
+/* Test the random walk function: starting from the first element, keep
+ * calling raxRandomWalk() until every inserted key has been reported at
+ * least once. The test fails if that does not happen within 100000 steps,
+ * or if raxRandomWalk() itself reports a failure before then. */
+int test_raxRandomWalk(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *t = raxNew();
+    char *toadd[] = {"alligator", "alien",   "byword",     "chromodynamic", "romane", "romanus", "romulus", "rubens",
+                     "ruber",     "rubicon", "rubicundus", "all",           "rub",    "by",      NULL};
+
+    long numele;
+    for (numele = 0; toadd[numele] != NULL; numele++) {
+        raxInsert(t, (unsigned char *)toadd[numele], strlen(toadd[numele]), (void *)numele, NULL);
+        TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+    }
+
+    raxIterator iter;
+    raxStart(&iter, t);
+    raxSeek(&iter, "^", NULL, 0);
+
+    /* Every key reported by the walk is replaced by NULL in toadd[]; the
+     * walk is complete once all the numele slots are NULL. Success is
+     * tracked explicitly instead of being inferred from the loop counter. */
+    int all_seen = 0;
+    for (int loops = 0; loops < 100000 && !all_seen && raxRandomWalk(&iter, 0); loops++) {
+        int nulls = 0;
+        for (long i = 0; i < numele; i++) {
+            if (toadd[i] == NULL) {
+                nulls++;
+                continue;
+            }
+            if (strlen(toadd[i]) == iter.key_len && memcmp(toadd[i], iter.key, iter.key_len) == 0) {
+                toadd[i] = NULL;
+                nulls++;
+            }
+        }
+        if (nulls == numele) all_seen = 1;
+    }
+    if (!all_seen) {
+        printf("randomWalkTest() is unable to report all the elements "
+               "after 100k iterations!\n");
+        raxStop(&iter);
+        raxFree(t);
+        return 1;
+    }
+    raxStop(&iter);
+
raxFree(t);
+    return 0;
+}
+
+/* Table-driven test of raxSeek() followed by raxNext(): every row seeks with
+ * the given operator and key, then checks either the key reported by the
+ * iterator or, when 'expected' is NULL, that the iterator reports EOF.
+ *
+ * The loop is bounded by the table size rather than by a NULL 'expected'
+ * field, because NULL is a legitimate value (EOF expected) and the table has
+ * further rows after the first one that uses it. */
+int test_raxIteratorUnitTests(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *t = raxNew();
+    char *toadd[] = {"alligator", "alien",   "byword",     "chromodynamic", "romane", "romanus", "romulus", "rubens",
+                     "ruber",     "rubicon", "rubicundus", "all",           "rub",    "by",      NULL};
+
+    for (int x = 0; x < 10000; x++) genrand64_int64();
+
+    long items = 0;
+    while (toadd[items] != NULL) items++;
+
+    for (long i = 0; i < items; i++) {
+        raxInsert(t, (unsigned char *)toadd[i], strlen(toadd[i]), (void *)i, NULL);
+        TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+    }
+
+    raxIterator iter;
+    raxStart(&iter, t);
+
+    struct {
+        char *seek;
+        size_t seeklen;
+        char *seekop;
+        char *expected;
+    } tests[] = {/* Seek value. */ /* Expected result. */
+                 {"rpxxx", 5, "<=", "romulus"},
+                 {"rom", 3, ">=", "romane"},
+                 {"rub", 3, ">=", "rub"},
+                 {"rub", 3, ">", "rubens"},
+                 {"rub", 3, "<", "romulus"},
+                 {"rom", 3, ">", "romane"},
+                 {"chro", 4, ">", "chromodynamic"},
+                 {"chro", 4, "<", "byword"},
+                 {"chromz", 6, "<", "chromodynamic"},
+                 {"", 0, "^", "alien"},
+                 {"zorro", 5, "<=", "rubicundus"},
+                 {"zorro", 5, "<", "rubicundus"},
+                 {"zorro", 5, "<", "rubicundus"},
+                 {"", 0, "$", "rubicundus"},
+                 {"ro", 2, ">=", "romane"},
+                 {"zo", 2, ">", NULL},
+                 {"zo", 2, "==", NULL},
+                 {"romane", 6, "==", "romane"}};
+
+    size_t numtests = sizeof(tests) / sizeof(tests[0]);
+    int result = 0;
+    for (size_t i = 0; i < numtests; i++) {
+        raxSeek(&iter, tests[i].seekop, (unsigned char *)tests[i].seek, tests[i].seeklen);
+        int retval = raxNext(&iter);
+
+        if (tests[i].expected != NULL) {
+            /* An element is expected: EOF or a different key is an error. */
+            if (retval == 0 || strlen(tests[i].expected) != iter.key_len ||
+                memcmp(tests[i].expected, iter.key, iter.key_len) != 0) {
+                printf("Iterator unit test error: "
+                       "test %d, %s expected, %.*s reported\n",
+                       (int)i, tests[i].expected, (int)iter.key_len, (char *)iter.key);
+                result = 1;
+                break;
+            }
+        } else if (retval != 0) {
+            printf("Iterator unit test error: "
+                   "EOF expected in test %d\n",
+                   (int)i);
+            result = 1;
+            break;
+        }
+    }
+
+    /* Release everything on failure too, so that a failed run does not
+     * leave memory behind for the raxAllocSize() == zmalloc_used_memory()
+     * assertions of the other tests. */
+    raxStop(&iter);
+    raxFree(t);
+    return result;
+}
+
+/* Test that raxInsert() / raxTryInsert() overwrite semantic
+ * works as expected: raxTryInsert() on an existing key must hand back the
+ * old value through 'old' and leave the stored value untouched, while
+ * raxInsert() must replace it. */
+int test_raxTryInsertUnitTests(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *t = raxNew();
+    raxInsert(t, (unsigned char *)"FOO", 3, (void *)(long)1, NULL);
+    /* 'old' starts as NULL so the check below never reads an indeterminate
+     * pointer if raxTryInsert() does not write to it. */
+    void *old = NULL, *val;
+    raxTryInsert(t, (unsigned char *)"FOO", 3, (void *)(long)2, &old);
+    if (old != (void *)(long)1) {
+        printf("Old value not returned correctly by raxTryInsert(): %p", old);
+        raxFree(t);
+        return 1;
+    }
+
+    val = NULL;
+    raxFind(t, (unsigned char *)"FOO", 3, &val);
+    if (val != (void *)(long)1) {
+        printf("FOO value mismatch: is %p instead of 1", val);
+        raxFree(t);
+        return 1;
+    }
+
+    raxInsert(t, (unsigned char *)"FOO", 3, (void *)(long)2, NULL);
+    val = NULL;
+    raxFind(t, (unsigned char *)"FOO", 3, &val);
+    if (val != (void *)(long)2) {
+        printf("FOO value mismatch: is %p instead of 2", val);
+        raxFree(t);
+        return 1;
+    }
+
+    raxFree(t);
+    return 0;
+}
+
+/* Regression test #1: Iterator wrong element returned after seek.
+ *
+ * NOTE(review): when raxNext() returns 0 after the seek nothing is checked
+ * and the test still passes; confirm whether EOF should count as a failure
+ * here, given that "FY" is greater than the seek key "FMP". */
+int test_raxRegressionTest1(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rax = raxNew();
+    raxInsert(rax, (unsigned char *)"LKE", 3, (void *)(long)1, NULL);
+    raxInsert(rax, (unsigned char *)"TQ", 2, (void *)(long)2, NULL);
+    raxInsert(rax, (unsigned char *)"B", 1, (void *)(long)3, NULL);
+    raxInsert(rax, (unsigned char *)"FY", 2, (void *)(long)4, NULL);
+    raxInsert(rax, (unsigned char *)"WI", 2, (void *)(long)5, NULL);
+
+    raxIterator iter;
+    raxStart(&iter, rax);
+    raxSeek(&iter, ">", (unsigned char *)"FMP", 3);
+    if (raxNext(&iter)) {
+        if (iter.key_len != 2 || memcmp(iter.key, "FY", 2)) {
+            printf("Regression test 1 failed: 'FY' expected, got: '%.*s'\n", (int)iter.key_len, (char *)iter.key);
+            raxStop(&iter);
+            raxFree(rax);
+            return 1;
+        }
+    }
+
+    raxStop(&iter);
+    raxFree(rax);
+    return 0;
+}
+
+/* Regression test #2: Crash when mixing NULL and not NULL values.
*/
+int test_raxRegressionTest2(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rt = raxNew();
+    raxInsert(rt, (unsigned char *)"a", 1, (void *)100, NULL);
+    raxInsert(rt, (unsigned char *)"ab", 2, (void *)101, NULL);
+    raxInsert(rt, (unsigned char *)"abc", 3, (void *)NULL, NULL);
+    raxInsert(rt, (unsigned char *)"abcd", 4, (void *)NULL, NULL);
+    raxInsert(rt, (unsigned char *)"abc", 3, (void *)102, NULL);
+    raxFree(rt);
+    return 0;
+}
+
+/* Regression test #3: Wrong access at node value in raxRemoveChild()
+ * when iskey == 1 and isnull == 1: the memmove() was performed including
+ * the value length regardless of the fact there was no actual value.
+ *
+ * Note that this test always returns success but will trigger a
+ * Valgrind error. */
+int test_raxRegressionTest3(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rt = raxNew();
+    raxInsert(rt, (unsigned char *)"D", 1, (void *)1, NULL);
+    raxInsert(rt, (unsigned char *)"", 0, NULL, NULL);
+    raxRemove(rt, (unsigned char *)"D", 1, NULL);
+    raxFree(rt);
+    return 0;
+}
+
+/* Regression test #4: Github issue #8, iterator does not populate the
+ * data field after seek in case of exact match. The test case looks odd
+ * because it is quite indirect: Seeking "^" will result into seeking
+ * the element >= "", and since we just added "" an exact match happens,
+ * however we are using the original one from the bug report, since this
+ * is quite odd and may later protect against different bugs related to
+ * storing and fetching the empty string key. */
+int test_raxRegressionTest4(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rt = raxNew();
+    raxIterator iter;
+    raxInsert(rt, (unsigned char *)"", 0, (void *)-1, NULL);
+    void *val = NULL;
+    raxFind(rt, (unsigned char *)"", 0, &val);
+    if (val != (void *)-1) {
+        printf("Regression test 4 failed. Key value mismatch in raxFind()\n");
+        raxFree(rt);
+        return 1;
+    }
+    raxStart(&iter, rt);
+    raxSeek(&iter, "^", NULL, 0);
+    raxNext(&iter);
+    if (iter.data != (void *)-1) {
+        printf("Regression test 4 failed. Key value mismatch in raxNext()\n");
+        raxStop(&iter);
+        raxFree(rt);
+        return 1;
+    }
+    raxStop(&iter);
+    raxFree(rt);
+    return 0;
+}
+
+/* Regression test #5: Less than seek bug when stopping in the middle of a
+ * compressed node. Seeking "<" "foo" must land on "f", the greatest key
+ * smaller than "foo" among the ones inserted below. */
+int test_raxRegressionTest5(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rax = raxNew();
+
+    raxInsert(rax, (unsigned char *)"b", 1, (void *)(long)1, NULL);
+    raxInsert(rax, (unsigned char *)"by", 2, (void *)(long)2, NULL);
+    raxInsert(rax, (unsigned char *)"byword", 6, (void *)(long)3, NULL);
+
+    raxInsert(rax, (unsigned char *)"f", 1, (void *)(long)4, NULL);
+    raxInsert(rax, (unsigned char *)"foobar", 6, (void *)(long)5, NULL);
+    raxInsert(rax, (unsigned char *)"foobar123", 9, (void *)(long)6, NULL);
+
+    raxIterator ri;
+    raxStart(&ri, rax);
+
+    raxSeek(&ri, "<", (unsigned char *)"foo", 3);
+    raxNext(&ri);
+    if (ri.key_len != 1 || ri.key[0] != 'f') {
+        /* The message names this test (5), not regression test 4. */
+        printf("Regression test 5 failed. Key value mismatch in raxNext()\n");
+        raxStop(&ri);
+        raxFree(rax);
+        return 1;
+    }
+
+    raxStop(&ri);
+    raxFree(rax);
+    return 0;
+}
+
+/* Regression test #6: Seek may not populate iterator data. See issue #25. */
+int test_raxRegressionTest6(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rax = raxNew();
+
+    char *key1 = "172.17.141.2/adminguide/v5.0/";
+    char *key2 = "172.17.141.2/adminguide/v5.0/entitlements-configure.html";
+    char *seekpoint = "172.17.141.2/adminguide/v5.0/entitlements";
+
+    raxInsert(rax, (unsigned char *)key1, strlen(key1), (void *)(long)1234, NULL);
+    raxInsert(rax, (unsigned char *)key2, strlen(key2), (void *)(long)5678, NULL);
+
+    raxIterator ri;
+    raxStart(&ri, rax);
+    raxSeek(&ri, "<=", (unsigned char *)seekpoint, strlen(seekpoint));
+    raxPrev(&ri);
+    if ((long)ri.data != 1234) {
+        printf("Regression test 6 failed. Key data not populated.\n");
+        raxStop(&ri);
+        raxFree(rax);
+        return 1;
+    }
+
+    raxStop(&ri);
+    raxFree(rax);
+    return 0;
+}
+
+/* Benchmark, run only when UNIT_TEST_SINGLE is set in 'flags'. For each of
+ * the two key modes (0 and 1, printed as "integer" and "alphanumerical")
+ * it times 5000000 insertions, sequential lookups, random lookups, failed
+ * lookups, one full iteration and 5000000 deletions, printing the elapsed
+ * seconds of every phase. */
+int test_raxBenchmark(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    if (!(flags & UNIT_TEST_SINGLE)) return 0;
+
+    for (int mode = 0; mode < 2; mode++) {
+        printf("Benchmark with %s keys:\n", (mode == 0) ? "integer" : "alphanumerical");
+        rax *t = raxNew();
+        long long start = _ustime();
+        for (int i = 0; i < 5000000; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            raxInsert(t, (unsigned char *)buf, len, (void *)(long)i, NULL);
+            TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+        }
+        printf("Insert: %f\n", (double)(_ustime() - start) / 1000000);
+        printf("%llu total nodes\n", (unsigned long long)t->numnodes);
+        printf("%llu total elements\n", (unsigned long long)t->numele);
+
+        start = _ustime();
+        for (int i = 0; i < 5000000; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            /* Initialized so the diagnostic below never prints an
+             * indeterminate pointer when the lookup fails. */
+            void *data = NULL;
+            if (!raxFind(t, (unsigned char *)buf, len, &data) || data != (void *)(long)i) {
+                printf("Issue with %s: %p instead of %p\n", buf, data, (void *)(long)i);
+            }
+        }
+        printf("Linear lookup: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        for (int i = 0; i < 5000000; i++) {
+            char buf[64];
+            int r = genrand64_int64() % 5000000;
+            int len = int2key(buf, sizeof(buf), r, mode);
+            void *data = NULL;
+            if (!raxFind(t, (unsigned char *)buf, len, &data) || data != (void *)(long)r) {
+                printf("Issue with %s: %p instead of %p\n", buf, data, (void *)(long)r);
+            }
+        }
+        printf("Random lookup: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        for (int i = 0; i < 5000000; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            buf[i % len] = '!'; /* "!" is never set into keys. */
+            TEST_ASSERT_MESSAGE("Lookup should have failed", !raxFind(t, (unsigned char *)buf, len, NULL));
+        }
+        printf("Failed lookup: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        raxIterator ri;
+        raxStart(&ri, t);
+        raxSeek(&ri, "^", NULL, 0);
+        int iter = 0;
+        while (raxNext(&ri)) iter++;
+        TEST_ASSERT_MESSAGE("Iteration is incomplete", iter == 5000000);
+        raxStop(&ri);
+        printf("Full iteration: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        for (int i = 0; i < 5000000; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            int retval = raxRemove(t, (unsigned char *)buf, len, NULL);
+            TEST_ASSERT(retval == 1);
+            TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+        }
+        printf("Deletion: %f\n", (double)(_ustime() - start) / 1000000);
+
+        printf("%llu total nodes\n", (unsigned long long)t->numnodes);
+        printf("%llu total elements\n", (unsigned long long)t->numele);
+        raxFree(t);
+    }
+
+    return 0;
+}
+
+/* Compressed nodes can only hold (2^29)-1 characters, so it is important
+ * to test for keys bigger than this amount, in order to make sure that
+ * the code to handle this edge case works as expected.
+ *
+ * This test is disabled by default because it uses a lot of memory.
*/
+int test_raxHugeKey(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    /* Runs only when UNIT_TEST_LARGE_MEMORY is set in 'flags'. */
+    if (!(flags & UNIT_TEST_LARGE_MEMORY)) return 0;
+
+    /* 100 bytes more than the (2^29)-1 characters a compressed node holds. */
+    size_t max_keylen = ((1 << 29) - 1) + 100;
+    unsigned char *key = zmalloc(max_keylen);
+    if (key == NULL) goto oom;
+
+    memset(key, 'a', max_keylen);
+    key[10] = 'X';
+    key[max_keylen - 1] = 'Y';
+    rax *rax = raxNew();
+    int retval = raxInsert(rax, (unsigned char *)"aaabbb", 6, (void *)5678L, NULL);
+    if (retval == 0 && errno == ENOMEM) goto oom;
+    retval = raxInsert(rax, key, max_keylen, (void *)1234L, NULL);
+    if (retval == 0 && errno == ENOMEM) goto oom;
+    void *value1, *value2;
+    int found1 = raxFind(rax, (unsigned char *)"aaabbb", 6, &value1);
+    int found2 = raxFind(rax, key, max_keylen, &value2);
+    zfree(key);
+
+    /* The values are compared only when both lookups succeeded. The tree is
+     * freed whatever the outcome, so a failure here does not leave memory
+     * behind for the raxAllocSize() == zmalloc_used_memory() assertions
+     * made by other tests in this file. */
+    int failed = 0;
+    if (!found1 || !found2) {
+        printf("Huge key test failed on elementhood\n");
+        failed = 1;
+    } else if (value1 != (void *)5678L || value2 != (void *)1234L) {
+        printf("Huge key test failed\n");
+        failed = 1;
+    }
+    raxFree(rax);
+    return failed;
+
+oom:
+    fprintf(stderr, "Sorry, not enough memory to execute --hugekey test.");
+    exit(1);
+}
+
+/* Fuzz test driver, run only when UNIT_TEST_ACCURATE is set in 'flags'.
+ * After seeding the generator with a fixed value it runs fuzzTestCluster()
+ * and fuzzTest() with random sizes and probabilities, then fuzzTest() with
+ * fixed .7/.3 probabilities on growing operation counts, then 100000 rounds
+ * of iteratorFuzzTest(). Returns 1 if any of them reported an error.
+ *
+ * NOTE(review): alpha is computed as genrand64_int64() / RAND_MAX. If
+ * genrand64_int64() spans the full 64-bit range, as its name suggests, the
+ * ratio is far above 1.0 for almost every draw, so alpha is not a
+ * probability and beta = 1 - alpha is negative. Confirm the intended
+ * scaling together with the matching "genrand64_int64() / RAND_MAX <
+ * addprob" checks in fuzzTestCluster() (and presumably fuzzTest()) before
+ * changing either side on its own. */
+int test_raxFuzz(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    if (!(flags & UNIT_TEST_ACCURATE)) return 0;
+
+    int errors = 0;
+
+    init_genrand64(1234);
+
+    for (int i = 0; i < 10; i++) {
+        double alpha = (double)genrand64_int64() / RAND_MAX;
+        double beta = 1 - alpha;
+        if (fuzzTestCluster(genrand64_int64() % 100000000, alpha, beta)) errors++;
+    }
+
+    for (int i = 0; i < 10; i++) {
+        double alpha = (double)genrand64_int64() / RAND_MAX;
+        double beta = 1 - alpha;
+        if (fuzzTest(KEY_INT, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_UNIQUE_ALPHA, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_RANDOM, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_RANDOM_ALPHA, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_RANDOM_SMALL_CSET, genrand64_int64() % 10000, alpha, beta)) errors++;
+    }
+
+    /* Three rounds at 100000, 1000000 and 10000000 operations. */
+    size_t numops = 100000, cycles = 3;
+    while (cycles--) {
+        if (fuzzTest(KEY_INT, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_UNIQUE_ALPHA, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_RANDOM, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_RANDOM_ALPHA, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_RANDOM_SMALL_CSET, numops, .7, .3)) errors++;
+        numops *= 10;
+    }
+
+    if (fuzzTest(KEY_CHAIN, 1000, .7, .3)) errors++;
+    printf("Iterator fuzz test: ");
+    fflush(stdout);
+    for (int i = 0; i < 100000; i++) {
+        if (iteratorFuzzTest(KEY_INT, 100)) errors++;
+        if (iteratorFuzzTest(KEY_UNIQUE_ALPHA, 100)) errors++;
+        if (iteratorFuzzTest(KEY_RANDOM_ALPHA, 1000)) errors++;
+        if (iteratorFuzzTest(KEY_RANDOM, 1000)) errors++;
+        /* Progress: a dot every 100 rounds, a percentage every 1000. */
+        if (i && !(i % 100)) {
+            printf(".");
+            if (!(i % 1000)) {
+                printf("%d%% done", i / 1000);
+            }
+            fflush(stdout);
+        }
+    }
+    printf("\n");
+
+    if (errors) {
+        printf("!!! WARNING !!!: %d errors found\n", errors);
+    } else {
+        printf("OK! \\o/\n");
+    }
+    return !!errors;
+}