From a106e8a743b2c9c0dbaae42c0acf96839806fa33 Mon Sep 17 00:00:00 2001
From: Guillaume Koenig <106696198+knggk@users.noreply.github.com>
Date: Wed, 2 Oct 2024 13:28:55 -0400
Subject: [PATCH] Rax size tracking (#688)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a `size_t` field into the rax struct to track allocation size.
Update the allocation size on rax inserts and deletes.
Return the allocation size when `raxAllocSize` is called.

This size tracking is now used in MEMORY USAGE and MEMORY STATS in place
of the previous method based on sampling.

The module API allows creating sorted dictionaries, which are backed by
rax. Users now also get the precise memory allocation size for them (through
`ValkeyModule_MallocSizeDict`).

Fixes #677.

For the release notes:

* MEMORY USAGE and MEMORY STATS are now exact for streams, rather than
based on sampling.

---------

Signed-off-by: Guillaume Koenig <knggk@amazon.com>
Signed-off-by: Guillaume Koenig <106696198+knggk@users.noreply.github.com>
Co-authored-by: Joey <yzhaon@amazon.com>
Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
---
 .config/typos.toml    |    1 +
 src/module.c          |    6 +-
 src/object.c          |   29 +-
 src/rax.c             |   28 +-
 src/rax.h             |    8 +-
 src/rax_malloc.h      |    1 +
 src/unit/test_files.h |   14 +
 src/unit/test_rax.c   | 1025 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1078 insertions(+), 34 deletions(-)
 create mode 100644 src/unit/test_rax.c

diff --git a/.config/typos.toml b/.config/typos.toml
index d378b5655a..1dc44ea0e9 100644
--- a/.config/typos.toml
+++ b/.config/typos.toml
@@ -20,6 +20,7 @@ extend-ignore-re = [
     "D4C4DAA4", # sha1.c
     "Georg Nees",
     "\\[l\\]ist", # eval.c
+    "LKE", # test_rax.c
 ]
 
 [type.tcl]
diff --git a/src/module.c b/src/module.c
index 15a7fb91f4..38d0c2d968 100644
--- a/src/module.c
+++ b/src/module.c
@@ -10840,10 +10840,8 @@ size_t VM_MallocSizeString(ValkeyModuleString *str) {
  * it does not include the allocation size of the keys and values.
  */
 size_t VM_MallocSizeDict(ValkeyModuleDict *dict) {
-    size_t size = sizeof(ValkeyModuleDict) + sizeof(rax);
-    size += dict->rax->numnodes * sizeof(raxNode);
-    /* For more info about this weird line, see streamRadixTreeMemoryUsage */
-    size += dict->rax->numnodes * sizeof(long) * 30;
+    size_t size = sizeof(ValkeyModuleDict);
+    size += raxAllocSize(dict->rax);
     return size;
 }
 
diff --git a/src/object.c b/src/object.c
index d409fa8d5c..2508f20ab6 100644
--- a/src/object.c
+++ b/src/object.c
@@ -952,29 +952,6 @@ char *strEncoding(int encoding) {
 /* =========================== Memory introspection ========================= */
 
 
-/* This is a helper function with the goal of estimating the memory
- * size of a radix tree that is used to store Stream IDs.
- *
- * Note: to guess the size of the radix tree is not trivial, so we
- * approximate it considering 16 bytes of data overhead for each
- * key (the ID), and then adding the number of bare nodes, plus some
- * overhead due by the data and child pointers. This secret recipe
- * was obtained by checking the average radix tree created by real
- * workloads, and then adjusting the constants to get numbers that
- * more or less match the real memory usage.
- *
- * Actually the number of nodes and keys may be different depending
- * on the insertion speed and thus the ability of the radix tree
- * to compress prefixes. */
-size_t streamRadixTreeMemoryUsage(rax *rax) {
-    size_t size = sizeof(*rax);
-    size = rax->numele * sizeof(streamID);
-    size += rax->numnodes * sizeof(raxNode);
-    /* Add a fixed overhead due to the aux data pointer, children, ... */
-    size += rax->numnodes * sizeof(long) * 30;
-    return size;
-}
-
 /* Returns the size in bytes consumed by the key's value in RAM.
  * Note that the returned value is just an approximation, especially in the
  * case of aggregated data types where only "sample_size" elements
@@ -1072,7 +1049,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
     } else if (o->type == OBJ_STREAM) {
         stream *s = o->ptr;
         asize = sizeof(*o) + sizeof(*s);
-        asize += streamRadixTreeMemoryUsage(s->rax);
+        asize += raxAllocSize(s->rax);
 
         /* Now we have to add the listpacks. The last listpack is often non
          * complete, so we estimate the size of the first N listpacks, and
@@ -1112,7 +1089,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
             while (raxNext(&ri)) {
                 streamCG *cg = ri.data;
                 asize += sizeof(*cg);
-                asize += streamRadixTreeMemoryUsage(cg->pel);
+                asize += raxAllocSize(cg->pel);
                 asize += sizeof(streamNACK) * raxSize(cg->pel);
 
                 /* For each consumer we also need to add the basic data
@@ -1124,7 +1101,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
                     streamConsumer *consumer = cri.data;
                     asize += sizeof(*consumer);
                     asize += sdslen(consumer->name);
-                    asize += streamRadixTreeMemoryUsage(consumer->pel);
+                    asize += raxAllocSize(consumer->pel);
                     /* Don't count NACKs again, they are shared with the
                      * consumer group PEL. */
                 }
diff --git a/src/rax.c b/src/rax.c
index 319d89a2dc..ed17f3735d 100644
--- a/src/rax.c
+++ b/src/rax.c
@@ -192,6 +192,7 @@ rax *raxNew(void) {
     rax->numele = 0;
     rax->numnodes = 1;
     rax->head = raxNewNode(0, 0);
+    rax->alloc_size = rax_ptr_alloc_size(rax) + rax_ptr_alloc_size(rax->head);
     if (rax->head == NULL) {
         rax_free(rax);
         return NULL;
@@ -510,8 +511,12 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
         debugf("### Insert: node representing key exists\n");
         /* Make space for the value pointer if needed. */
         if (!h->iskey || (h->isnull && overwrite)) {
+            size_t oldalloc = rax_ptr_alloc_size(h);
             h = raxReallocForData(h, data);
-            if (h) memcpy(parentlink, &h, sizeof(h));
+            if (h) {
+                memcpy(parentlink, &h, sizeof(h));
+                rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h);
+            }
         }
         if (h == NULL) {
             errno = ENOMEM;
@@ -706,6 +711,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
             return 0;
         }
         splitnode->data[0] = h->data[j];
+        rax->alloc_size += rax_ptr_alloc_size(splitnode);
 
         if (j == 0) {
             /* 3a: Replace the old node with the split node. */
@@ -730,6 +736,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
             memcpy(parentlink, &trimmed, sizeof(trimmed));
             parentlink = cp; /* Set parentlink to splitnode parent. */
             rax->numnodes++;
+            rax->alloc_size += rax_ptr_alloc_size(trimmed);
         }
 
         /* 4: Create the postfix node: what remains of the original
@@ -744,6 +751,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
             raxNode **cp = raxNodeLastChildPtr(postfix);
             memcpy(cp, &next, sizeof(next));
             rax->numnodes++;
+            rax->alloc_size += rax_ptr_alloc_size(postfix);
         } else {
             /* 4b: just use next as postfix node. */
             postfix = next;
@@ -756,6 +764,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
         /* 6. Continue insertion: this will cause the splitnode to
          * get a new child (the non common character at the currently
          * inserted key). */
+        rax->alloc_size -= rax_ptr_alloc_size(h);
         rax_free(h);
         h = splitnode;
     } else if (h->iscompr && i == len) {
@@ -794,6 +803,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
         raxNode **cp = raxNodeLastChildPtr(postfix);
         memcpy(cp, &next, sizeof(next));
         rax->numnodes++;
+        rax->alloc_size += rax_ptr_alloc_size(postfix);
 
         /* 3: Trim the compressed node. */
         trimmed->size = j;
@@ -806,6 +816,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
             void *aux = raxGetData(h);
             raxSetData(trimmed, aux);
         }
+        rax->alloc_size += rax_ptr_alloc_size(trimmed);
 
         /* Fix the trimmed node child pointer to point to
          * the postfix node. */
@@ -815,6 +826,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
         /* Finish! We don't need to continue with the insertion
          * algorithm for ALGO 2. The key is already inserted. */
         rax->numele++;
+        rax->alloc_size -= rax_ptr_alloc_size(h);
         rax_free(h);
         return 1; /* Key inserted. */
     }
@@ -823,6 +835,7 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
      * chars in our string. We need to insert the missing nodes. */
     while (i < len) {
         raxNode *child;
+        size_t oldalloc = rax_ptr_alloc_size(h);
 
         /* If this node is going to have a single child, and there
          * are other characters, so that that would result in a chain
@@ -848,14 +861,17 @@ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **
             i++;
         }
         rax->numnodes++;
+        rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h) + rax_ptr_alloc_size(child);
         h = child;
     }
+    size_t oldalloc = rax_ptr_alloc_size(h);
     raxNode *newh = raxReallocForData(h, data);
     if (newh == NULL) goto oom;
     h = newh;
     if (!h->iskey) rax->numele++;
     raxSetData(h, data);
     memcpy(parentlink, &h, sizeof(h));
+    rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h);
     return 1; /* Element inserted. */
 
 oom:
@@ -1025,6 +1041,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
             child = h;
             debugf("Freeing child %p [%.*s] key:%d\n", (void *)child, (int)child->size, (char *)child->data,
                    child->iskey);
+            rax->alloc_size -= rax_ptr_alloc_size(child);
             rax_free(child);
             rax->numnodes--;
             h = raxStackPop(&ts);
@@ -1034,7 +1051,9 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
         }
         if (child) {
             debugf("Unlinking child %p from parent %p\n", (void *)child, (void *)h);
+            size_t oldalloc = rax_ptr_alloc_size(h);
             raxNode *new = raxRemoveChild(h, child);
+            rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(new);
             if (new != h) {
                 raxNode *parent = raxStackPeek(&ts);
                 raxNode **parentlink;
@@ -1151,6 +1170,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
             new->iscompr = 1;
             new->size = comprsize;
             rax->numnodes++;
+            rax->alloc_size += rax_ptr_alloc_size(new);
 
             /* Scan again, this time to populate the new node content and
              * to fix the new node child pointer. At the same time we free
@@ -1163,6 +1183,7 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
                 raxNode **cp = raxNodeLastChildPtr(h);
                 raxNode *tofree = h;
                 memcpy(&h, cp, sizeof(h));
+                rax->alloc_size -= rax_ptr_alloc_size(tofree);
                 rax_free(tofree);
                 rax->numnodes--;
                 if (h->iskey || (!h->iscompr && h->size != 1)) break;
@@ -1764,6 +1785,11 @@ uint64_t raxSize(rax *rax) {
     return rax->numele;
 }
 
+/* Return the tracked allocation size of the tree in bytes (the current value of rax->alloc_size). */
+size_t raxAllocSize(rax *rax) {
+    return rax->alloc_size;
+}
+
 /* ----------------------------- Introspection ------------------------------ */
 
 /* This function is mostly used for debugging and learning purposes.
diff --git a/src/rax.h b/src/rax.h
index 5347dc480e..2d0c940698 100644
--- a/src/rax.h
+++ b/src/rax.h
@@ -131,9 +131,10 @@ typedef struct raxNode {
 } raxNode;
 
 typedef struct rax {
-    raxNode *head;
-    uint64_t numele;
-    uint64_t numnodes;
+    raxNode *head;     /* Pointer to root node of tree */
+    uint64_t numele;   /* Number of keys in the tree */
+    uint64_t numnodes; /* Number of rax nodes in the tree */
+    size_t alloc_size; /* Total allocation size of the tree in bytes */
 } rax;
 
 /* Stack data structure used by raxLowWalk() in order to, optionally, return
@@ -203,6 +204,7 @@ void raxStop(raxIterator *it);
 int raxEOF(raxIterator *it);
 void raxShow(rax *rax);
 uint64_t raxSize(rax *rax);
+size_t raxAllocSize(rax *rax);
 unsigned long raxTouch(raxNode *n);
 void raxSetDebugMsg(int onoff);
 
diff --git a/src/rax_malloc.h b/src/rax_malloc.h
index 03c952e1a4..49a626595a 100644
--- a/src/rax_malloc.h
+++ b/src/rax_malloc.h
@@ -41,4 +41,5 @@
 #define rax_malloc zmalloc
 #define rax_realloc zrealloc
 #define rax_free zfree
+#define rax_ptr_alloc_size zmalloc_size
 #endif
diff --git a/src/unit/test_files.h b/src/unit/test_files.h
index 71952e343f..cd2e0c5b92 100644
--- a/src/unit/test_files.h
+++ b/src/unit/test_files.h
@@ -84,6 +84,18 @@ int test_listpackBenchmarkLpValidateIntegrity(int argc, char **argv, int flags);
 int test_listpackBenchmarkLpCompareWithString(int argc, char **argv, int flags);
 int test_listpackBenchmarkLpCompareWithNumber(int argc, char **argv, int flags);
 int test_listpackBenchmarkFree(int argc, char **argv, int flags);
+int test_raxRandomWalk(int argc, char **argv, int flags);
+int test_raxIteratorUnitTests(int argc, char **argv, int flags);
+int test_raxTryInsertUnitTests(int argc, char **argv, int flags);
+int test_raxRegressionTest1(int argc, char **argv, int flags);
+int test_raxRegressionTest2(int argc, char **argv, int flags);
+int test_raxRegressionTest3(int argc, char **argv, int flags);
+int test_raxRegressionTest4(int argc, char **argv, int flags);
+int test_raxRegressionTest5(int argc, char **argv, int flags);
+int test_raxRegressionTest6(int argc, char **argv, int flags);
+int test_raxBenchmark(int argc, char **argv, int flags);
+int test_raxHugeKey(int argc, char **argv, int flags);
+int test_raxFuzz(int argc, char **argv, int flags);
 int test_sds(int argc, char **argv, int flags);
 int test_typesAndAllocSize(int argc, char **argv, int flags);
 int test_sdsHeaderSizes(int argc, char **argv, int flags);
@@ -144,6 +156,7 @@ unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, N
 unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}};
 unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}};
 unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", 
test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
+unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_raxIteratorUnitTests", test_raxIteratorUnitTests}, {"test_raxTryInsertUnitTests", test_raxTryInsertUnitTests}, {"test_raxRegressionTest1", test_raxRegressionTest1}, {"test_raxRegressionTest2", test_raxRegressionTest2}, {"test_raxRegressionTest3", test_raxRegressionTest3}, {"test_raxRegressionTest4", test_raxRegressionTest4}, {"test_raxRegressionTest5", test_raxRegressionTest5}, {"test_raxRegressionTest6", test_raxRegressionTest6}, {"test_raxBenchmark", test_raxBenchmark}, {"test_raxHugeKey", test_raxHugeKey}, {"test_raxFuzz", test_raxFuzz}, {NULL, NULL}};
 unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {NULL, NULL}};
 unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}};
 unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}};
@@ -162,6 +175,7 @@ struct unitTestSuite {
     {"test_intset.c", __test_intset_c},
     {"test_kvstore.c", __test_kvstore_c},
     {"test_listpack.c", __test_listpack_c},
+    {"test_rax.c", __test_rax_c},
     {"test_sds.c", __test_sds_c},
     {"test_sha1.c", __test_sha1_c},
     {"test_util.c", __test_util_c},
diff --git a/src/unit/test_rax.c b/src/unit/test_rax.c
new file mode 100644
index 0000000000..5f346b4115
--- /dev/null
+++ b/src/unit/test_rax.c
@@ -0,0 +1,1025 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#include "../rax.c"
+#include "../mt19937-64.c"
+#include "test_help.h"
+
+uint16_t crc16(const char *buf, int len); /* From crc16.c */
+long long _ustime(void);                  /* From test_crc64combine.c */
+
+/* ---------------------------------------------------------------------------
+ * Simple hash table implementation, no rehashing, just chaining. This is
+ * used in order to test the radix tree implementation against something that
+ * will always "tell the truth" :-) */
+
+/* This is huge, but we want it to be fast enough without needing rehashing. */
+#define HT_TABLE_SIZE 100000
+typedef struct htNode { /* One key/value entry in a bucket chain. */
+    uint64_t keylen;     /* Length of 'key' in bytes. */
+    unsigned char *key;  /* Copy of the key bytes, allocated by htAdd(). */
+    void *data;          /* Value associated with the key. */
+    struct htNode *next; /* Next node in the same bucket chain, NULL at the end. */
+} htNode;
+
+typedef struct ht { /* Fixed-size chained hash table, the reference the rax is checked against. */
+    uint64_t numele;              /* Number of keys currently stored. */
+    htNode *table[HT_TABLE_SIZE]; /* Bucket chain heads, indexed by htHash(). */
+} hashtable;
+
+/* Allocate and return an empty hash table (no keys stored). */
+hashtable *htNew(void) {
+    hashtable *table = zcalloc(sizeof(hashtable));
+    table->numele = 0;
+    return table;
+}
+
+/* djb2 hash of the 'len' bytes at 's', reduced to a bucket index. */
+uint32_t htHash(unsigned char *s, size_t len) {
+    uint32_t acc = 5381;
+    for (size_t pos = 0; pos != len; pos++) acc = (acc << 5) + acc + s[pos];
+    return acc % HT_TABLE_SIZE;
+}
+
+/* Low level lookup of key 's' ('len' bytes): returns its node, or NULL. If
+ * 'hash' is not NULL it receives the bucket index; if 'parentlink' is not NULL
+ * it receives the address of the pointer to the node (or of the chain's end). */
+htNode *htRawLookup(hashtable *t, unsigned char *s, size_t len, uint32_t *hash, htNode ***parentlink) {
+    uint32_t bucket = htHash(s, len);
+    htNode **link = &t->table[bucket];
+    for (; *link != NULL; link = &(*link)->next) {
+        if ((*link)->keylen == len && memcmp((*link)->key, s, len) == 0) break;
+    }
+    if (hash != NULL) *hash = bucket;
+    if (parentlink != NULL) *parentlink = link;
+    return *link;
+}
+
+/* Store 'data' under key 's' ('len' bytes). Returns 1 when the key was added
+ * as a new element, 0 when it already existed and only its value changed. */
+int htAdd(hashtable *t, unsigned char *s, size_t len, void *data) {
+    uint32_t bucket;
+    htNode *entry = htRawLookup(t, s, len, &bucket, NULL);
+
+    if (entry != NULL) {
+        entry->data = data;
+        return 0;
+    }
+    /* New key: copy it and push the node at the head of its bucket. */
+    entry = zmalloc(sizeof(*entry));
+    entry->keylen = len;
+    entry->key = zmalloc(len);
+    memcpy(entry->key, s, len);
+    entry->data = data;
+    entry->next = t->table[bucket];
+    t->table[bucket] = entry;
+    t->numele++;
+    return 1;
+}
+
+/* Delete key 's' ('len' bytes) from the table. Returns 1 if the key was
+ * found and removed, 0 if it was not present. */
+int htRem(hashtable *t, unsigned char *s, size_t len) {
+    htNode **link;
+    htNode *victim = htRawLookup(t, s, len, NULL, &link);
+
+    if (victim == NULL) return 0;
+    *link = victim->next; /* Unlink the node from its bucket chain. */
+    t->numele--;
+    zfree(victim->key);
+    zfree(victim);
+    return 1;
+}
+
+void *htNotFound = (void *)"ht-not-found";

+/* Look up key 's' ('len' bytes) in the hash table. Returns the value
+ * stored for it, or the htNotFound sentinel when the key is not in
+ * the table. */
+void *htFind(hashtable *t, unsigned char *s, size_t len) {
+    htNode *entry = htRawLookup(t, s, len, NULL, NULL);
+    return entry != NULL ? entry->data : htNotFound;
+}
+
+/* Release every node of every bucket chain (together with its key copy),
+ * and finally the table structure itself. */
+void htFree(hashtable *ht) {
+    for (int bucket = 0; bucket < HT_TABLE_SIZE; bucket++) {
+        htNode *cur = ht->table[bucket];
+        for (htNode *after; cur != NULL; cur = after) {
+            after = cur->next; /* Read the link before the node is freed. */
+            zfree(cur->key);
+            zfree(cur);
+        }
+    }
+    zfree(ht);
+}
+
+/* --------------------------------------------------------------------------
+ * Utility functions to generate keys, check time usage and so forth.
+ * -------------------------------------------------------------------------*/
+
+/* A simple 8-round Feistel network that turns every possible uint32_t
+ * input into another "randomly" looking uint32_t. It is a one to one
+ * map so there are no repetitions. */
+static uint32_t int2int(uint32_t input) {
+    uint16_t l = input & 0xffff;
+    uint16_t r = input >> 16;
+    for (int i = 0; i < 8; i++) {
+        uint16_t nl = r;
+        uint16_t F = (((r * 31) + (r >> 5) + 7 * 371) ^ r) & 0xffff;
+        r = l ^ F;
+        l = nl;
+    }
+    return ((uint32_t)r << 16) | l; /* Widen first: 'r' promotes to int and r << 16 could overflow it. */
+}
+
+/* Encode the uint32_t 'i' as a base-62 alphanumerical string (least
+ * significant digit first), NUL-terminate it and return its length. At
+ * most maxlen-1 characters are written. The large charset lets the radix
+ * tree be tested with many children per node. */
+static size_t int2alphakey(char *s, size_t maxlen, uint32_t i) {
+    static const char charset[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                  "abcdefghijklmnopqrstuvwxyz"
+                                  "0123456789";
+    const size_t base = sizeof(charset) - 1; /* 62 symbols. */
+    size_t written = 0;
+    if (maxlen == 0) return 0;
+    const size_t room = maxlen - 1; /* Keep one byte for the terminator. */
+    if (room > 0) {
+        do {
+            s[written++] = charset[i % base];
+            i /= base;
+        } while (i != 0 && written < room);
+    }
+    s[written] = '\0';
+    return written;
+}
+
+
+/* Turn the integer 'i' into a key according to 'mode', store it in 's'
+ * (at most 'maxlen' bytes) and return its length; 0 for an unknown mode.
+ * KEY_INT: Just represents the integer as a string.
+ * KEY_UNIQUE_ALPHA: Turn it into a random-looking alphanumerical string
+ *                   according to the int2alphakey() function, so that
+ *                   every integer is mapped to a different string.
+ * KEY_RANDOM: Random bytes, random length below min(maxlen, 16).
+ * KEY_RANDOM_ALPHA: Like KEY_RANDOM, with characters in 'A'..'z'.
+ * KEY_RANDOM_SMALL_CSET: Like KEY_RANDOM, with characters in 'A'..'D'.
+ * KEY_CHAIN: 'i' times the character "A" (capped at maxlen). */
+#define KEY_INT 0
+#define KEY_UNIQUE_ALPHA 1
+#define KEY_RANDOM 2
+#define KEY_RANDOM_ALPHA 3
+#define KEY_RANDOM_SMALL_CSET 4
+#define KEY_CHAIN 5
+static size_t int2key(char *s, size_t maxlen, uint32_t i, int mode) {
+    if (maxlen == 0) return 0; /* Nothing can be stored; also keeps '% maxlen' below well defined. */
+    if (mode == KEY_INT) {
+        int n = snprintf(s, maxlen, "%lu", (unsigned long)i);
+        if (n < 0) return 0;
+        /* snprintf() reports the untruncated length: clamp to what was stored. */
+        return (size_t)n < maxlen ? (size_t)n : maxlen - 1;
+    } else if (mode == KEY_UNIQUE_ALPHA) {
+        if (maxlen > 16) maxlen = 16;
+        i = int2int(i);
+        return int2alphakey(s, maxlen, i);
+    } else if (mode == KEY_RANDOM || mode == KEY_RANDOM_ALPHA || mode == KEY_RANDOM_SMALL_CSET) {
+        if (maxlen > 16) maxlen = 16;
+        size_t r = genrand64_int64() % maxlen;
+        for (size_t j = 0; j < r; j++) {
+            uint64_t rnd = genrand64_int64();
+            if (mode == KEY_RANDOM) s[j] = rnd & 0xff;
+            else if (mode == KEY_RANDOM_ALPHA) s[j] = 'A' + rnd % ('z' - 'A' + 1);
+            else s[j] = 'A' + rnd % 4;
+        }
+        return r;
+    } else if (mode == KEY_CHAIN) {
+        if (i > maxlen) i = maxlen;
+        memset(s, 'A', i);
+        return i;
+    } else {
+        return 0;
+    }
+}
+
+/* -------------------------------------------------------------------------- */
+
+/* Fuzz a rax against a reference hash table with random adds/removes. Returns 0 on success, 1 on error. */
+int fuzzTest(int keymode, size_t count, double addprob, double remprob) {
+    hashtable *ht = htNew();
+    rax *rax = raxNew();
+
+    printf("Fuzz test in mode %d [%zu]: ", keymode, count);
+    fflush(stdout);
+
+    /* Perform random operations on both the dictionaries. */
+    for (size_t i = 0; i < count; i++) {
+        unsigned char key[1024];
+        uint32_t keylen;
+
+        /* Insert element. The 64-bit draw is scaled by UINT64_MAX (not RAND_MAX) to land in [0,1]. */
+        if ((double)genrand64_int64() / (double)UINT64_MAX < addprob) {
+            keylen = int2key((char *)key, sizeof(key), i, keymode);
+            void *val = (void *)(unsigned long)genrand64_int64();
+            /* Stress NULL values more often, they use a special encoding. */
+            if (!(genrand64_int64() % 100)) val = NULL;
+            int retval1 = htAdd(ht, key, keylen, val);
+            int retval2 = raxInsert(rax, key, keylen, val, NULL);
+            if (retval1 != retval2) {
+                printf("Fuzz: key insertion reported mismatching value in HT/RAX\n");
+                return 1;
+            }
+        }
+
+        /* Remove element. Same [0,1] scaling as for the insertion above. */
+        if ((double)genrand64_int64() / (double)UINT64_MAX < remprob) {
+            keylen = int2key((char *)key, sizeof(key), i, keymode);
+            int retval1 = htRem(ht, key, keylen);
+            int retval2 = raxRemove(rax, key, keylen, NULL);
+            if (retval1 != retval2) {
+                printf("Fuzz: key deletion of '%.*s' reported mismatching "
+                       "value in HT=%d RAX=%d\n",
+                       (int)keylen, (char *)key, retval1, retval2);
+                return 1;
+            }
+        }
+    }
+
+    /* Check that count matches. */
+    if (ht->numele != raxSize(rax)) {
+        printf("Fuzz: HT / RAX keys count mismatch: %lu vs %lu\n", (unsigned long)ht->numele,
+               (unsigned long)raxSize(rax));
+        return 1;
+    }
+    printf("%lu elements inserted\n", (unsigned long)ht->numele);
+
+    /* Check that elements match. */
+    raxIterator iter;
+    raxStart(&iter, rax);
+    raxSeek(&iter, "^", NULL, 0);
+
+    size_t numkeys = 0;
+    while (raxNext(&iter)) {
+        void *val1 = htFind(ht, iter.key, iter.key_len);
+        void *val2 = NULL;
+        raxFind(rax, iter.key, iter.key_len, &val2);
+        if (val1 != val2) {
+            printf("Fuzz: HT=%p, RAX=%p value do not match "
+                   "for key %.*s\n",
+                   val1, val2, (int)iter.key_len, (char *)iter.key);
+            return 1;
+        }
+        numkeys++;
+    }
+
+    /* Check that the iterator reported all the elements. */
+    if (ht->numele != numkeys) {
+        printf("Fuzz: the iterator reported %lu keys instead of %lu\n", (unsigned long)numkeys,
+               (unsigned long)ht->numele);
+        return 1;
+    }
+
+    raxStop(&iter);
+    raxFree(rax);
+    htFree(ht);
+    return 0;
+}
+
+/* Redis Cluster alike fuzz testing.
+ *
+ * This test simulates the radix tree usage made by Redis Cluster in order
+ * to maintain the hash slot -> keys mapping. The keys are alphanumerical,
+ * except for the first two bytes, which are binary (they hold the hash slot
+ * computed from the key).
+ *
+ * In this test there is no comparison with the hash table, the only goal
+ * is to crash the radix tree implementation, or to trigger Valgrind
+ * warnings. After every insertion and removal the allocation size tracked
+ * by the tree is also compared with zmalloc_used_memory().
+ *
+ * count:   number of iterations.
+ * addprob: probability in [0, 1] of inserting the generated key.
+ * remprob: probability in [0, 1] of removing the generated key.
+ *
+ * Returns 0 when the run completes. */
+int fuzzTestCluster(size_t count, double addprob, double remprob) {
+    unsigned char key[128];
+
+    /* This is our template to generate keys. The first two bytes will
+     * be replaced with the binary redis cluster hash slot. */
+    int keylen = snprintf((char *)key, sizeof(key), "__geocode:2e68e5df3624");
+    char *cset = "0123456789abcdef";
+
+    /* Print the banner only now, so that it reports the real key length. */
+    printf("Cluster Fuzz test [keys:%zu keylen:%d]: ", count, keylen);
+    fflush(stdout);
+
+    rax *rax = raxNew();
+
+    for (unsigned long j = 0; j < count; j++) {
+        /* Generate a random key by altering our template key. */
+
+        /* With a given probability, let's use a common prefix so that there
+         * is a subset of keys that have a higher probability of being hit
+         * again and again. */
+        size_t commonprefix = genrand64_int64() & 0xf;
+        if (commonprefix == 0) memcpy(key + 10, "2e68e5", 6);
+
+        /* Alter a random char in the key. */
+        int pos = 10 + genrand64_int64() % 12;
+        key[pos] = cset[genrand64_int64() % 16];
+
+        /* Compute the Redis Cluster hash slot to set the first two
+         * binary bytes of the key. */
+        int hashslot = crc16((char *)key, keylen) & 0x3FFF;
+        key[0] = (hashslot >> 8) & 0xff;
+        key[1] = hashslot & 0xff;
+
+        /* Insert element. genrand64_int64() yields a full 64 bit value, so
+         * it must be scaled by UINT64_MAX (not RAND_MAX) to land in [0, 1]. */
+        if ((double)genrand64_int64() / (double)UINT64_MAX < addprob) {
+            raxInsert(rax, key, keylen, NULL, NULL);
+            TEST_ASSERT(raxAllocSize(rax) == zmalloc_used_memory());
+        }
+
+        /* Remove element. */
+        if ((double)genrand64_int64() / (double)UINT64_MAX < remprob) {
+            raxRemove(rax, key, keylen, NULL);
+            TEST_ASSERT(raxAllocSize(rax) == zmalloc_used_memory());
+        }
+    }
+    size_t finalkeys = raxSize(rax);
+    raxFree(rax);
+    printf("ok with %zu final keys\n", finalkeys);
+    return 0;
+}
+
+/* Iterator fuzz testing. Compares the items returned by the Rax iterator with
+ * a reference C implementation obtained by sorting the inserted strings in a
+ * linear array. Each array slot is one arrayItem. */
+typedef struct arrayItem {
+    unsigned char *key; /* Key bytes; length is carried separately in key_len. */
+    size_t key_len;     /* Number of bytes in key. */
+} arrayItem;
+
+/* Utility function used with qsort() in order to sort the array of strings
+ * in the same way Rax sorts keys (that is, lexicographically, considering
+ * every byte an unsigned integer).
+ *
+ * The common prefix of the two keys decides the order; when it is identical
+ * the shorter key sorts first. Returns a negative value, zero or a positive
+ * value when A is respectively smaller than, equal to or greater than B. */
+int compareAB(const unsigned char *keya, size_t lena, const unsigned char *keyb, size_t lenb) {
+    size_t shared = lena;
+    if (lenb < shared) shared = lenb;
+
+    int order = memcmp(keya, keyb, shared);
+    if (order != 0) return order;
+    if (lena == lenb) return 0;
+    return (lena < lenb) ? -1 : 1;
+}
+
+/* qsort() comparator for arrayItem elements: orders two items by their keys
+ * using compareAB(). */
+int compareArrayItems(const void *aptr, const void *bptr) {
+    const arrayItem *lhs = (const arrayItem *)aptr;
+    const arrayItem *rhs = (const arrayItem *)bptr;
+    int order = compareAB(lhs->key, lhs->key_len, rhs->key, rhs->key_len);
+    return order;
+}
+
+/* Seek an element in the array, returning the seek index (the index inside the
+ * array). If the seek is not possible (== operator and key not found or empty
+ * array) -1 is returned.
+ *
+ * array/count: the items to scan and how many there are; the scan is linear
+ *              and its result is only meaningful if the items are sorted
+ *              according to compareAB().
+ * key/len:     the key to seek.
+ * op:          seek operator: "^" (first), "$" (last), or a combination of
+ *              '<' / '>' / '=' where op[1] == '=' also accepts exact matches. */
+int arraySeek(arrayItem *array, int count, unsigned char *key, size_t len, char *op) {
+    if (count == 0) return -1;
+    if (op[0] == '^') return 0;
+    if (op[0] == '$') return count - 1;
+
+    /* Decode the operator into independent flags. */
+    int eq = 0, lt = 0, gt = 0;
+    if (op[1] == '=') eq = 1;
+    if (op[0] == '<') lt = 1;
+    if (op[0] == '>') gt = 1;
+
+    int i;
+    for (i = 0; i < count; i++) {
+        int cmp = compareAB(array[i].key, array[i].key_len, key, len);
+        if (eq && !cmp) return i;
+        if (cmp > 0 && gt) return i;
+        /* For '<' seeks the wanted element is the one right before the first
+         * element that is not smaller than the key. */
+        if (cmp >= 0 && lt) {
+            i--;
+            break;
+        }
+    }
+    /* '<' seek where every element is smaller than the key: pick the last. */
+    if (lt && i == count) return count - 1;
+    /* Nothing suitable found (i is -1 or ran past the end). */
+    if (i < 0 || i >= count) return -1;
+    return i;
+}
+
+/* Iterator fuzz test: fill a radix tree and a sorted linear array with the
+ * same keys, perform the same random seek on both, then step forward or
+ * backward (chosen at random) checking that both report the same keys and
+ * reach the end at the same time.
+ *
+ * keymode: key generation mode handed to int2key().
+ * count:   upper bound (exclusive) for the random number of insert attempts;
+ *          used as a modulus, so it must not be zero.
+ *
+ * Returns 0 on success, 1 on mismatch. For key modes other than KEY_RANDOM a
+ * key mismatch prints additional diagnostics and terminates the process. */
+int iteratorFuzzTest(int keymode, size_t count) {
+    count = genrand64_int64() % count;
+    rax *rax = raxNew();
+    arrayItem *array = zmalloc(sizeof(arrayItem) * count);
+
+    /* Fill a radix tree and a linear array with some data. */
+    unsigned char key[1024];
+    size_t j = 0;
+    for (size_t i = 0; i < count; i++) {
+        uint32_t keylen = int2key((char *)key, sizeof(key), i, keymode);
+        void *val = (void *)(unsigned long)htHash(key, keylen);
+
+        /* Only keys for which raxInsert() returns non-zero are mirrored in
+         * the array (presumably the newly added ones -- the element count is
+         * re-read from the tree below). */
+        if (raxInsert(rax, key, keylen, val, NULL)) {
+            array[j].key = zmalloc(keylen);
+            array[j].key_len = keylen;
+            memcpy(array[j].key, key, keylen);
+            j++;
+        }
+    }
+    count = raxSize(rax);
+
+    /* Sort the array. */
+    qsort(array, count, sizeof(arrayItem), compareArrayItems);
+
+    /* Perform a random seek operation. */
+    uint32_t keylen = int2key((char *)key, sizeof(key), genrand64_int64() % (count ? count : 1), keymode);
+    raxIterator iter;
+    raxStart(&iter, rax);
+    char *seekops[] = {"==", ">=", "<=", ">", "<", "^", "$"};
+    char *seekop = seekops[genrand64_int64() % 7];
+    raxSeek(&iter, seekop, key, keylen);
+    int seekidx = arraySeek(array, count, key, keylen, seekop);
+
+    /* Direction of the walk: 1 = raxNext(), 0 = raxPrev(). */
+    int next = genrand64_int64() % 2;
+    int iteration = 0;
+    while (1) {
+        int rax_res;
+        int array_res;
+        unsigned char *array_key = NULL;
+        size_t array_key_len = 0;
+
+        /* Compute what the reference array expects at this step: 0 means
+         * no more elements, 1 means array[seekidx] is the expected key. */
+        array_res = (seekidx == -1) ? 0 : 1;
+        if (array_res) {
+            if (next && seekidx == (signed)count) array_res = 0;
+            if (!next && seekidx == -1) array_res = 0;
+            if (array_res != 0) {
+                array_key = array[seekidx].key;
+                array_key_len = array[seekidx].key_len;
+            }
+        }
+
+        /* Advance both the radix tree iterator and the array index. */
+        if (next) {
+            rax_res = raxNext(&iter);
+            if (array_res) seekidx++;
+        } else {
+            rax_res = raxPrev(&iter);
+            if (array_res) seekidx--;
+        }
+
+        /* Both the iterators should agree about EOF. */
+        if (array_res != rax_res) {
+            printf("Iter fuzz: iterators do not agree about EOF "
+                   "at iteration %d:  "
+                   "array_more=%d rax_more=%d next=%d\n",
+                   iteration, array_res, rax_res, next);
+            return 1;
+        }
+        if (array_res == 0) break; /* End of iteration reached. */
+
+        /* Check that the returned keys are the same. */
+        if (iter.key_len != array_key_len || memcmp(iter.key, array_key, iter.key_len)) {
+            printf("Iter fuzz: returned element %d mismatch\n", iteration);
+            printf("SEEKOP was %s\n", seekop);
+            /* The extra dump prints keys with %.*s, so it is skipped for
+             * KEY_RANDOM. */
+            if (keymode != KEY_RANDOM) {
+                printf("\n");
+                printf("BUG SEEKING: %s %.*s\n", seekop, keylen, key);
+                printf("%.*s (iter) VS %.*s (array) next=%d idx=%d "
+                       "count=%lu keymode=%d\n",
+                       (int)iter.key_len, (char *)iter.key, (int)array_key_len, (char *)array_key, next, seekidx,
+                       (unsigned long)count, keymode);
+                if (count < 500) {
+                    printf("\n");
+                    for (unsigned int j = 0; j < count; j++) {
+                        printf("%d) '%.*s'\n", j, (int)array[j].key_len, array[j].key);
+                    }
+                }
+                exit(1);
+            }
+            return 1;
+        }
+        iteration++;
+    }
+
+    /* Release the reference array, the iterator and the tree. */
+    for (unsigned int i = 0; i < count; i++) zfree(array[i].key);
+    zfree(array);
+    raxStop(&iter);
+    raxFree(rax);
+    return 0;
+}
+
+/* Test the random walk function: keep walking until every inserted key has
+ * been reported at least once. The test fails if that does not happen within
+ * 100k steps, or if raxRandomWalk() stops reporting elements before that.
+ *
+ * Returns 0 on success, 1 on failure. */
+int test_raxRandomWalk(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *t = raxNew();
+    char *toadd[] = {"alligator", "alien", "byword", "chromodynamic", "romane", "romanus", "romulus", "rubens",
+                     "ruber", "rubicon", "rubicundus", "all", "rub", "by", NULL};
+
+    long numele;
+    for (numele = 0; toadd[numele] != NULL; numele++) {
+        raxInsert(t, (unsigned char *)toadd[numele], strlen(toadd[numele]), (void *)numele, NULL);
+        TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+    }
+
+    raxIterator iter;
+    raxStart(&iter, t);
+    raxSeek(&iter, "^", NULL, 0);
+
+    /* An explicit flag records success: a post-decremented loop counter
+     * cannot tell "ran out of iterations" apart from "finished on the very
+     * last iteration". */
+    int seen_all = 0;
+    for (int loops = 0; loops < 100000 && !seen_all; loops++) {
+        if (!raxRandomWalk(&iter, 0)) break;
+
+        /* Entries of toadd[] are set to NULL once the walk reported them. */
+        int nulls = 0;
+        for (long i = 0; i < numele; i++) {
+            if (toadd[i] == NULL) {
+                nulls++;
+                continue;
+            }
+            if (strlen(toadd[i]) == iter.key_len && memcmp(toadd[i], iter.key, iter.key_len) == 0) {
+                toadd[i] = NULL;
+                nulls++;
+            }
+        }
+        if (nulls == numele) seen_all = 1;
+    }
+
+    raxStop(&iter);
+    raxFree(t);
+
+    if (!seen_all) {
+        printf("randomWalkTest() is unable to report all the elements "
+               "after 100k iterations!\n");
+        return 1;
+    }
+    return 0;
+}
+
+/* Iterator unit tests: seek a fixed set of keys with different operators and
+ * check the element returned by the first raxNext() call. Table entries with
+ * a NULL expected key require raxNext() to report that there is no element.
+ *
+ * Returns 0 on success, 1 on failure. */
+int test_raxIteratorUnitTests(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *t = raxNew();
+    char *toadd[] = {"alligator", "alien", "byword", "chromodynamic", "romane", "romanus", "romulus", "rubens",
+                     "ruber", "rubicon", "rubicundus", "all", "rub", "by", NULL};
+
+    for (int x = 0; x < 10000; x++) genrand64_int64();
+
+    long items = 0;
+    while (toadd[items] != NULL) items++;
+
+    for (long i = 0; i < items; i++) {
+        raxInsert(t, (unsigned char *)toadd[i], strlen(toadd[i]), (void *)i, NULL);
+        TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+    }
+
+    raxIterator iter;
+    raxStart(&iter, t);
+
+    struct {
+        char *seek;
+        size_t seeklen;
+        char *seekop;
+        char *expected;
+    } tests[] = {/* Seek value. */ /* Expected result. */
+                 {"rpxxx", 5, "<=", "romulus"},
+                 {"rom", 3, ">=", "romane"},
+                 {"rub", 3, ">=", "rub"},
+                 {"rub", 3, ">", "rubens"},
+                 {"rub", 3, "<", "romulus"},
+                 {"rom", 3, ">", "romane"},
+                 {"chro", 4, ">", "chromodynamic"},
+                 {"chro", 4, "<", "byword"},
+                 {"chromz", 6, "<", "chromodynamic"},
+                 {"", 0, "^", "alien"},
+                 {"zorro", 5, "<=", "rubicundus"},
+                 {"zorro", 5, "<", "rubicundus"},
+                 {"zorro", 5, "<", "rubicundus"},
+                 {"", 0, "$", "rubicundus"},
+                 {"ro", 2, ">=", "romane"},
+                 {"zo", 2, ">", NULL},
+                 {"zo", 2, "==", NULL},
+                 {"romane", 6, "==", "romane"}};
+
+    /* Walk the table by its size: a NULL expected key is a valid entry
+     * (EOF expected), so it cannot double as the end-of-table marker. */
+    int numtests = (int)(sizeof(tests) / sizeof(tests[0]));
+    int failed = 0;
+    for (int i = 0; i < numtests && !failed; i++) {
+        raxSeek(&iter, tests[i].seekop, (unsigned char *)tests[i].seek, tests[i].seeklen);
+        int retval = raxNext(&iter);
+
+        if (tests[i].expected != NULL) {
+            if (retval == 0) {
+                /* Do not inspect iter.key when no element was returned. */
+                printf("Iterator unit test error: "
+                       "test %d, %s expected, EOF reported\n",
+                       i, tests[i].expected);
+                failed = 1;
+            } else if (strlen(tests[i].expected) != iter.key_len ||
+                       memcmp(tests[i].expected, iter.key, iter.key_len) != 0) {
+                printf("Iterator unit test error: "
+                       "test %d, %s expected, %.*s reported\n",
+                       i, tests[i].expected, (int)iter.key_len, (char *)iter.key);
+                failed = 1;
+            }
+        } else {
+            if (retval != 0) {
+                printf("Iterator unit test error: "
+                       "EOF expected in test %d\n",
+                       i);
+                failed = 1;
+            }
+        }
+    }
+    raxStop(&iter);
+    raxFree(t);
+    return failed;
+}
+
+/* Check the overwrite semantics of raxTryInsert() and raxInsert() on a key
+ * that already exists: the former must hand back the stored value and leave
+ * it untouched, the latter must replace it. */
+int test_raxTryInsertUnitTests(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    unsigned char *foo = (unsigned char *)"FOO";
+    void *const one = (void *)(long)1;
+    void *const two = (void *)(long)2;
+    void *previous, *stored;
+
+    rax *tree = raxNew();
+    raxInsert(tree, foo, 3, one, NULL);
+
+    /* Trying to insert over an existing key reports the current value... */
+    raxTryInsert(tree, foo, 3, two, &previous);
+    if (previous != one) {
+        printf("Old value not returned correctly by raxTryInsert(): %p", previous);
+        return 1;
+    }
+
+    /* ...and does not modify what is stored. */
+    stored = NULL;
+    raxFind(tree, foo, 3, &stored);
+    if (stored != one) {
+        printf("FOO value mismatch: is %p instead of 1", stored);
+        return 1;
+    }
+
+    /* A plain insert overwrites the value instead. */
+    raxInsert(tree, foo, 3, two, NULL);
+    stored = NULL;
+    raxFind(tree, foo, 3, &stored);
+    if (stored != two) {
+        printf("FOO value mismatch: is %p instead of 2", stored);
+        return 1;
+    }
+
+    raxFree(tree);
+    return 0;
+}
+
+/* Regression test #1: Iterator wrong element returned after seek.
+ *
+ * Seeking the first key greater than "FMP" must return "FY". The test also
+ * fails when the iterator reports no element at all, otherwise a seek that
+ * wrongly ends up at EOF would go unnoticed.
+ *
+ * Returns 0 on success, 1 on failure. */
+int test_raxRegressionTest1(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rax = raxNew();
+    raxInsert(rax, (unsigned char *)"LKE", 3, (void *)(long)1, NULL);
+    raxInsert(rax, (unsigned char *)"TQ", 2, (void *)(long)2, NULL);
+    raxInsert(rax, (unsigned char *)"B", 1, (void *)(long)3, NULL);
+    raxInsert(rax, (unsigned char *)"FY", 2, (void *)(long)4, NULL);
+    raxInsert(rax, (unsigned char *)"WI", 2, (void *)(long)5, NULL);
+
+    int failed = 0;
+    raxIterator iter;
+    raxStart(&iter, rax);
+    raxSeek(&iter, ">", (unsigned char *)"FMP", 3);
+    if (!raxNext(&iter)) {
+        printf("Regression test 1 failed: 'FY' expected, got EOF\n");
+        failed = 1;
+    } else if (iter.key_len != 2 || memcmp(iter.key, "FY", 2)) {
+        printf("Regression test 1 failed: 'FY' expected, got: '%.*s'\n", (int)iter.key_len, (char *)iter.key);
+        failed = 1;
+    }
+
+    raxStop(&iter);
+    raxFree(rax);
+    return failed;
+}
+
+/* Regression test #2: Crash when mixing NULL and not NULL values.
+ *
+ * Replays a fixed sequence of insertions where a key first stored with a
+ * NULL value is later overwritten with a non NULL one. There is nothing to
+ * check besides the sequence completing. */
+int test_raxRegressionTest2(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    struct {
+        char *key;
+        size_t len;
+        void *val;
+    } steps[] = {
+        {"a", 1, (void *)100}, {"ab", 2, (void *)101}, {"abc", 3, (void *)NULL},
+        {"abcd", 4, (void *)NULL}, {"abc", 3, (void *)102},
+    };
+
+    rax *tree = raxNew();
+    for (size_t i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
+        raxInsert(tree, (unsigned char *)steps[i].key, steps[i].len, steps[i].val, NULL);
+    }
+    raxFree(tree);
+    return 0;
+}
+
+/* Regression test #3: Wrong access at node value in raxRemoveChild()
+ * when iskey == 1 and isnull == 1: the memmove() was performed including
+ * the value length regardless of the fact there was no actual value.
+ *
+ * The scenario is: insert "D", insert the empty key with a NULL value, then
+ * remove "D".
+ *
+ * Note that this test always returns success but will trigger a
+ * Valgrind error. */
+int test_raxRegressionTest3(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    unsigned char *letter = (unsigned char *)"D";
+    unsigned char *empty = (unsigned char *)"";
+
+    rax *tree = raxNew();
+    raxInsert(tree, letter, 1, (void *)1, NULL);
+    raxInsert(tree, empty, 0, NULL, NULL);
+    raxRemove(tree, letter, 1, NULL);
+    raxFree(tree);
+    return 0;
+}
+
+/* Regression test #4: Github issue #8, iterator does not populate the
+ * data field after seek in case of exact match. The test case looks odd
+ * because it is quite indirect: seeking "^" results in seeking the
+ * element >= "", and since "" was just added an exact match happens.
+ * The original case from the bug report is kept on purpose, since it is
+ * unusual and may later protect against different bugs related to
+ * storing and fetching the empty string key. */
+int test_raxRegressionTest4(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    unsigned char *empty = (unsigned char *)"";
+    void *const marker = (void *)-1;
+
+    rax *tree = raxNew();
+    raxIterator it;
+    raxInsert(tree, empty, 0, marker, NULL);
+
+    /* A direct lookup must see the value... */
+    void *found = NULL;
+    raxFind(tree, empty, 0, &found);
+    if (found != marker) {
+        printf("Regression test 4 failed. Key value mismatch in raxFind()\n");
+        return 1;
+    }
+
+    /* ...and so must the iterator after seeking the first element. */
+    raxStart(&it, tree);
+    raxSeek(&it, "^", NULL, 0);
+    raxNext(&it);
+    if (it.data != marker) {
+        printf("Regression test 4 failed. Key value mismatch in raxNext()\n");
+        return 1;
+    }
+
+    raxStop(&it);
+    raxFree(tree);
+    return 0;
+}
+
+/* Regression test #5: Less than seek bug when stopping in the middle of a
+ * compressed node.
+ *
+ * Seeking "<" "foo" must position the iterator on "f".
+ *
+ * Returns 0 on success, 1 on failure. */
+int test_raxRegressionTest5(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    rax *rax = raxNew();
+
+    raxInsert(rax, (unsigned char *)"b", 1, (void *)(long)1, NULL);
+    raxInsert(rax, (unsigned char *)"by", 2, (void *)(long)2, NULL);
+    raxInsert(rax, (unsigned char *)"byword", 6, (void *)(long)3, NULL);
+
+    raxInsert(rax, (unsigned char *)"f", 1, (void *)(long)4, NULL);
+    raxInsert(rax, (unsigned char *)"foobar", 6, (void *)(long)5, NULL);
+    raxInsert(rax, (unsigned char *)"foobar123", 9, (void *)(long)6, NULL);
+
+    raxIterator ri;
+    raxStart(&ri, rax);
+
+    raxSeek(&ri, "<", (unsigned char *)"foo", 3);
+    raxNext(&ri);
+    if (ri.key_len != 1 || ri.key[0] != 'f') {
+        /* Report the right test number, so that a failure is attributed to
+         * this test and not to regression test 4. */
+        printf("Regression test 5 failed. Key value mismatch in raxNext()\n");
+        return 1;
+    }
+
+    raxStop(&ri);
+    raxFree(rax);
+    return 0;
+}
+
+/* Regression test #6: Seek may not populate iterator data. See issue #25.
+ *
+ * Two keys are stored, one being a prefix of the other. After a "<=" seek
+ * to a point between them, raxPrev() must expose the data associated with
+ * the shorter key. */
+int test_raxRegressionTest6(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    char *shorter = "172.17.141.2/adminguide/v5.0/";
+    char *longer = "172.17.141.2/adminguide/v5.0/entitlements-configure.html";
+    char *between = "172.17.141.2/adminguide/v5.0/entitlements";
+
+    rax *tree = raxNew();
+    raxInsert(tree, (unsigned char *)shorter, strlen(shorter), (void *)(long)1234, NULL);
+    raxInsert(tree, (unsigned char *)longer, strlen(longer), (void *)(long)5678, NULL);
+
+    raxIterator it;
+    raxStart(&it, tree);
+    raxSeek(&it, "<=", (unsigned char *)between, strlen(between));
+    raxPrev(&it);
+
+    long seen = (long)it.data;
+    if (seen != 1234) {
+        printf("Regression test 6 failed. Key data not populated.\n");
+        return 1;
+    }
+
+    raxStop(&it);
+    raxFree(tree);
+    return 0;
+}
+
+/* Benchmark: time insertion, linear / random / failed lookups, a full
+ * iteration and deletion of 5 million keys, once for each of the first two
+ * int2key() modes. The allocation size tracked by the tree is compared with
+ * zmalloc_used_memory() after every insertion and deletion.
+ *
+ * Runs only when UNIT_TEST_SINGLE is set in flags; returns 0 immediately
+ * otherwise. Returns 0 when the run completes. */
+int test_raxBenchmark(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    if (!(flags & UNIT_TEST_SINGLE)) return 0;
+
+    enum { BENCH_KEYS = 5000000 }; /* Number of keys used in every phase. */
+
+    for (int mode = 0; mode < 2; mode++) {
+        printf("Benchmark with %s keys:\n", (mode == 0) ? "integer" : "alphanumerical");
+        rax *t = raxNew();
+        long long start = _ustime();
+        for (int i = 0; i < BENCH_KEYS; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            raxInsert(t, (unsigned char *)buf, len, (void *)(long)i, NULL);
+            TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+        }
+        printf("Insert: %f\n", (double)(_ustime() - start) / 1000000);
+        printf("%llu total nodes\n", (unsigned long long)t->numnodes);
+        printf("%llu total elements\n", (unsigned long long)t->numele);
+
+        start = _ustime();
+        for (int i = 0; i < BENCH_KEYS; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            /* Initialized so that the diagnostic below never prints an
+             * indeterminate pointer when the lookup fails. */
+            void *data = NULL;
+            if (!raxFind(t, (unsigned char *)buf, len, &data) || data != (void *)(long)i) {
+                printf("Issue with %s: %p instead of %p\n", buf, data, (void *)(long)i);
+            }
+        }
+        printf("Linear lookup: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        for (int i = 0; i < BENCH_KEYS; i++) {
+            char buf[64];
+            int r = genrand64_int64() % BENCH_KEYS;
+            int len = int2key(buf, sizeof(buf), r, mode);
+            void *data = NULL;
+            if (!raxFind(t, (unsigned char *)buf, len, &data) || data != (void *)(long)r) {
+                printf("Issue with %s: %p instead of %p\n", buf, data, (void *)(long)r);
+            }
+        }
+        printf("Random lookup: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        for (int i = 0; i < BENCH_KEYS; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            buf[i % len] = '!'; /* "!" is never set into keys. */
+            TEST_ASSERT_MESSAGE("Lookup should have failed", !raxFind(t, (unsigned char *)buf, len, NULL));
+        }
+        printf("Failed lookup: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        raxIterator ri;
+        raxStart(&ri, t);
+        raxSeek(&ri, "^", NULL, 0);
+        int iter = 0;
+        while (raxNext(&ri)) iter++;
+        TEST_ASSERT_MESSAGE("Iteration is incomplete", iter == BENCH_KEYS);
+        raxStop(&ri);
+        printf("Full iteration: %f\n", (double)(_ustime() - start) / 1000000);
+
+        start = _ustime();
+        for (int i = 0; i < BENCH_KEYS; i++) {
+            char buf[64];
+            int len = int2key(buf, sizeof(buf), i, mode);
+            int retval = raxRemove(t, (unsigned char *)buf, len, NULL);
+            TEST_ASSERT(retval == 1);
+            TEST_ASSERT(raxAllocSize(t) == zmalloc_used_memory());
+        }
+        printf("Deletion: %f\n", (double)(_ustime() - start) / 1000000);
+
+        printf("%llu total nodes\n", (unsigned long long)t->numnodes);
+        printf("%llu total elements\n", (unsigned long long)t->numele);
+        raxFree(t);
+    }
+
+    return 0;
+}
+
+/* Compressed nodes can only hold (2^29)-1 characters, so it is important
+ * to test for keys bigger than this amount, in order to make sure that
+ * the code to handle this edge case works as expected.
+ *
+ * A short key and a key 100 bytes longer than that limit are inserted, then
+ * both are looked up and their values checked.
+ *
+ * This test is disabled by default because it uses a lot of memory: it runs
+ * only when UNIT_TEST_LARGE_MEMORY is set in flags. */
+int test_raxHugeKey(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    if ((flags & UNIT_TEST_LARGE_MEMORY) == 0) return 0;
+
+    unsigned char *small_key = (unsigned char *)"aaabbb";
+    size_t max_keylen = ((1 << 29) - 1) + 100;
+    rax *tree;
+    int inserted;
+    void *small_val, *huge_val;
+
+    unsigned char *huge_key = zmalloc(max_keylen);
+    if (huge_key == NULL) goto oom;
+
+    /* Mostly 'a', with two distinct bytes near the start and at the end. */
+    memset(huge_key, 'a', max_keylen);
+    huge_key[10] = 'X';
+    huge_key[max_keylen - 1] = 'Y';
+
+    tree = raxNew();
+    inserted = raxInsert(tree, small_key, 6, (void *)5678L, NULL);
+    if (inserted == 0 && errno == ENOMEM) goto oom;
+    inserted = raxInsert(tree, huge_key, max_keylen, (void *)1234L, NULL);
+    if (inserted == 0 && errno == ENOMEM) goto oom;
+
+    int small_found = raxFind(tree, small_key, 6, &small_val);
+    int huge_found = raxFind(tree, huge_key, max_keylen, &huge_val);
+    zfree(huge_key);
+
+    if (!(small_found && huge_found)) {
+        printf("Huge key test failed on elementhood\n");
+        return 1;
+    }
+    if (!(small_val == (void *)5678L && huge_val == (void *)1234L)) {
+        printf("Huge key test failed\n");
+        return 1;
+    }
+    raxFree(tree);
+    return 0;
+
+oom:
+    fprintf(stderr, "Sorry, not enough memory to execute --hugekey test.");
+    exit(1);
+}
+
+/* Fuzz test driver: runs the cluster fuzz test, the hash table comparison
+ * fuzz test in every key mode and the iterator fuzz test, counting how many
+ * of them report an error.
+ *
+ * Runs only when UNIT_TEST_ACCURATE is set in flags; returns 0 immediately
+ * otherwise. Returns 0 if no error was found, 1 otherwise. */
+int test_raxFuzz(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    if (!(flags & UNIT_TEST_ACCURATE)) return 0;
+
+    int errors = 0;
+
+    init_genrand64(1234);
+
+    /* alpha / beta are the add / remove probabilities. genrand64_int64()
+     * yields a full 64 bit value, so it must be scaled by UINT64_MAX (not
+     * RAND_MAX) for alpha to land in [0, 1] and beta = 1 - alpha to be a
+     * valid probability too. */
+    for (int i = 0; i < 10; i++) {
+        double alpha = (double)genrand64_int64() / (double)UINT64_MAX;
+        double beta = 1 - alpha;
+        if (fuzzTestCluster(genrand64_int64() % 100000000, alpha, beta)) errors++;
+    }
+
+    for (int i = 0; i < 10; i++) {
+        double alpha = (double)genrand64_int64() / (double)UINT64_MAX;
+        double beta = 1 - alpha;
+        if (fuzzTest(KEY_INT, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_UNIQUE_ALPHA, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_RANDOM, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_RANDOM_ALPHA, genrand64_int64() % 10000, alpha, beta)) errors++;
+        if (fuzzTest(KEY_RANDOM_SMALL_CSET, genrand64_int64() % 10000, alpha, beta)) errors++;
+    }
+
+    /* Fixed probabilities, with the number of operations growing tenfold at
+     * every cycle. */
+    size_t numops = 100000, cycles = 3;
+    while (cycles--) {
+        if (fuzzTest(KEY_INT, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_UNIQUE_ALPHA, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_RANDOM, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_RANDOM_ALPHA, numops, .7, .3)) errors++;
+        if (fuzzTest(KEY_RANDOM_SMALL_CSET, numops, .7, .3)) errors++;
+        numops *= 10;
+    }
+
+    if (fuzzTest(KEY_CHAIN, 1000, .7, .3)) errors++;
+    printf("Iterator fuzz test: ");
+    fflush(stdout);
+    for (int i = 0; i < 100000; i++) {
+        if (iteratorFuzzTest(KEY_INT, 100)) errors++;
+        if (iteratorFuzzTest(KEY_UNIQUE_ALPHA, 100)) errors++;
+        if (iteratorFuzzTest(KEY_RANDOM_ALPHA, 1000)) errors++;
+        if (iteratorFuzzTest(KEY_RANDOM, 1000)) errors++;
+        /* Progress report: a dot every 100 rounds, a percentage every 1000. */
+        if (i && !(i % 100)) {
+            printf(".");
+            if (!(i % 1000)) {
+                printf("%d%% done", i / 1000);
+            }
+            fflush(stdout);
+        }
+    }
+    printf("\n");
+
+    if (errors) {
+        printf("!!! WARNING !!!: %d errors found\n", errors);
+    } else {
+        printf("OK! \\o/\n");
+    }
+    return !!errors;
+}