fix: hashtables and divide and conquer

InfiniBrains · Feb 27, 2024 · d71beef · d71beef
1 parent d8f3ad1
commit d71beef
Show file tree

Hide file tree

Showing 3 changed files with 128 additions and 97 deletions.
diff --git a/docs/algorithms/05-divide-and-conquer/README.md b/docs/algorithms/05-divide-and-conquer/README.md
@@ -57,89 +57,117 @@ The algorthims will keep dividing the array (in red) until it reaches the base c
 #include <vector>
 #include <queue>
 
-// Merge two sorted halves 
+// in-place merge: needs no temporary buffer, but shifts elements one by one (quadratic in the worst case)
 template <typename T>
 requires std::is_arithmetic<T>::value // C++20
-void merge(std::vector<T>& arr, const size_t start, const size_t mid,  const size_t end) {
-    // create a temporary array to store the merged array
-    std::vector<T> temp(end - start + 1);
-
-    // indexes for the subarrays:
-    const size_t leftStart = start;
-    const size_t leftEnd = mid;
-    const size_t rightStart = mid + 1;
-    const size_t rightEnd = end;
-
-    // indexes for 
-    size_t tempIdx = 0;
-    size_t leftIdx = leftStart;
-    size_t rightIdx = rightStart;
-
-    // merge the subarrays
-    while (leftIdx <= leftEnd && rightIdx <= rightEnd) {
-        if (arr[leftIdx] < arr[rightIdx])
-            temp[tempIdx++] = arr[leftIdx++];
-        else
-            temp[tempIdx++] = arr[rightIdx++];
+void mergeInplace(std::vector<T>& arr, const size_t start, size_t mid,  const size_t end) { // merges arr[start..mid] and arr[mid + 1..end] (inclusive bounds); assumes each range is already sorted
+  size_t left = start; // next unmerged element of the left range
+  size_t right = mid + 1; // next unmerged element of the right range
+
+  while (left <= mid && right <= end) {
+    if (arr[left] <= arr[right]) { // arr[left] is already where it belongs
+      left++;
+    } else {
+      T temp = arr[right]; // arr[right] is smaller: save it before its slot is overwritten
+      for (size_t i = right; i > left; i--) { // shift arr[left..right - 1] one position to the right
+        arr[i] = arr[i - 1];
+      }
+      arr[left] = temp;
+      left++;
+      mid++; // the left range moved one slot to the right, so its last index moves too
+      right++;
     }
+  }
+}
+
+// Merge two sorted halves
+template <typename T>
+requires std::is_arithmetic<T>::value // C++20
+void merge(std::vector<T>& arr, const size_t start, const size_t mid,  const size_t end) { // halves are arr[start..mid] and arr[mid + 1..end], all bounds inclusive
+  // create a temporary array to store the merged array
+  std::vector<T> temp(end - start + 1);
+
+  // indexes for the subarrays:
+  const size_t leftStart = start;
+  const size_t leftEnd = mid;
+  const size_t rightStart = mid + 1;
+  const size_t rightEnd = end;
+
+  // indexes for the current position in the temporary array and in each subarray
+  size_t tempIdx = 0;
+  size_t leftIdx = leftStart;
+  size_t rightIdx = rightStart;
+
+  // merge the subarrays
+  while (leftIdx <= leftEnd && rightIdx <= rightEnd) {
+    if (arr[leftIdx] < arr[rightIdx]) // strict comparison: on a tie the right element is copied first
+      temp[tempIdx++] = arr[leftIdx++];
+    else
+      temp[tempIdx++] = arr[rightIdx++];
+  }
 
-    // copy the remaining elements of the left subarray
-    while (leftIdx <= leftEnd)
-        temp[tempIdx++] = arr[leftIdx++];
+  // copy the remaining elements of the left subarray
+  while (leftIdx <= leftEnd)
+    temp[tempIdx++] = arr[leftIdx++];
 
-    // copy the remaining elements of the right subarray
-    while (rightIdx <= rightEnd)
-        temp[tempIdx++] = arr[rightIdx++];
+  // copy the remaining elements of the right subarray
+  while (rightIdx <= rightEnd)
+    temp[tempIdx++] = arr[rightIdx++];
 
-    // copy the merged array back to the original array
-    std::copy(temp.begin(), temp.end(), arr.begin() + start);
+  // copy the merged array back to the original array
+  std::copy(temp.begin(), temp.end(), arr.begin() + start);
 }
 
 // recursive mergesort: sorts arr[left..right] (both bounds inclusive) in ascending order
 template <typename T>
 requires std::is_arithmetic<T>::value // C++20
 void mergesortRecursive(std::vector<T>& arr,
-               size_t left,
-               size_t right) {
-    if (right - left > 0) {
-        size_t mid = (left + right) / 2;
-        mergesortRecursive(arr, left, mid);
-        mergesortRecursive(arr, mid+1, right);
-        merge(arr, left, mid, right);
-    }
+                        size_t left,
+                        size_t right) {
+  // A range with fewer than two elements is already sorted.
+  // The bounds check also protects the usual call (arr, 0, arr.size() - 1) on an
+  // empty vector, where arr.size() - 1 wraps around to the largest size_t value.
+  if (left < right && right < arr.size()) {
+    // same midpoint as (left + right) / 2, but the sum cannot overflow
+    const size_t mid = left + (right - left) / 2;
+    mergesortRecursive(arr, left, mid);
+    mergesortRecursive(arr, mid + 1, right);
+    merge(arr, left, mid, right);
+    // if the memory is limited, use the inplace merge at the cost of performance
+    // (here the bounds are already inclusive, so they are passed unchanged)
+    // mergeInplace(arr, left, mid, right);
+  }
 }
 
 // iterative (bottom-up) mergesort
 template <typename T>
 requires std::is_arithmetic<T>::value // C++20
 void mergesortInteractive(std::vector<T>& arr) {
-    for(size_t width = 1; width < arr.size(); width *= 2) {
-        for(size_t left = 0; left < arr.size(); left += 2 * width) {
-            size_t mid = std::min(left + width, arr.size());
-            size_t right = std::min(left + 2 * width, arr.size());
-            merge(arr, left, mid - 1, right - 1);
-        }
+  for(size_t width = 1; width < arr.size(); width *= 2) { // width = length of the runs merged in this pass; doubles every pass
+    for(size_t left = 0; left < arr.size(); left += 2 * width) { // left = first index of each pair of adjacent runs
+      size_t mid = std::min(left + width, arr.size()); // one past the end of the left run, clamped to the array size
+      size_t right = std::min(left + 2 * width, arr.size()); // one past the end of the right run, clamped to the array size
+      merge(arr, left, mid - 1, right - 1); // merge takes inclusive bounds, hence the - 1
+      // if the memory is limited, use the inplace merge at the cost of performance
+      // mergeInplace(arr, left, mid - 1, right - 1);
     }
+  }
 }
 
 
 int main() { // demo: sorts the same 1000 random values with both mergesort variants and prints the results
-    std::vector<int> arr1;
-    for(int i = 1000; i > 0; i--)
-        arr1.push_back(rand()%1000);
-    std::vector<int> arr2 = arr1;
-    
-    for(auto i: arr1) std::cout << i << " ";
+  std::vector<int> arr1;
+  for(int i = 1000; i > 0; i--)
+    arr1.push_back(rand()%1000); // pseudo-random value in [0, 999]
+  std::vector<int> arr2 = arr1; // identical copy, sorted later by the second variant
 
-    mergesortRecursive(arr1, 0, arr1.size() - 1);
-    for(auto i: arr1) std::cout << i << " ";
-    std::cout << std::endl;
+  for(auto i: arr1) std::cout << i << " "; // print the unsorted input
+  std::cout << std::endl;
 
-    mergesortInteractive(arr2);
-    for(auto i: arr2) std::cout << i << " ";
-    std::cout << std::endl;
+  mergesortRecursive(arr1, 0, arr1.size() - 1); // bounds are inclusive, hence size() - 1
+  for(auto i: arr1) std::cout << i << " ";
+  std::cout << std::endl;
 
-    return 0;
+  mergesortInteractive(arr2);
+  for(auto i: arr2) std::cout << i << " ";
+  std::cout << std::endl;
+
+  return 0;
 }
 ```
 

diff --git a/docs/algorithms/06-hashtables/README.md b/docs/algorithms/06-hashtables/README.md
@@ -63,12 +63,12 @@ If the size of your key is less than the `size_t` you could just use the key cas
 
 ```c++
 struct MyCustomDataWith128Bits {
-  uint64_t a;
-  uint64_t b;
-  uint64_t c;
-  uint64_t d;
+  uint32_t a;
+  uint32_t b;
+  uint32_t c;
+  uint32_t d;
+  // Mixes the four 32-bit fields into a single hash value.
+  // Every field is widened to 64 bits before it is shifted: shifting a 32-bit
+  // value by 32 is undefined behavior, and shifting it by 24 or 16 would
+  // silently drop its high bits.
   size_t hash() const {
-    return a ^ b ^ c ^ d;
+    return static_cast<size_t>((static_cast<uint64_t>(a) << 32) ^
+                               (static_cast<uint64_t>(b) << 24) ^
+                               (static_cast<uint64_t>(c) << 16) ^
+                               static_cast<uint64_t>(d));
   }
 };
 ```
@@ -114,6 +114,8 @@ For the sake of simplicity I will use the operator modulo to convert the hash in
 
 ## Collision resolution
 
+### Linked lists
+
 ![img_2.png](img_2.png)
 
 Assuming that your hash function is not perfect, you will have to deal with collisions. Two or more different keys could produce the same hash. There are plenty of ways to deal with that, but the easiest way is to use a linked list to store the key-value pairs that have the same hash.
@@ -123,7 +125,7 @@ Try to come up with your own strategy to deal with collisions.
 ![img_1.png](img_1.png)
 [source](https://www.hackerearth.com/practice/data-structures/hash-tables/basics-of-hash-tables/tutorial/)
 
-### Key restrictions
+#### Key restrictions
 
 In order for the hash table to work, the key should be:
 
@@ -134,15 +136,18 @@ In order for the hash table to work, the key should be:
 In C++20 you can use the `concept` feature to enforce those restrictions.
 
 ```c++
-#include <iostream>
-
+#include <concepts>    // std::convertible_to
+#include <cstddef>     // std::size_t
+#include <functional>  // std::hash
+#include <type_traits> // std::is_const_v
+
+// Concept for a hash table key. A type qualifies in one of two ways:
+//  1. it is const-qualified, has a hash() member whose result converts to
+//     std::size_t, and can be compared with ==; or
+//  2. std::hash<T> can be used on it and it can be compared with ==.
 template <typename T>
-concept HasHashFunction = // C++20 concept
-requires(T t) {
-    { t.hash() } -> std::convertible_to<size_t>;
-} && requires(T t, T u) {
-    { t == u } -> std::convertible_to<bool>;
-} && std::is_const_v<T>;
+concept HasHashFunction =
+requires(T t, T u) {
+  { t.hash() } -> std::convertible_to<std::size_t>;
+  { t == u } -> std::convertible_to<bool>;
+  // nested requirement: a bare `std::is_const_v<T>;` would only check that the
+  // expression is well-formed, which holds for every type
+  requires std::is_const_v<T>;
+} || requires(T t, T u) {
+  { std::hash<T>{}(t) } -> std::convertible_to<std::size_t>;
+  { t == u } -> std::convertible_to<bool>;
+};
+
 
 int main() {
   struct MyHashableType {
@@ -154,14 +159,15 @@ int main() {
       return value == other.value;
     }
   };
-  static_assert(HasHashFunction<MyHashableType>);
+  static_assert(HasHashFunction<const MyHashableType>);
+  static_assert(HasHashFunction<int>);
   return 0;
 }
 ```
 
 But you can require more from the key if you are going to implement a more complex collision resolution strategy.
 
-## Implementation
+#### Hash table implementation with linked lists 
 
 ![kitten-cat.gif](kitten-cat.gif)
 
@@ -173,12 +179,15 @@ This implementation is naive and not efficient. It is just to give you an idea o
 // key should not be modifiable
 // implements hash function and implements == operator
 template <typename T>
-concept HasHashFunction = // C++20 concept
-requires(T t) {
-    { t.hash() } -> std::convertible_to<size_t>;
-} && requires(T t, T u) {
-    { t == u } -> std::convertible_to<bool>;
-} && std::is_const_v<T>;
+concept HasHashFunction =
+requires(T t, T u) { // first alternative: the key type brings its own hash() member
+  { t.hash() } -> std::convertible_to<std::size_t>;
+  { t == u } -> std::convertible_to<bool>;
+  std::is_const_v<T>; // NOTE(review): written like this it only checks that the expression is valid, so constness is not enforced; `requires std::is_const_v<T>;` would enforce it - confirm the intent
+} || requires(T t, T u) { // second alternative: the key type is usable with std::hash
+  { std::hash<T>{}(t) } -> std::convertible_to<std::size_t>;
+  { t == u } -> std::convertible_to<bool>;
+};
 
 // hash table
 template <HasHashFunction K, typename V>
@@ -205,6 +214,8 @@ public:
     // the hashtable will start with a constant size. You can resize it if you want or use any other strategy
     // a good size is something similar to the number of elements you are going to store
     explicit Hashtable(size_t size) {
+        // you could make the table resize automatically, at the cost of a more complex implementation
+        // for the sake of simplicity I will not do that
         this->size = size;
         table = new HashtableNode*[size];
         for (size_t i = 0; i < size; i++) {
@@ -218,6 +229,7 @@ private:
 public:
     // inserts a new key value pair
     void insert(K key, V value) {
+        // you can optionally resize the table and rearrange the elements if the table is too full
         size_t index = convertKeyToIndex(key);
         auto* node = new HashtableNode(key, value);
         if (table[index] == nullptr) {
@@ -318,3 +330,6 @@ int main() {
 }
 ```
 
+### Open addressing with linear probing
+
+Open addressing is a method of collision resolution in hash tables. In this approach, each cell is not a pointer to the linked list of contents of that bucket, but instead contains a single key-value pair. In linear probing, when a collision occurs, the next cell is checked. If it is occupied, the next cell is checked, and so on, until an empty cell is found.
diff --git a/docs/artificialintelligence/05-kdtree/README.md b/docs/artificialintelligence/05-kdtree/README.md
@@ -440,24 +440,12 @@ public:
     }
 
     GameObject* NearestNeighbor(KDNode* node, Vector2f position, GameObject* best, float bestDistance, int dimensionId) {
-        if (node == nullptr) return best;
-        float distance = node->object->position.distanceSqrd(position);
-        if (distance < bestDistance) {
-            best = node->object;
-            bestDistance = distance;
-        }
-        if (position[dimensionId] < node->object->position[dimensionId]) {
-            best = NearestNeighbor(node->left, position, best, bestDistance, (dimensionId + 1) % 2);
-            if (position[dimensionId] + bestDistance >= node->object->position[dimensionId]) {
-                best = NearestNeighbor(node->right, position, best, bestDistance, (dimensionId + 1) % 2);
-            }
-        } else {
-            best = NearestNeighbor(node->right, position, best, bestDistance, (dimensionId + 1) % 2);
-            if (position[dimensionId] - bestDistance <= node->object->position[dimensionId]) {
-                best = NearestNeighbor(node->left, position, best, bestDistance, (dimensionId + 1) % 2);
-            }
-        }
-        return best;
+        // create your own Nearest Neighbor algorithm. That's not hard, just follow the rules
+        // 1. If the current node is null, return the best
+        // 2. If the current node is closer to the position than the best, update the best (and the best distance)
+        // 3. Search first the child on the same side of the splitting coordinate (dimensionId) as the position
+        // 4. Search the other child only if the splitting coordinate is within the best distance of the position
+        // 5. Return the best
+        return best; // placeholder so the stub always returns a value; replace it with your implementation
     }
 
     // draw the tree