Skip to content

Commit

Permalink
fix: hashtables and divide and conquer
Browse files Browse the repository at this point in the history
  • Loading branch information
tolstenko committed Feb 27, 2024
1 parent d8f3ad1 commit d71beef
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 97 deletions.
142 changes: 85 additions & 57 deletions docs/algorithms/05-divide-and-conquer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,89 +57,117 @@ The algorithm will keep dividing the array (in red) until it reaches the base c
#include <algorithm>
#include <queue>
#include <type_traits>
#include <vector>
// In-place merge of two adjacent sorted runs, without an auxiliary buffer.
//
// Merges the sorted ranges arr[start..mid] and arr[mid+1..end] (all bounds
// inclusive) so that arr[start..end] ends up sorted. Elements that compare
// equal keep their relative order.
//
// No extra memory is used, but every element taken from the right run shifts
// the remaining part of the left run one slot to the right, so the worst case
// is quadratic in the length of the merged range.
//
// arr   - vector that holds both runs; modified in place
// start - index of the first element of the left run
// mid   - index of the last element of the left run
// end   - index of the last element of the right run
//
// Requires <algorithm> (std::rotate) and <type_traits>.
template <typename T>
requires std::is_arithmetic<T>::value // C++20
void mergeInplace(std::vector<T>& arr, const size_t start, size_t mid, const size_t end) {
  // nothing to merge when one of the runs is empty or the range leaves the vector
  if (start > mid || mid >= end || end >= arr.size()) return;

  size_t left = start;     // next element of the left run still to be placed
  size_t right = mid + 1;  // next element of the right run still to be placed
  while (left <= mid && right <= end) {
    if (arr[left] <= arr[right]) {
      // arr[left] is already where it belongs
      left++;
    } else {
      // arr[right] belongs at position left: rotate it there, which shifts
      // arr[left..right-1] one slot to the right
      std::rotate(arr.begin() + left, arr.begin() + right, arr.begin() + right + 1);
      // the left run moved one slot to the right, so all its bounds move too
      left++;
      mid++;
      right++;
    }
  }
}
// Merge two adjacent sorted runs through a temporary buffer.
//
// Merges the sorted ranges arr[start..mid] and arr[mid+1..end] (all bounds
// inclusive) so that arr[start..end] ends up sorted. The right run may be
// empty (mid == end). Elements that compare equal keep their relative order.
//
// arr   - vector that holds both runs; arr[start..end] is overwritten
// start - index of the first element of the left run
// mid   - index of the last element of the left run
// end   - index of the last element of the right run
//
// Requires <algorithm> (std::copy) and <type_traits>.
template <typename T>
requires std::is_arithmetic<T>::value // C++20
void merge(std::vector<T>& arr, const size_t start, const size_t mid, const size_t end) {
  // reject ranges that are inverted or that leave the vector
  if (start > mid || mid > end || end >= arr.size()) return;

  // temporary buffer that receives the merged range
  std::vector<T> temp;
  temp.reserve(end - start + 1);

  // read positions inside the left and the right run
  size_t leftIdx = start;
  size_t rightIdx = mid + 1;

  // take the smaller head of the two runs until one of them is exhausted;
  // on a tie take from the left run so the merge stays stable
  while (leftIdx <= mid && rightIdx <= end) {
    if (arr[leftIdx] <= arr[rightIdx])
      temp.push_back(arr[leftIdx++]);
    else
      temp.push_back(arr[rightIdx++]);
  }

  // at most one run still has elements: append what is left of it
  temp.insert(temp.end(), arr.begin() + leftIdx, arr.begin() + mid + 1);
  temp.insert(temp.end(), arr.begin() + rightIdx, arr.begin() + end + 1);

  // copy the merged range back to the original array
  std::copy(temp.begin(), temp.end(), arr.begin() + start);
}
// Recursive (top-down) mergesort.
//
// Sorts arr[left..right] (both bounds inclusive) in ascending order: the range
// is split in the middle, each half is sorted recursively and the two sorted
// halves are merged.
//
// arr   - vector to sort; modified in place
// left  - index of the first element of the range
// right - index of the last element of the range
//
// Typical call: mergesortRecursive(arr, 0, arr.size() - 1).
template <typename T>
requires std::is_arithmetic<T>::value // C++20
void mergesortRecursive(std::vector<T>& arr,
                        size_t left,
                        size_t right) {
  // base case: zero or one element is already sorted.
  // The bounds check also covers an empty vector, where the usual
  // arr.size() - 1 wraps around to the largest size_t.
  if (left >= right || right >= arr.size()) return;

  // written this way so that left + right cannot overflow
  const size_t mid = left + (right - left) / 2;
  mergesortRecursive(arr, left, mid);
  mergesortRecursive(arr, mid + 1, right);
  merge(arr, left, mid, right);
  // if the memory is limited, use the inplace merge at the cost of performance
  // mergeInplace(arr, left, mid, right);
}
// Iterative (bottom-up) mergesort.
//
// Sorts the whole vector in ascending order without recursion: runs of length
// 1, 2, 4, ... are merged pairwise until a single run covers the vector.
// (The function name says "Interactive"; it is kept so existing callers still
// compile, but the algorithm is the iterative variant.)
//
// arr - vector to sort; modified in place
//
// Requires <algorithm> (std::min).
template <typename T>
requires std::is_arithmetic<T>::value // C++20
void mergesortInteractive(std::vector<T>& arr) {
  const size_t count = arr.size();
  // width is the length of the already sorted runs
  for (size_t width = 1; width < count; width *= 2) {
    // left is the first index of each pair of neighbouring runs
    for (size_t left = 0; left < count; left += 2 * width) {
      // one-past-the-end of the left and of the right run, clamped to the vector
      const size_t mid = std::min(left + width, count);
      const size_t right = std::min(left + 2 * width, count);
      // a run at the tail without a partner is already sorted
      if (mid == right) continue;
      merge(arr, left, mid - 1, right - 1);
      // if the memory is limited, use the inplace merge at the cost of performance
      // mergeInplace(arr, left, mid - 1, right - 1);
    }
  }
}
// Demo: fills a vector with 1000 pseudo-random values below 1000, prints it,
// then sorts one copy with the recursive mergesort and another copy with the
// iterative one, printing each result on its own line.
int main() {
  std::vector<int> arr1;
  for (int i = 1000; i > 0; i--)
    arr1.push_back(rand() % 1000);
  // second copy of the same input for the second algorithm
  std::vector<int> arr2 = arr1;

  // unsorted input
  for (auto i : arr1) std::cout << i << " ";
  std::cout << std::endl;

  // bounds are inclusive, hence size() - 1
  mergesortRecursive(arr1, 0, arr1.size() - 1);
  for (auto i : arr1) std::cout << i << " ";
  std::cout << std::endl;

  mergesortInteractive(arr2);
  for (auto i : arr2) std::cout << i << " ";
  std::cout << std::endl;

  return 0;
}
```

Expand Down
59 changes: 37 additions & 22 deletions docs/algorithms/06-hashtables/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ If the size of your key is less than the `size_t` you could just use the key cas
```c++
// Example key that is wider than size_t: four 32-bit fields, 128 bits in total.
struct MyCustomDataWith128Bits {
  uint32_t a;
  uint32_t b;
  uint32_t c;
  uint32_t d;

  // Folds the 128 bits of the key into a single size_t.
  //
  // The fields are packed into two 64-bit words, (a, b) and (c, d), which are
  // then XOR-ed. Each field is widened to 64 bits before it is shifted:
  // shifting a 32-bit value by 32 is undefined behaviour.
  // This is a simple fold that is good enough for a demo; it is not a
  // high-quality hash (for instance, a == c together with b == d gives 0).
  size_t hash() const {
    const uint64_t high = (static_cast<uint64_t>(a) << 32) | b;
    const uint64_t low = (static_cast<uint64_t>(c) << 32) | d;
    return static_cast<size_t>(high ^ low);
  }
};
```
Expand Down Expand Up @@ -114,6 +114,8 @@ For the sake of simplicity I will use the operator modulo to convert the hash in
## Collision resolution
### Linked lists
![img_2.png](img_2.png)
Assuming that your hash function is not perfect, you will have to deal with collisions. Two or more different keys could produce the same hash. There are plenty of ways to deal with that, but the easiest way is to use a linked list to store the key-value pairs that have the same hash.
Expand All @@ -123,7 +125,7 @@ Try to come up with your own strategy to deal with collisions.
![img_1.png](img_1.png)
[source](https://www.hackerearth.com/practice/data-structures/hash-tables/basics-of-hash-tables/tutorial/)
### Key restrictions
#### Key restrictions
In order for the hash table to work, the key should be:
Expand All @@ -134,15 +136,18 @@ In order for the hash table to work, the key should be:
In C++20 you can use the `concept` feature to enforce those restrictions.
```c++
#include <iostream>
// concept for a hash table key
//
// A type T can be used as a key when it can be compared with == and either
//   - T is const-qualified and has a member hash() whose result converts to
//     std::size_t, or
//   - std::hash<T> can be called on it and its result converts to std::size_t.
//
// Uses <concepts>, <cstddef>, <functional> and <type_traits>.
template <typename T>
concept HasHashFunction =
    requires(T t, T u) {
      { t.hash() } -> std::convertible_to<std::size_t>;
      { t == u } -> std::convertible_to<bool>;
      // nested requirement: the condition has to be TRUE. A plain
      // `std::is_const_v<T>;` would only check that the expression is
      // well-formed, which it always is.
      requires std::is_const_v<T>;
    } || requires(T t, T u) {
      { std::hash<T>{}(t) } -> std::convertible_to<std::size_t>;
      { t == u } -> std::convertible_to<bool>;
    };
int main() {
struct MyHashableType {
Expand All @@ -154,14 +159,15 @@ int main() {
return value == other.value;
}
};
static_assert(HasHashFunction<MyHashableType>);
static_assert(HasHashFunction<const MyHashableType>);
static_assert(HasHashFunction<int>);
return 0;
}
```

But you can require more from the key if you are going to implement a more complex collision resolution strategy.

## Implementation
#### Hash table implementation with linked lists

![kitten-cat.gif](kitten-cat.gif)

Expand All @@ -173,12 +179,15 @@ This implementation is naive and not efficient. It is just to give you an idea o
// Requirements for a hash table key:
//   - it can be compared with ==, and
//   - either it is not modifiable (const-qualified) and implements a member
//     hash() whose result converts to std::size_t,
//     or std::hash can be called on it.
//
// Uses <concepts>, <cstddef>, <functional> and <type_traits>.
template <typename T>
concept HasHashFunction =
    requires(T t, T u) {
      { t.hash() } -> std::convertible_to<std::size_t>;
      { t == u } -> std::convertible_to<bool>;
      // nested requirement: the condition has to be TRUE. A plain
      // `std::is_const_v<T>;` would only check that the expression is
      // well-formed, which it always is.
      requires std::is_const_v<T>;
    } || requires(T t, T u) {
      { std::hash<T>{}(t) } -> std::convertible_to<std::size_t>;
      { t == u } -> std::convertible_to<bool>;
    };

// hash table
template <HasHashFunction K, typename V>
Expand All @@ -205,6 +214,8 @@ public:
// the hashtable will start with a constant size. You can resize it if you want or use any other strategy
// a good size is something similar to the number of elements you are going to store
explicit Hashtable(size_t size) {
// you could make it resize automatically, but that would increase the complexity of the implementation
// for the sake of simplicity I will not do that
this->size = size;
table = new HashtableNode*[size];
for (size_t i = 0; i < size; i++) {
Expand All @@ -218,6 +229,7 @@ private:
public:
// inserts a new key value pair
void insert(K key, V value) {
// you can optionally resize the table and rearrange the elements if the table is too full
size_t index = convertKeyToIndex(key);
auto* node = new HashtableNode(key, value);
if (table[index] == nullptr) {
Expand Down Expand Up @@ -318,3 +330,6 @@ int main() {
}
```
### Open addressing with linear probing
Open addressing is a method of collision resolution in hash tables. In this approach, each cell is not a pointer to the linked list of contents of that bucket, but instead contains a single key-value pair. In linear probing, when a collision occurs, the next cell is checked. If it is occupied, the next cell is checked, and so on, until an empty cell is found.
24 changes: 6 additions & 18 deletions docs/artificialintelligence/05-kdtree/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -440,24 +440,12 @@ public:
}

// Exercise stub for the nearest-neighbour search in the KD-tree.
//
// node         - root of the subtree to search (may be null)
// position     - query point
// best         - closest object found so far
// bestDistance - distance from position to best
// dimensionId  - axis the current node splits on
//
// Returns the closest object found. Until the exercise is implemented, the
// incoming best is returned unchanged.
GameObject* NearestNeighbor(KDNode* node, Vector2f position, GameObject* best, float bestDistance, int dimensionId) {
  // create your own Nearest Neighbor algorithm. That's not hard, just follow the rules
  // 1. If the current node is null, return the best
  // 2. If the object of the current node is closer to the position than the best,
  //    it becomes the new best (update the best distance as well)
  // 3. Search first the child that lies on the same side of the splitting axis as the position
  // 4. Search the other child only if the distance from the position to the splitting
  //    axis is smaller than the best distance: only then it can hold something closer
  // 5. Return the best

  // placeholder so the stub does not run off the end of a non-void function
  return best;
}

// draw the tree
Expand Down

0 comments on commit d71beef

Please sign in to comment.