-
Notifications
You must be signed in to change notification settings - Fork 114
/
main.cpp
187 lines (170 loc) · 7.36 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
/*
* Simple benchmark that runs a mixture of point lookups and inserts on ALEX.
*/
#include <algorithm>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include <utility>
#include <vector>

#include "../core/alex.h"
#include "flags.h"
#include "utils.h"
// Modify these if running your own workload
#define KEY_TYPE double
#define PAYLOAD_TYPE double
/*
* Required flags:
* --keys_file path to the file that contains keys
* --keys_file_type file type of keys_file (options: binary or text)
* --init_num_keys number of keys to bulk load with
* --total_num_keys total number of keys in the keys file
* --batch_size number of operations (lookup or insert) per batch
*
* Optional flags:
* --insert_frac fraction of operations that are inserts (instead of
* lookups)
* --lookup_distribution lookup keys distribution (options: uniform or zipf)
* --time_limit time limit, in minutes
* --print_batch_stats whether to output stats for each batch
*/
int main(int argc, char* argv[]) {
auto flags = parse_flags(argc, argv);
std::string keys_file_path = get_required(flags, "keys_file");
std::string keys_file_type = get_required(flags, "keys_file_type");
auto init_num_keys = stoi(get_required(flags, "init_num_keys"));
auto total_num_keys = stoi(get_required(flags, "total_num_keys"));
auto batch_size = stoi(get_required(flags, "batch_size"));
auto insert_frac = stod(get_with_default(flags, "insert_frac", "0.5"));
std::string lookup_distribution =
get_with_default(flags, "lookup_distribution", "zipf");
auto time_limit = stod(get_with_default(flags, "time_limit", "0.5"));
bool print_batch_stats = get_boolean_flag(flags, "print_batch_stats");
// Read keys from file
auto keys = new KEY_TYPE[total_num_keys];
if (keys_file_type == "binary") {
load_binary_data(keys, total_num_keys, keys_file_path);
} else if (keys_file_type == "text") {
load_text_data(keys, total_num_keys, keys_file_path);
} else {
std::cerr << "--keys_file_type must be either 'binary' or 'text'"
<< std::endl;
return 1;
}
// Combine bulk loaded keys with randomly generated payloads
auto values = new std::pair<KEY_TYPE, PAYLOAD_TYPE>[init_num_keys];
std::mt19937_64 gen_payload(std::random_device{}());
for (int i = 0; i < init_num_keys; i++) {
values[i].first = keys[i];
values[i].second = static_cast<PAYLOAD_TYPE>(gen_payload());
}
// Create ALEX and bulk load
alex::Alex<KEY_TYPE, PAYLOAD_TYPE> index;
std::sort(values, values + init_num_keys,
[](auto const& a, auto const& b) { return a.first < b.first; });
index.bulk_load(values, init_num_keys);
// Run workload
int i = init_num_keys;
long long cumulative_inserts = 0;
long long cumulative_lookups = 0;
int num_inserts_per_batch = static_cast<int>(batch_size * insert_frac);
int num_lookups_per_batch = batch_size - num_inserts_per_batch;
double cumulative_insert_time = 0;
double cumulative_lookup_time = 0;
auto workload_start_time = std::chrono::high_resolution_clock::now();
int batch_no = 0;
PAYLOAD_TYPE sum = 0;
std::cout << std::scientific;
std::cout << std::setprecision(3);
while (true) {
batch_no++;
// Do lookups
double batch_lookup_time = 0.0;
if (i > 0) {
KEY_TYPE* lookup_keys = nullptr;
if (lookup_distribution == "uniform") {
lookup_keys = get_search_keys(keys, i, num_lookups_per_batch);
} else if (lookup_distribution == "zipf") {
lookup_keys = get_search_keys_zipf(keys, i, num_lookups_per_batch);
} else {
std::cerr << "--lookup_distribution must be either 'uniform' or 'zipf'"
<< std::endl;
return 1;
}
auto lookups_start_time = std::chrono::high_resolution_clock::now();
for (int j = 0; j < num_lookups_per_batch; j++) {
KEY_TYPE key = lookup_keys[j];
PAYLOAD_TYPE* payload = index.get_payload(key);
if (payload) {
sum += *payload;
}
}
auto lookups_end_time = std::chrono::high_resolution_clock::now();
batch_lookup_time = std::chrono::duration_cast<std::chrono::nanoseconds>(
lookups_end_time - lookups_start_time)
.count();
cumulative_lookup_time += batch_lookup_time;
cumulative_lookups += num_lookups_per_batch;
delete[] lookup_keys;
}
// Do inserts
int num_actual_inserts =
std::min(num_inserts_per_batch, total_num_keys - i);
int num_keys_after_batch = i + num_actual_inserts;
auto inserts_start_time = std::chrono::high_resolution_clock::now();
for (; i < num_keys_after_batch; i++) {
index.insert(keys[i], static_cast<PAYLOAD_TYPE>(gen_payload()));
}
auto inserts_end_time = std::chrono::high_resolution_clock::now();
double batch_insert_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(inserts_end_time -
inserts_start_time)
.count();
cumulative_insert_time += batch_insert_time;
cumulative_inserts += num_actual_inserts;
if (print_batch_stats) {
int num_batch_operations = num_lookups_per_batch + num_actual_inserts;
double batch_time = batch_lookup_time + batch_insert_time;
long long cumulative_operations = cumulative_lookups + cumulative_inserts;
double cumulative_time = cumulative_lookup_time + cumulative_insert_time;
std::cout << "Batch " << batch_no
<< ", cumulative ops: " << cumulative_operations
<< "\n\tbatch throughput:\t"
<< num_lookups_per_batch / batch_lookup_time * 1e9
<< " lookups/sec,\t"
<< num_actual_inserts / batch_insert_time * 1e9
<< " inserts/sec,\t" << num_batch_operations / batch_time * 1e9
<< " ops/sec"
<< "\n\tcumulative throughput:\t"
<< cumulative_lookups / cumulative_lookup_time * 1e9
<< " lookups/sec,\t"
<< cumulative_inserts / cumulative_insert_time * 1e9
<< " inserts/sec,\t"
<< cumulative_operations / cumulative_time * 1e9 << " ops/sec"
<< std::endl;
}
// Check for workload end conditions
if (num_actual_inserts < num_inserts_per_batch) {
// End if we have inserted all keys in a workload with inserts
break;
}
double workload_elapsed_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now() - workload_start_time)
.count();
if (workload_elapsed_time > time_limit * 1e9 * 60) {
break;
}
}
long long cumulative_operations = cumulative_lookups + cumulative_inserts;
double cumulative_time = cumulative_lookup_time + cumulative_insert_time;
std::cout << "Cumulative stats: " << batch_no << " batches, "
<< cumulative_operations << " ops (" << cumulative_lookups
<< " lookups, " << cumulative_inserts << " inserts)"
<< "\n\tcumulative throughput:\t"
<< cumulative_lookups / cumulative_lookup_time * 1e9
<< " lookups/sec,\t"
<< cumulative_inserts / cumulative_insert_time * 1e9
<< " inserts/sec,\t"
<< cumulative_operations / cumulative_time * 1e9 << " ops/sec"
<< std::endl;
delete[] keys;
delete[] values;
}