-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathprofile_decoding.cpp
129 lines (106 loc) · 4.2 KB
/
profile_decoding.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include <iostream>
#include <random>
#include <boost/lexical_cast.hpp>
#include "succinct/mapper.hpp"
#include "index_types.hpp"
#include "util.hpp"
#include "dec_time_prediction.hpp"
namespace ds2i {
/// Measures the average time needed to decode one encoded block.
///
/// The encoded bytes in `buf` are copied `runs` times into a scratch buffer,
/// each copy at a pseudo-random byte offset inside its own slot, and every
/// copy is then decoded once while the clock is running.
///
/// @param sum_of_values  forwarded unchanged to mixed_block::decode
/// @param n              forwarded unchanged to mixed_block::decode
/// @param buf            encoded block to decode
/// @return the get_time_usecs() delta over all runs, divided by the number of
///         runs and multiplied by 1000 (i.e. nanoseconds per decode, assuming
///         get_time_usecs() reports microseconds as its name suggests)
double measure_decoding_time(size_t sum_of_values, size_t n,
                             std::vector<uint8_t> const& buf)
{
    static const size_t runs = 256;
    // every copy is shifted by a random offset in [0, max_shift)
    static const size_t max_shift = 64;

    std::vector<uint32_t> out_buf(mixed_block::block_size);

    // dry run to ignore one-time initializations (static variables, ...)
    mixed_block::decode(buf.data(), out_buf.data(),
                        sum_of_values, n);

    // Distance between the starts of two consecutive slots. The default is
    // 1 KiB; it is widened when the encoded block plus the largest shift
    // would not fit, so that copies never overlap and the last one never
    // runs past the end of the scratch buffer.
    size_t spacing = size_t(1) << 10;
    while (spacing < buf.size() + max_shift) {
        spacing <<= 1;
    }

    // Scratch state is kept per thread and reused across calls; the buffer
    // only ever grows.
    thread_local std::vector<uint8_t> readbuf;
    if (readbuf.size() < runs * spacing) {
        readbuf.resize(runs * spacing);
    }
    thread_local std::vector<uint8_t const*> positions(runs);

    // Per-thread generator instead of rand(), whose hidden global state is
    // not guaranteed to be thread-safe.
    thread_local std::default_random_engine shift_rng(1729);
    std::uniform_int_distribution<size_t> shift_dist(0, max_shift - 1);

    for (size_t run = 0; run < runs; ++run) {
        // try random alignments
        uint8_t* position =
            readbuf.data() + run * spacing + shift_dist(shift_rng);
        std::copy(buf.begin(), buf.end(), position);
        positions[run] = position;
    }

    double tick = get_time_usecs();
    for (auto position: positions) {
        mixed_block::decode(position, out_buf.data(), sum_of_values, n);
        // keep the compiler from discarding the decode as dead code
        do_not_optimize_away(out_buf[0]);
    }

    return (get_time_usecs() - tick) / runs * 1000;
}
void profile_block(std::vector<uint32_t> const& values,
uint32_t sum_of_values)
{
using namespace time_prediction;
std::vector<uint8_t> buf;
uint32_t n = values.size();
feature_vector fv;
values_statistics(values, fv);
for (uint8_t t = 0; t < mixed_block::block_types; ++t) {
mixed_block::block_type type = (mixed_block::block_type)t;
for (mixed_block::compr_param_type param = 0;
param < mixed_block::compr_params(type); ++param) {
buf.clear();
if (!mixed_block::compression_stats(type, param, values.data(),
sum_of_values, n, buf, fv)) {
continue;
}
double time = measure_decoding_time(sum_of_values, n, buf);
stats_line()
("type", (int)t)
("time", time)
(fv)
;
}
}
}
template <typename IndexType>
void profile_decoding(const char* index_filename,
double p)
{
std::default_random_engine rng(1729);
std::uniform_real_distribution<double> dist01(0.0, 1.0);
IndexType index;
logger() << "Loading index from " << index_filename << std::endl;
boost::iostreams::mapped_file_source m(index_filename);
succinct::mapper::map(index, m);
std::vector<uint32_t> values;
for (size_t l = 0; l < index.size(); ++l) {
if (l % 1000000 == 0) {
logger() << l << " lists processed" << std::endl;
}
auto blocks = index[l].get_blocks();
for (auto const& block: blocks) {
// only measure full blocks
if (block.size == mixed_block::block_size && dist01(rng) < p) {
block.decode_doc_gaps(values);
profile_block(values, block.doc_gaps_universe);
block.decode_freqs(values);
profile_block(values, uint32_t(-1));
}
}
}
logger() << index.size() << " lists processed" << std::endl;
}
}
int main(int /* argc */, const char** argv)
{
using namespace ds2i;
std::string type = argv[1];
const char* index_filename = argv[2];
double p = boost::lexical_cast<double>(argv[3]);
if (false) {
#define LOOP_BODY(R, DATA, T) \
} else if (type == BOOST_PP_STRINGIZE(T)) { \
profile_decoding<BOOST_PP_CAT(T, _index)> \
(index_filename, p); \
/**/
BOOST_PP_SEQ_FOR_EACH(LOOP_BODY, _, DS2I_BLOCK_INDEX_TYPES);
#undef LOOP_BODY
} else {
logger() << "ERROR: Unknown type " << type << std::endl;
}
}