-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathverify_collection.hpp
55 lines (43 loc) · 1.69 KB
/
verify_collection.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#pragma once
#include <succinct/mapper.hpp>
#include "util.hpp"
using ds2i::logger;
template <typename InputCollection, typename Collection>
void verify_collection(InputCollection const& input, const char* filename)
{
Collection coll;
boost::iostreams::mapped_file_source m(filename);
succinct::mapper::map(coll, m);
logger() << "Checking the written data, just to be extra safe..." << std::endl;
size_t s = 0;
for (auto seq: input) {
auto e = coll[s];
if (e.size() != seq.docs.size()) {
logger() << "sequence " << s
<< " has wrong length! ("
<< e.size() << " != " << seq.docs.size() << ")"
<< std::endl;
exit(1);
}
for (size_t i = 0; i < e.size(); ++i, e.next()) {
uint64_t docid = *(seq.docs.begin() + i);
uint64_t freq = *(seq.freqs.begin() + i);
if (docid != e.docid()) {
logger() << "docid in sequence " << s
<< " differs at position " << i << "!" << std::endl;
logger() << e.docid() << " != " << docid << std::endl;
logger() << "sequence length: " << seq.docs.size() << std::endl;
exit(1);
}
if (freq != e.freq()) {
logger() << "freq in sequence " << s
<< " differs at position " << i << "!" << std::endl;
logger() << e.freq() << " != " << freq << std::endl;
logger() << "sequence length: " << seq.docs.size() << std::endl;
exit(1);
}
}
s += 1;
}
logger() << "Everything is OK!" << std::endl;
}