-
Notifications
You must be signed in to change notification settings - Fork 1
/
locate_test.cpp
122 lines (109 loc) · 2.9 KB
/
locate_test.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include "rlcsa.h"
#include "misc/utils.h"
using namespace CSA;
int main(int argc, char** argv)
{
std::cout << "RLCSA locate test" << std::endl;
if(argc < 3)
{
std::cout << "Usage: locate_test basename [begin end] output [d|s]" << std::endl;
std::cout << " d Use direct locate." << std::endl;
std::cout << " s Output a 32-bit suffix array." << std::endl;
return 1;
}
usint begin = 0, end = 0;
int output_arg = 2;
std::cout << "Base name: " << argv[1] << std::endl;
if(argc >= 5)
{
output_arg = 4;
begin = atoi(argv[2]), end = atoi(argv[3]);
std::cout << "Begin: " << begin << std::endl;
std::cout << "End: " << end << std::endl;
if(begin > end)
{
std::cerr << "Error: Empty range!" << std::endl;
return 2;
}
}
bool direct = false, do_output = false;
for(int i = output_arg + 1; i < argc; i++)
{
switch(argv[i][0])
{
case 'd':
direct = true; break;
case 's':
do_output = true; break;
}
}
if(!direct)
{
std::cout << "Using run-based optimizations." << std::endl;
}
else
{
std::cout << "Using direct locate." << std::endl;
}
if(do_output)
{
std::cout << "Writing plain SA to output file, ignoring begin and end." << std::endl;
}
std::cout << std::endl;
RLCSA rlcsa(argv[1]);
if(!rlcsa.supportsLocate())
{
std::cerr << "Error: Locate is not supported!" << std::endl;
return 3;
}
if(output_arg == 4 && !do_output)
{
if(end >= rlcsa.getSize())
{
std::cerr << "Error: Invalid range!" << std::endl;
return 4;
}
}
else
{
begin = 0; end = rlcsa.getSize() - 1;
}
std::ofstream output(argv[output_arg], std::ios_base::binary);
if(!output)
{
std::cerr << "Error: Cannot open output file!" << std::endl;
return 5;
}
usint* buffer = new usint[MILLION]; CSA::usint temp;
std::clock_t start = std::clock();
if(do_output) { temp = rlcsa.getSize(); output.write((char*)&temp, sizeof(temp)); }
for(usint curr = begin; curr <= end; curr += MILLION)
{
pair_type range(curr, std::min(end, curr + MILLION - 1));
if(direct)
{
for(usint i = 0; i < range.second + 1 - range.first; i++)
{
buffer[i] = rlcsa.locate(curr + i, false);
}
}
else { rlcsa.locate(range, buffer); }
for(usint i = 0; i < range.second + 1 - range.first; i++)
{
if(do_output) { temp = buffer[i]; output.write((char*)&temp, sizeof(temp)); }
else { output.write((char*)&(buffer[i]), sizeof(usint)); }
}
}
std::clock_t stop = std::clock();
delete[] buffer;
double size = (end + 1 - begin);
double time = (stop - start) / (double)CLOCKS_PER_SEC;
std::cout << size << " locates in " << time << " seconds (" << (size / time) << " locates/s)" << std::endl;
output.close();
return 0;
}