-
Notifications
You must be signed in to change notification settings - Fork 2
/
uniq_lines.cpp
46 lines (37 loc) · 1.07 KB
/
uniq_lines.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// The purpose of this program is to count the number of unique source phrases in the document,
// so that a smaller probing_hash_table can be used.
#include "helpers/line_splitter.hh"
#include "util/file_piece.hh"
#include "util/file.hh"
#include "util/usage.hh"
#include <stdio.h>
#include <fstream>
#include <iostream>
#include <string>
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cout << "ERROR! The program requires 1 argument, but " << argc - 1 << " were provided." << std::endl;
std::cout << "Usage: " << argv[0] << " path_to_phrase_table." << std::endl;
}
//Read the file
util::FilePiece filein(argv[1]);
unsigned long uniq_lines = 0;
line_text prev_line;
while (true){
line_text new_line;
try {
//Process line read
new_line = splitLine(filein.ReadLine());
} catch (util::EndOfFileException e){
std::cout << "End of file" << std::endl;
break;
}
if (new_line.source_phrase == prev_line.source_phrase){
continue;
} else {
uniq_lines++;
prev_line = new_line;
}
}
std::cout << "Number of unique lines is: " << uniq_lines << std::endl;
return 1;
}