forked from tabulapdf/tabula-java
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTestWriters.java
136 lines (111 loc) · 5.4 KB
/
TestWriters.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package technology.tabula;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.List;
import org.junit.Test;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import technology.tabula.extractors.BasicExtractionAlgorithm;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
import technology.tabula.writers.CSVWriter;
import technology.tabula.writers.JSONWriter;
import technology.tabula.writers.TSVWriter;
/**
 * Tests for the CSV, TSV and JSON table writers.
 *
 * <p>Each test extracts one or more tables from a sample PDF under
 * {@code src/test/resources/technology/tabula}, serializes them with a writer
 * into a {@link StringBuilder}, and compares the text with an expected-output
 * fixture loaded through {@code UtilsForTesting}.
 */
public class TestWriters {

    /** Expected first line of the CSV produced for the voting-record table. */
    private static final String EXPECTED_CSV_WRITER_OUTPUT = "\"ABDALA de MATARAZZO, Norma Amanda\",Frente Cívico por Santiago,Santiago del Estero,AFIRMATIVO";

    /**
     * Extracts the first table found by {@link BasicExtractionAlgorithm} in a
     * fixed area of the first page of the voting-record PDF.
     *
     * @return the first extracted table
     * @throws IOException if the PDF cannot be read
     */
    private Table getTable() throws IOException {
        Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", 269.875f, 12.75f, 790.5f, 561f);
        BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm();
        return bea.extract(page).get(0);
    }

    /**
     * Extracts every table found by {@link SpreadsheetExtractionAlgorithm} on
     * page 1 of {@code twotables.pdf}.
     *
     * @return the extracted tables
     * @throws IOException if the PDF cannot be read
     */
    private List<Table> getTables() throws IOException {
        Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/twotables.pdf", 1);
        SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
        // NOTE(review): cast kept from the original; the declared return type of
        // extract() is not visible here, so it may or may not be redundant.
        return (List<Table>) sea.extract(page);
    }

    /**
     * Extracts the first table found by {@link SpreadsheetExtractionAlgorithm}
     * in a fixed area of the first page of {@code schools.pdf}.
     *
     * @return the first extracted table
     * @throws IOException if the PDF cannot be read
     */
    private Table getSchoolsTable() throws IOException {
        Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/schools.pdf", 53.74f, 16.97f, 548.74f, 762.3f);
        SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
        return sea.extract(page).get(0);
    }

    /** Checks the first CSV line and the complete CSV text for the voting-record table. */
    @Test
    public void testCSVWriter() throws IOException {
        String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/argentina_diputados_voting_record.csv");
        Table table = this.getTable();
        StringBuilder sb = new StringBuilder();
        (new CSVWriter()).write(sb, table);
        String s = sb.toString();
        String[] lines = s.split("\\r?\\n");
        // JUnit's contract is assertEquals(expected, actual); keeping that order
        // makes a failure message report the two values the right way round.
        assertEquals(EXPECTED_CSV_WRITER_OUTPUT, lines[0]);
        assertEquals(expectedCsv, s);
    }

    /**
     * Runs the TSV writer on the voting-record table and checks that it emits
     * as many lines as the CSV writer does for the same table.
     */
    // TODO Compare against an expected TSV fixture once one is available.
    @Test
    public void testTSVWriter() throws IOException {
        Table table = this.getTable();

        StringBuilder tsv = new StringBuilder();
        (new TSVWriter()).write(tsv, table);

        StringBuilder csv = new StringBuilder();
        (new CSVWriter()).write(csv, table);

        // Structural check only: assumes both writers emit one record per table
        // row, so the two outputs should split into the same number of lines.
        String[] tsvLines = tsv.toString().split("\\r?\\n");
        String[] csvLines = csv.toString().split("\\r?\\n");
        assertEquals(csvLines.length, tsvLines.length);
    }

    /** Compares the JSON written for the voting-record table with its fixture. */
    @Test
    public void testJSONWriter() throws IOException {
        String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json");
        Table table = this.getTable();
        StringBuilder sb = new StringBuilder();
        (new JSONWriter()).write(sb, table);
        assertEquals(expectedJson, sb.toString());
    }

    /** Compares the JSON written for the schools table with its fixture. */
    @Test
    public void testJSONSerializeInfinity() throws IOException {
        String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/schools.json");
        Table table = this.getSchoolsTable();
        StringBuilder sb = new StringBuilder();
        (new JSONWriter()).write(sb, table);
        assertEquals(expectedJson, sb.toString());
    }

    /** Compares the CSV written for the schools table with its fixture. */
    @Test
    public void testCSVSerializeInfinity() throws IOException {
        String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/schools.csv");
        Table table = this.getSchoolsTable();
        StringBuilder sb = new StringBuilder();
        (new CSVWriter()).write(sb, table);
        assertEquals(expectedCsv, sb.toString());
    }

    /**
     * Serializes the tables from {@code twotables.pdf} to JSON, compares the
     * text with its fixture, and checks that it parses to a two-element array.
     */
    @Test
    public void testJSONSerializeTwoTables() throws IOException {
        String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/twotables.json");
        List<Table> tables = this.getTables();
        StringBuilder sb = new StringBuilder();
        (new JSONWriter()).write(sb, tables);
        String s = sb.toString();
        assertEquals(expectedJson, s);

        // Parse the output back to confirm it holds one entry per table.
        Gson gson = new Gson();
        JsonArray json = gson.fromJson(s, JsonArray.class);
        assertEquals(2, json.size());
    }

    /** Compares the CSV written for the tables from {@code twotables.pdf} with its fixture. */
    @Test
    public void testCSVSerializeTwoTables() throws IOException {
        String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/twotables.csv");
        List<Table> tables = this.getTables();
        StringBuilder sb = new StringBuilder();
        (new CSVWriter()).write(sb, tables);
        assertEquals(expectedCsv, sb.toString());
    }

    /** Compares the CSV written for a table from {@code frx_2012_disclosure.pdf} with its fixture. */
    @Test
    public void testCSVMultilineRow() throws IOException {
        String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/frx_2012_disclosure.csv");
        Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", 53.0f, 49.0f, 735.0f, 550.0f);
        SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
        Table table = sea.extract(page).get(0);
        StringBuilder sb = new StringBuilder();
        (new CSVWriter()).write(sb, table);
        assertEquals(expectedCsv, sb.toString());
    }
}