Skip to content

Commit

Permalink
CSVRecordReader – Set header record when header is explicitly provided
Browse files Browse the repository at this point in the history
  • Loading branch information
rajagopr authored Sep 15, 2023
1 parent cf460aa commit e1a6651
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,11 @@ public void init(File dataFile, @Nullable Set<String> fieldsToRead, @Nullable Re
}

if (_isHeaderProvided) {
_headerMap = parseLineAsHeader(config.getHeader());
_format = _format.builder().setHeader(_headerMap.keySet().toArray(new String[0])).build();
if (!_useLineIterator) {
validateHeaderForDelimiter(delimiter, config.getHeader(), _format);
}
_headerMap = parseLineAsHeader(config.getHeader());
_format = _format.builder().setHeader(_headerMap.keySet().toArray(new String[0])).build();
}

if (config.isMultiValueDelimiterEnabled()) {
Expand Down Expand Up @@ -329,7 +329,12 @@ private void initLineIteratorResources()
// read the first line
String headerLine = _bufferedReader.readLine();
_headerMap = parseLineAsHeader(headerLine);
_format = _format.builder().setHeader(_headerMap.keySet().toArray(new String[0])).build();
_format = _format.builder()
// If header isn't provided, the first line would be set as header and the 'skipHeader' property
// is set to false.
.setSkipHeaderRecord(false)
.setHeader(_headerMap.keySet().toArray(new String[0]))
.build();
}
_nextLine = _bufferedReader.readLine();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,46 @@ public void testReadingDataFileWithNoRecords()
// Note: The default CSVRecordReader cannot handle unparseable rows
}

/**
 * Reads the {@code dataFileWithNoHeader2.csv} resource with the header supplied through the
 * reader config ({@code "key,num0,num1"}) and expects four rows from each of two passes: one
 * with {@code skipUnParseableLines} enabled and one with it disabled.
 */
@Test
public void testReadingDataFileWithNoHeaderAndDataRecordsWithEmptyValues()
    throws URISyntaxException, IOException {
  File csvFile = new File(ClassLoader.getSystemResource("dataFileWithNoHeader2.csv").toURI());

  // Pass 1: skipUnParseableLines = true, which exercises the line-iterator based reader.
  CSVRecordReaderConfig config = new CSVRecordReaderConfig();
  config.setSkipUnParseableLines(true);
  config.setHeader("key,num0,num1");
  List<GenericRow> rowsFromLineIterator = readCSVRecords(csvFile, config, null, false);
  Assert.assertEquals(4, rowsFromLineIterator.size());

  // Pass 2: same config object with skipUnParseableLines = false, i.e. the default CSVRecordReader path.
  config.setSkipUnParseableLines(false);
  List<GenericRow> rowsFromDefaultReader = readCSVRecords(csvFile, config, null, false);
  Assert.assertEquals(4, rowsFromDefaultReader.size());
}

/**
 * Reads the {@code dataFileWithValidHeaders.csv} resource without configuring an explicit header
 * and expects four rows from each of two passes: one with {@code skipUnParseableLines} enabled
 * and one with it disabled.
 */
@Test
public void testReadingDataFileWithValidHeaders()
    throws URISyntaxException, IOException {
  File csvFile = new File(ClassLoader.getSystemResource("dataFileWithValidHeaders.csv").toURI());

  // Pass 1: skipUnParseableLines = true, which exercises the line-iterator based reader.
  CSVRecordReaderConfig config = new CSVRecordReaderConfig();
  config.setSkipUnParseableLines(true);
  // No header is set on the config, so the first line of the file is expected to act as the header line.
  // NOTE(review): the original comment spoke of an "attempt to skip the header" being ignored, yet the
  // flag is set to false here - confirm whether 'true' was the intended value.
  config.setSkipHeader(false);
  List<GenericRow> rowsFromLineIterator = readCSVRecords(csvFile, config, null, false);
  Assert.assertEquals(4, rowsFromLineIterator.size());

  // Pass 2: same config object with skipUnParseableLines = false, i.e. the default CSVRecordReader path.
  config.setSkipUnParseableLines(false);
  List<GenericRow> rowsFromDefaultReader = readCSVRecords(csvFile, config, null, false);
  Assert.assertEquals(4, rowsFromDefaultReader.size());
}

private List<GenericRow> readCSVRecords(File dataFile,
CSVRecordReaderConfig readerConfig, GenericRow genericRow, boolean rewind)
throws IOException {
Expand All @@ -543,10 +583,11 @@ private List<GenericRow> readCSVRecords(File dataFile,
while (recordReader.hasNext()) {
if (genericRow != null) {
recordReader.next(reuse);
genericRows.add(reuse);
} else {
recordReader.next();
GenericRow nextRow = recordReader.next();
genericRows.add(nextRow);
}
genericRows.add(genericRow);
}

if (rewind) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"key00",12.3,8.42
"key01",,7.1
"key02",,16.81
"key03",,7.12
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"key","num0","num1"
"key00",12.3,8.42
"key01",,7.1
"key02",,16.81
"key03",,7.12

0 comments on commit e1a6651

Please sign in to comment.