Skip to content

Commit

Permalink
feat(#23): add exception handling and tests for incompleteRecordsHandlers
Browse files Browse the repository at this point in the history
  • Loading branch information
acsolle66 committed Oct 19, 2023
1 parent b72612f commit af01c5a
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,45 @@ private List<String[]> averageFillRecordsWithIncompleteNumericalFeature(
}
}

if (validFeatureCount == 0) {
continue;
if (validFeatureCount < dataset.size() * 0.5) {
throw new RuntimeException(
"Less than 50% of the records will be used to calculate the fill values. "
+ "Consider using another IncompleteRecordsHandlerStrategy or handle this exception.");
}

average = sum / validFeatureCount;

for (String[] record : dataset) {
if (!isCompleteFeature(record[columnIndex])) {
record[columnIndex] = String.valueOf(average);
}
}
}
}

return dataset;
}

private List<String[]> dropRecordsWithIncompleteCategoricalFeature(
List<String[]> dataset, List<String> typeOfFeatures) {
List<String[]> cleanedDataset = dataset;

for (int columnIndex = 0; columnIndex < typeOfFeatures.size(); columnIndex++) {
if (typeOfFeatures.get(columnIndex).equals("categorical")) {
int columnIndexFin = columnIndex;
dataset =
dataset.stream().filter(record -> isCompleteFeature(record[columnIndexFin])).toList();
cleanedDataset =
cleanedDataset.stream()
.filter(record -> isCompleteFeature(record[columnIndexFin]))
.toList();
}
}
return dataset;

if (cleanedDataset.size() < dataset.size() * 0.5) {
throw new RuntimeException(
"More than 50% of the records will be dropped with this IncompleteRecordsHandlerStrategy. "
+ "Consider using another IncompleteRecordsHandlerStrategy or handle this exception.");
}

return cleanedDataset;
}

private List<String> getFeatureTypes(List<String[]> dataset) {
Expand All @@ -72,6 +84,10 @@ private List<String> getFeatureTypes(List<String[]> dataset) {
}
break;
}

if (featureTypes.isEmpty()) {
throw new RuntimeException("At least one full record needed with valid features");
}
return featureTypes;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,16 @@
public class DropIncompleteRecordsHandler implements IIncompleteRecordsHandler {
@Override
public List<String[]> getCleanedDataset(List<String[]> dataset) {
List<String[]> cleanedDataset =
dataset.stream().filter(this::containsOnlyCompletedFeatures).toList();

return dataset.stream().filter(this::containsOnlyCompletedFeatures).toList();
if (cleanedDataset.size() < dataset.size() * 0.5) {
throw new RuntimeException(
"More than 50% of the records will be dropped with this IncompleteRecordsHandlerStrategy. "
+ "Consider using another IncompleteRecordsHandlerStrategy or handle this exception.");
}

return cleanedDataset;
}

private boolean containsOnlyCompletedFeatures(String[] record) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,51 @@ void initializeList() {
void dropRecordsWithIncompleteCategoricalFeature() {

this.dataset.add(new String[] {"A", "1", "A"});
this.dataset.add(new String[] {"", "1", ""});
this.dataset.add(new String[] {"", "2", ""});
this.dataset.add(new String[] {"C", "", "C"});
this.dataset.add(new String[] {"D", "1", ""});
this.dataset.add(new String[] {"E", "1", "E"});
for (String[] data : dataset) {
System.out.println(Arrays.toString(data));
}

List<String[]> cleanedDataset = incompleteRecordHandler.getCleanedDataset(dataset);
System.out.println("----------------------------------------------------");
for (String[] data : cleanedDataset) {
System.out.println(Arrays.toString(data));
}

assertEquals(3, cleanedDataset.size());
this.dataset.add(new String[] {"D", "3", ""});
this.dataset.add(new String[] {"E", "4", "E"});

assertAll(
() -> assertEquals(3, incompleteRecordHandler.getCleanedDataset(dataset).size()),
() ->
assertEquals(
2.5, Double.valueOf(incompleteRecordHandler.getCleanedDataset(dataset).get(1)[1])));
}

@Test
void fillWithAverageValues() {
void testThrowRuntimeExceptionForDroppingMoreThanHalfOfOriginalDataset() {

this.dataset.add(new String[] {"A", "1", "A"});
this.dataset.add(new String[] {"", "1", ""});
this.dataset.add(new String[] {"C", "", "C"});
this.dataset.add(new String[] {"D", "1", ""});
this.dataset.add(new String[] {"E", "1", "E"});
for (String[] data : dataset) {
System.out.println(Arrays.toString(data));
}

List<String[]> cleanedDataset = incompleteRecordHandler.getCleanedDataset(dataset);
System.out.println("----------------------------------------------------");
for (String[] data : cleanedDataset) {
System.out.println(Arrays.toString(data));
}

assertEquals(1, Integer.valueOf(cleanedDataset.get(2)[1]));
this.dataset.add(new String[] {"", "1", "A"});
this.dataset.add(new String[] {"B", "2", "B"});
this.dataset.add(new String[] {"C", "3", "C"});
this.dataset.add(new String[] {"D", "4", ""});
this.dataset.add(new String[] {"", "5", "E"});

assertThrows(RuntimeException.class, () -> incompleteRecordHandler.getCleanedDataset(dataset));
}

@Test
void testThrowRuntimeExceptionForZeroValidNumericalFeatures() {

this.dataset.add(new String[] {"A", "", "A"});
this.dataset.add(new String[] {"B", "", "B"});
this.dataset.add(new String[] {"C", "1", "C"});
this.dataset.add(new String[] {"D", "", "D"});
this.dataset.add(new String[] {"E", "", "E"});

assertThrows(RuntimeException.class, () -> incompleteRecordHandler.getCleanedDataset(dataset));
}

@Test
void testThrowRuntimeExceptionForAtLeastOneFullValidRecord() {

this.dataset.add(new String[] {"", "1", "A"});
this.dataset.add(new String[] {"B", "2", ""});
this.dataset.add(new String[] {"", "", "C"});
this.dataset.add(new String[] {"D", "3", ""});
this.dataset.add(new String[] {"", "4", "E"});

assertThrows(RuntimeException.class, () -> incompleteRecordHandler.getCleanedDataset(dataset));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

class DropIncompleteRecordHandlerTest {
private List<String[]> dataset;
Expand All @@ -28,8 +29,7 @@ void testDropZeroIncompleteResults() {
this.dataset.add(new String[] {"A", "B", "C"});
this.dataset.add(new String[] {"A", "B", "C"});

List<String[]> cleanedDataset = incompleteRecordHandler.getCleanedDataset(dataset);
assertEquals(5, cleanedDataset.size());
assertEquals(5, incompleteRecordHandler.getCleanedDataset(dataset).size());
}

@Test
Expand All @@ -41,33 +41,30 @@ void testDropOneIncompleteResult() {
this.dataset.add(new String[] {"A", "B", "C"});
this.dataset.add(new String[] {"A", "B", "C"});

List<String[]> cleanedDataset = incompleteRecordHandler.getCleanedDataset(dataset);
assertEquals(4, cleanedDataset.size());
assertEquals(4, incompleteRecordHandler.getCleanedDataset(dataset).size());
}

@Test
void testDropThreeIncompleteResults() {
void testDropTwoIncompleteResult() {

this.dataset.add(new String[] {"A", "B", "C"});
this.dataset.add(new String[] {"", "B", "C"});
this.dataset.add(new String[] {"A", "", "C"});
this.dataset.add(new String[] {"A", "B", ""});
this.dataset.add(new String[] {"A", "", "C"});
this.dataset.add(new String[] {"A", "B", "C"});
this.dataset.add(new String[] {"A", "B", "C"});

List<String[]> cleanedDataset = incompleteRecordHandler.getCleanedDataset(dataset);
assertEquals(2, cleanedDataset.size());
assertEquals(3, incompleteRecordHandler.getCleanedDataset(dataset).size());
}

@Test
void testDropAllIncompleteResults() {
void testThrowRuntimeExceptionForDroppingMoreThanHalfOfOriginalDataset() {

this.dataset.add(new String[] {"A", "", "C"});
this.dataset.add(new String[] {"A", "B", "C"});
this.dataset.add(new String[] {"", "B", "C"});
this.dataset.add(new String[] {"A", "", "C"});
this.dataset.add(new String[] {"A", "B", ""});
this.dataset.add(new String[] {"A", "", "C"});
this.dataset.add(new String[] {"A", "B", "C"});

List<String[]> cleanedDataset = incompleteRecordHandler.getCleanedDataset(dataset);
assertEquals(0, cleanedDataset.size());
assertThrows(RuntimeException.class, () -> incompleteRecordHandler.getCleanedDataset(dataset));
}
}

0 comments on commit af01c5a

Please sign in to comment.