Skip to content

Commit

Permalink
add Assert function to verify in the DataGeneratorTest (apache#15504)
Browse files Browse the repository at this point in the history
* add Assert function to verify in the DataGeneratorTest

* remove unused log in DataGeneratorTest

* add comment for DataGeneratorTest
  • Loading branch information
AlbericByte authored Dec 8, 2023
1 parent db3a633 commit 935aa18
Showing 1 changed file with 108 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Test;

Expand All @@ -40,11 +42,15 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

// Generates rows, logs per-dimension value distributions, and asserts row counts, nullability and timestamp ranges
public class DataGeneratorTest extends InitializedNullHandlingTest
{
private static final Logger log = new Logger(DataGeneratorTest.class);


@Test
public void testSequential()
{
Expand Down Expand Up @@ -89,10 +95,12 @@ public void testSequential()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 100; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("S-ROW: " + row);
tracker.addRow(row);
}
tracker.printStuff();
tracker.assertTotalRowSize("dimA", 100);
tracker.assertTotalRowSize("dimB", 100);
tracker.assertTotalRowSize("dimC", 100);
}

@Test
Expand Down Expand Up @@ -151,12 +159,16 @@ public void testDiscreteUniform()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 100; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("U-ROW: " + row);

tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 100);
// The row size of dimB is 4, so dataGenerator generates 4 values for dimB per row and the total size is 400
tracker.assertTotalRowSize("dimB", 400);
tracker.assertTotalRowSize("dimC", 100);
tracker.assertNullable("dimC", true);
tracker.assertTotalRowSize("dimD", 100);
}


Expand Down Expand Up @@ -195,12 +207,12 @@ public void testRoundedNormal()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 1000000; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("N-ROW: " + row);

tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 1000000);
tracker.assertTotalRowSize("dimB", 1000000);
}

@Test
Expand Down Expand Up @@ -250,12 +262,13 @@ public void testZipf()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 100; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("Z-ROW: " + row);

tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 100);
tracker.assertTotalRowSize("dimB", 100);
tracker.assertTotalRowSize("dimC", 100);
}

@Test
Expand All @@ -279,12 +292,11 @@ public void testEnumerated()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 10000; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("Z-ROW: " + row);

tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 10000);
}

@Test
Expand Down Expand Up @@ -322,12 +334,13 @@ public void testNormal()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 100; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("N-ROW: " + row);

tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 100);
tracker.assertTotalRowSize("dimB", 100);
tracker.assertNullable("dimB", true);
}

@Test
Expand Down Expand Up @@ -363,18 +376,19 @@ public void testRealUniform()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 100; i++) {
InputRow row = dataGenerator.nextRow();
//System.out.println("U-ROW: " + row);

tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 100);
tracker.assertTotalRowSize("dimB", 100);
}

@Test
public void testIntervalBasedTimeGeneration()
{
List<GeneratorColumnSchema> schemas = new ArrayList<>();
RowValueTracker tracker = new RowValueTracker();

schemas.add(
GeneratorColumnSchema.makeEnumeratedSequential(
Expand All @@ -389,13 +403,20 @@ public void testIntervalBasedTimeGeneration()

DataGenerator dataGenerator = new DataGenerator(schemas, 9999, Intervals.utc(50000, 600000), 100);
for (int i = 0; i < 100; i++) {
dataGenerator.nextRow();
InputRow row = dataGenerator.nextRow();
tracker.addRow(row);
}
tracker.printStuff();
tracker.assertTimeStamp("dimB", 50000, 600000);

RowValueTracker tracker2 = new RowValueTracker();
DataGenerator dataGenerator2 = new DataGenerator(schemas, 9999, Intervals.utc(50000, 50001), 100);
for (int i = 0; i < 100; i++) {
dataGenerator2.nextRow();
InputRow row = dataGenerator2.nextRow();
tracker2.addRow(row);
}
tracker2.printStuff();
tracker2.assertTimeStamp("dimB", 50000, 50001);
}

@Test
Expand Down Expand Up @@ -480,17 +501,42 @@ public void testLazyZipf()
DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
for (int i = 0; i < 100000; i++) {
InputRow row = dataGenerator.nextRow();
System.out.println("Z-ROW: " + row);

Assert.assertNotNull(row);
tracker.addRow(row);
}

tracker.printStuff();
tracker.assertTotalRowSize("dimA", 100000);
tracker.assertTotalRowSize("dimB", 100000);
tracker.assertTotalRowSize("dimC", 100000);
tracker.assertTotalRowSize("dimD", 100000);
}

/**
 * Immutable holder for the properties tracked per distinct dimension value:
 * an occurrence count and a timestamp.
 *
 * Instances are never mutated; a caller that needs to change the count
 * creates a new instance carrying the updated count and the timestamp it
 * wants to keep.
 */
private static class RowValuePropertyTracker
{
  // Both fields are assigned exactly once, in the constructor.
  private final Integer count;
  private final DateTime timeStamp;

  /**
   * @param count     number of occurrences recorded so far
   * @param timeStamp timestamp associated with the tracked value
   */
  public RowValuePropertyTracker(Integer count, DateTime timeStamp)
  {
    this.count = count;
    this.timeStamp = timeStamp;
  }

  /**
   * @return the occurrence count given at construction
   */
  public Integer getCount()
  {
    return count;
  }

  /**
   * @return the timestamp given at construction
   */
  public DateTime getTimeStamp()
  {
    return timeStamp;
  }
}

private static class RowValueTracker
{
private Map<String, Map<Object, Integer>> dimensionMap;
private Map<String, Map<Object, RowValuePropertyTracker>> dimensionMap;

public RowValueTracker()
{
Expand All @@ -501,10 +547,10 @@ public void addRow(InputRow row)
{
for (String dim : row.getDimensions()) {
if (dimensionMap.get(dim) == null) {
dimensionMap.put(dim, new HashMap<Object, Integer>());
dimensionMap.put(dim, new HashMap<Object, RowValuePropertyTracker>());
}

Map<Object, Integer> valueMap = dimensionMap.get(dim);
Map<Object, RowValuePropertyTracker> valueMap = dimensionMap.get(dim);
Object dimVals = row.getRaw(dim);
if (dimVals == null) {
dimVals = Collections.singletonList(null);
Expand All @@ -518,20 +564,54 @@ public void addRow(InputRow row)
val = "";
}
if (valueMap.get(val) == null) {
valueMap.put(val, 0);
valueMap.put(val, new RowValuePropertyTracker(0, row.getTimestamp()));
}
valueMap.put(val, valueMap.get(val) + 1);
RowValuePropertyTracker property = valueMap.get(val);
valueMap.put(val, new RowValuePropertyTracker(property.getCount() + 1, property.getTimeStamp()));
}
}
}

/**
 * Asserts whether the empty-string key was recorded for the given dimension.
 *
 * NOTE(review): addRow appears to store null values under the "" key, which is
 * why the presence of "" is treated as "the dimension produced nulls" - confirm
 * against addRow.
 *
 * @param dim      name of the dimension to inspect
 * @param expected true if the "" key must be present, false if it must be absent
 */
private void assertNullable(String dim, boolean expected)
{
  Map<Object, RowValuePropertyTracker> valueMap = dimensionMap.get(dim);
  // Fail with a readable message instead of a NullPointerException when the
  // dimension was never added to the tracker.
  Assert.assertNotNull(String.format(Locale.US, "%s dimension was never tracked", dim), valueMap);

  String format = expected ? "%s dimension is nullable" : "%s dimension is not nullable";
  String message = String.format(Locale.US, format, dim);
  Assert.assertEquals(message, expected, valueMap.containsKey(""));
}

/**
 * Asserts that every timestamp recorded for the given dimension lies in the
 * closed range [startTime, endTime], both expressed in epoch milliseconds.
 *
 * Only the timestamp stored in each value's RowValuePropertyTracker is checked,
 * i.e. one timestamp per distinct value of the dimension.
 *
 * @param dim       name of the dimension to inspect
 * @param startTime inclusive lower bound, in milliseconds
 * @param endTime   inclusive upper bound, in milliseconds
 */
private void assertTimeStamp(String dim, long startTime, long endTime)
{
  Map<Object, RowValuePropertyTracker> valueMap = dimensionMap.get(dim);
  // Fail with a readable message instead of a NullPointerException when the
  // dimension was never added to the tracker.
  Assert.assertNotNull(String.format(Locale.US, "%s dimension was never tracked", dim), valueMap);

  String message = String.format(
      Locale.US,
      "%s timeStamp value should be in the range of [%s, %s]",
      dim,
      startTime,
      endTime
  );
  // Iterate the values directly; the keys are not needed for the range check.
  for (RowValuePropertyTracker property : valueMap.values()) {
    long timeStamp = property.getTimeStamp().getMillis();
    Assert.assertTrue(message, timeStamp >= startTime && timeStamp <= endTime);
  }
}

/**
 * Asserts that the occurrence counts of all distinct values recorded for the
 * given dimension add up to the expected total.
 *
 * @param dim      name of the dimension to inspect
 * @param expected expected sum of the per-value counts
 */
private void assertTotalRowSize(String dim, int expected)
{
  Map<Object, RowValuePropertyTracker> valueMap = dimensionMap.get(dim);
  // Fail with a readable message instead of a NullPointerException when the
  // dimension was never added to the tracker.
  Assert.assertNotNull(String.format(Locale.US, "%s dimension was never tracked", dim), valueMap);

  String message = String.format(Locale.US, "%s dimension row size is not equal", dim);
  int count = 0;
  // Iterate the values directly; the keys are not needed to total the counts.
  for (RowValuePropertyTracker property : valueMap.values()) {
    count += property.getCount();
  }
  Assert.assertEquals(message, expected, count);
}

public void printStuff()
{
System.out.println();
for (String dim : dimensionMap.keySet()) {
System.out.println("DIMENSION " + dim + "\n============");
Map<Object, Integer> valueMap = dimensionMap.get(dim);
log.info("DIMENSION " + dim + "\n============");
Map<Object, RowValuePropertyTracker> valueMap = dimensionMap.get(dim);

List<Comparable> valList = new ArrayList<>();
for (Object val : valueMap.keySet()) {
Expand All @@ -541,9 +621,10 @@ public void printStuff()
Collections.sort(valList);

for (Comparable val : valList) {
System.out.println(" VAL: " + val + " CNT: " + valueMap.get(val));
RowValuePropertyTracker property = valueMap.get(val);
log.info(" VAL: " + val + " CNT: " + property.getCount() + " TIMESTAMP: " + property.getTimeStamp()
.getMillis());
}
System.out.println();
}
}
}
Expand Down

0 comments on commit 935aa18

Please sign in to comment.