From 50912eb0c419151bdaa23fa5153d93dfab4bf9c6 Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Thu, 28 Dec 2023 10:21:55 +0530 Subject: [PATCH] Create DateTimeGenerator and add it to data generator (#12206) --- .../data/DataGenerationHelpers.java | 35 +++++-- .../data/generator/DataGenerator.java | 18 +++- .../data/generator/DataGeneratorSpec.java | 92 ++++++++++++++++++- .../data/generator/DateTimeGenerator.java | 51 ++++++++++ .../provisioning/MemoryEstimator.java | 12 ++- .../admin/command/GenerateDataCommand.java | 32 ++----- 6 files changed, 201 insertions(+), 39 deletions(-) create mode 100644 pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java index b4017abfa81f..0450d56e87aa 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java @@ -22,8 +22,8 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; +import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.math.IntRange; @@ -34,6 +34,7 @@ import org.apache.pinot.controller.recommender.data.writer.CsvWriter; import org.apache.pinot.controller.recommender.data.writer.FileWriterSpec; import org.apache.pinot.controller.recommender.data.writer.JsonWriter; +import org.apache.pinot.spi.data.DateTimeFieldSpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.TimeFieldSpec; @@ -82,10 +83,16 @@ private static File handleOutDir(String outDir, boolean isOverrideOutDir) return dir; } - public static DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List columns, - HashMap dataTypes, HashMap fieldTypes, - HashMap timeUnits, HashMap cardinality, HashMap range, - HashMap> pattern, Map mvCountMap, Map lengthMap) { + public static DataGeneratorSpec buildDataGeneratorSpec(Schema schema) { + final List columns = new LinkedList<>(); + final HashMap dataTypes = new HashMap<>(); + final HashMap fieldTypes = new HashMap<>(); + final HashMap timeUnits = new HashMap<>(); + + final HashMap cardinality = new HashMap<>(); + final HashMap range = new HashMap<>(); + final HashMap granularityMap = new HashMap<>(); + final HashMap formatMap = new HashMap<>(); for (final FieldSpec fs : schema.getAllFieldSpecs()) { String col = fs.getName(); columns.add(col); @@ -104,16 +111,28 @@ public static DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List lengthMap = new HashMap<>(); List columnNames = new ArrayList<>(); + final Map dateTimeFormatMap = new HashMap<>(); + final Map dateTimeGranularityMap = new HashMap<>(); + int cardinalityValue = 5; int strLength = 5; @@ -199,7 +213,7 @@ public static void main(String[] args) String outputDir = Paths.get(System.getProperty("java.io.tmpdir"), "csv-data").toString(); final DataGeneratorSpec spec = new DataGeneratorSpec(columnNames, cardinality, range, template, mvCountMap, lengthMap, dataTypes, fieldTypes, - timeUnits); + timeUnits, dateTimeFormatMap, dateTimeGranularityMap); final DataGenerator gen = new DataGenerator(); gen.init(spec); diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java index f64a7a984bd8..550730ac9c89 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DataGeneratorSpec.java @@ -41,6 +41,8 @@ public class DataGeneratorSpec { private final Map _dataTypeMap; private final Map _fieldTypeMap; private final Map _timeUnitMap; + private final Map _dateTimeFormatMap; + private final Map _dateTimeGranularityMap; @Deprecated private FileFormat _outputFileFormat; @@ -74,11 +76,15 @@ public DataGeneratorSpec(List columns, Map cardinalityM _dataTypeMap = dataTypesMap; _fieldTypeMap = fieldTypesMap; _timeUnitMap = timeUnitMap; + + _dateTimeFormatMap = new HashMap<>(); + _dateTimeGranularityMap = new HashMap<>(); } public DataGeneratorSpec(List columns, Map cardinalityMap, Map rangeMap, Map> patternMap, Map mvCountMap, Map lengthMap, - Map dataTypesMap, Map fieldTypesMap, Map timeUnitMap) { + Map dataTypesMap, Map fieldTypesMap, Map timeUnitMap, + Map dateTimeFormatMap, Map dateTimeGranularityMap) { _columns = columns; _cardinalityMap = cardinalityMap; _rangeMap = rangeMap; @@ -89,6 +95,8 @@ public DataGeneratorSpec(List columns, Map cardinalityM _dataTypeMap = dataTypesMap; _fieldTypeMap = fieldTypesMap; _timeUnitMap = timeUnitMap; + _dateTimeGranularityMap = dateTimeGranularityMap; + _dateTimeFormatMap = dateTimeFormatMap; } public Map getDataTypeMap() { @@ -139,6 +147,14 @@ public String getOutputDir() { return _outputDir; } + public Map getDateTimeFormatMap() { + return _dateTimeFormatMap; + } + + public Map getDateTimeGranularityMap() { + return _dateTimeGranularityMap; + } + @Override public String toString() { final StringBuilder builder = new StringBuilder(); @@ -156,4 +172,78 @@ public String toString() { builder.append(", output dir : " + _outputDir); return builder.toString(); } + + public static class Builder { + private List _columns = new ArrayList<>(); + private Map _cardinalityMap = new HashMap<>(); + private Map _rangeMap = new HashMap<>(); + private Map> _patternMap = new HashMap<>(); + private Map _mvCountMap = new HashMap<>(); + private Map _lengthMap = new HashMap<>(); + private Map _dataTypeMap = new HashMap<>(); + private Map _fieldTypeMap = new HashMap<>(); + private Map _timeUnitMap = new HashMap<>(); + private Map _dateTimeFormatMap = new HashMap<>(); + private Map _dateTimeGranularityMap = new HashMap<>(); + + public DataGeneratorSpec build() { + return new DataGeneratorSpec(_columns, _cardinalityMap, _rangeMap, _patternMap, _mvCountMap, _lengthMap, + _dataTypeMap, _fieldTypeMap, _timeUnitMap, _dateTimeFormatMap, _dateTimeGranularityMap); + } + + public Builder setColumns(List columns) { + _columns = columns; + return this; + } + + public Builder setCardinalityMap(Map cardinalityMap) { + _cardinalityMap = cardinalityMap; + return this; + } + + public Builder setRangeMap(Map rangeMap) { + _rangeMap = rangeMap; + return this; + } + + public Builder setPatternMap(Map> patternMap) { + _patternMap = patternMap; + return this; + } + + public Builder setMvCountMap(Map mvCountMap) { + _mvCountMap = mvCountMap; + return this; + } + + public Builder setLengthMap(Map lengthMap) { + _lengthMap = lengthMap; + return this; + } + + public Builder setDataTypeMap(Map dataTypeMap) { + _dataTypeMap = dataTypeMap; + return this; + } + + public Builder setFieldTypeMap(Map fieldTypeMap) { + _fieldTypeMap = fieldTypeMap; + return this; + } + + public Builder setTimeUnitMap(Map timeUnitMap) { + _timeUnitMap = timeUnitMap; + return this; + } + + public Builder setDateTimeFormatMap(Map dateTimeFormatMap) { + _dateTimeFormatMap = dateTimeFormatMap; + return this; + } + + public Builder setDateTimeGranularityMap(Map dateTimeGranularityMap) { + _dateTimeGranularityMap = dateTimeGranularityMap; + return this; + } + } } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java new file mode 100644 index 000000000000..b83f95ba3b23 --- /dev/null +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/generator/DateTimeGenerator.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.controller.recommender.data.generator; + +import java.util.Date; +import java.util.Random; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; + + +public class DateTimeGenerator implements Generator { + + private static final int MULTIPLIER_CARDINALITY = 5; + private final DateTimeFormatSpec _formatSpec; + private final DateTimeGranularitySpec _granularitySpec; + private long _currentValue; + private Random _multiplier = new Random(); + + public DateTimeGenerator(String format, String granularity) { + _formatSpec = new DateTimeFormatSpec(format); + _granularitySpec = new DateTimeGranularitySpec(granularity); + } + + @Override + public void init() { + _currentValue = new Date().getTime(); + } + + @Override + public Object next() { + _currentValue += _granularitySpec.granularityToMillis() * _multiplier.nextInt(MULTIPLIER_CARDINALITY); + return _formatSpec.fromMillisToFormat(_currentValue); + } +} diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java index 3c19db74b61b..934b1975601d 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/realtime/provisioning/MemoryEstimator.java @@ -526,9 +526,15 @@ private File generateData() { // generate data String outputDir = new File(_workingDir, "csv").getAbsolutePath(); - DataGeneratorSpec spec = - new DataGeneratorSpec(colNames, cardinalities, new HashMap<>(), new HashMap<>(), mvCounts, lengths, dataTypes, - fieldTypes, timeUnits); + DataGeneratorSpec spec = new DataGeneratorSpec.Builder() + .setColumns(colNames) + .setCardinalityMap(cardinalities) + .setMvCountMap(mvCounts) + .setLengthMap(lengths) + .setDataTypeMap(dataTypes) + .setFieldTypeMap(fieldTypes) + .setTimeUnitMap(timeUnits) + .build(); DataGenerator dataGenerator = new DataGenerator(); try { dataGenerator.init(spec); diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java index 9a98f652fb8e..1c06e95853db 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java @@ -20,10 +20,7 @@ import java.io.File; import java.io.IOException; -import java.util.HashMap; -import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.commons.lang.math.IntRange; import org.apache.pinot.controller.recommender.data.DataGenerationHelpers; @@ -31,7 +28,6 @@ import org.apache.pinot.controller.recommender.data.generator.DataGeneratorSpec; import org.apache.pinot.controller.recommender.data.generator.SchemaAnnotation; import org.apache.pinot.spi.data.FieldSpec.DataType; -import org.apache.pinot.spi.data.FieldSpec.FieldType; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.Schema.SchemaBuilder; import org.apache.pinot.spi.data.TimeGranularitySpec; @@ -124,23 +120,10 @@ public boolean execute() } Schema schema = Schema.fromFile(new File(_schemaFile)); - - List columns = new LinkedList<>(); - final HashMap dataTypes = new HashMap<>(); - final HashMap fieldTypes = new HashMap<>(); - final HashMap timeUnits = new HashMap<>(); - - final HashMap cardinality = new HashMap<>(); - final HashMap range = new HashMap<>(); - final HashMap> pattern = new HashMap<>(); - final HashMap mvCountMap = new HashMap<>(); - final HashMap lengthMap = new HashMap<>(); - - buildCardinalityRangeMaps(_schemaAnnFile, cardinality, range, pattern); - final DataGeneratorSpec spec = - DataGenerationHelpers.buildDataGeneratorSpec(schema, columns, dataTypes, fieldTypes, timeUnits, cardinality, - range, pattern, mvCountMap, lengthMap); + DataGenerationHelpers.buildDataGeneratorSpec(schema); + buildCardinalityRangeMaps(_schemaAnnFile, spec); + final DataGenerator gen = new DataGenerator(); gen.init(spec); @@ -158,8 +141,7 @@ public boolean execute() return true; } - private void buildCardinalityRangeMaps(String file, HashMap cardinality, - HashMap range, Map> pattern) + private void buildCardinalityRangeMaps(String file, DataGeneratorSpec spec) throws IOException { if (file == null) { return; // Nothing to do here. @@ -171,11 +153,11 @@ private void buildCardinalityRangeMaps(String file, HashMap car String column = sa.getColumn(); if (sa.isRange()) { - range.put(column, new IntRange(sa.getRangeStart(), sa.getRangeEnd())); + spec.getRangeMap().put(column, new IntRange(sa.getRangeStart(), sa.getRangeEnd())); } else if (sa.getPattern() != null) { - pattern.put(column, sa.getPattern()); + spec.getPatternMap().put(column, sa.getPattern()); } else { - cardinality.put(column, sa.getCardinality()); + spec.getCardinalityMap().put(column, sa.getCardinality()); } } }