Skip to content

Commit

Permalink
Add support for custom date format and OpenSearch date format for date fields as part of Lucene query
Browse files Browse the repository at this point in the history

Github Issue - opensearch-project#2700

Signed-off-by: Manasvini B S <[email protected]>
  • Loading branch information
manasvinibs committed Jul 16, 2024
1 parent eb5c824 commit 92dec43
Show file tree
Hide file tree
Showing 22 changed files with 450 additions and 115 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1443,7 +1443,6 @@ public void testTimeFormat(
private static Stream<Arguments> getInvalidTestDataForTimeFormat() {
return Stream.of(
Arguments.of(DSL.literal("asdfasdf"), DSL.literal("%f")),
Arguments.of(DSL.literal("12345"), DSL.literal("%h")),
Arguments.of(DSL.literal("10:11:61"), DSL.literal("%h")),
Arguments.of(DSL.literal("10:61:12"), DSL.literal("%h")),
Arguments.of(DSL.literal("61:11:12"), DSL.literal("%h")));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@
import static java.util.stream.Collectors.joining;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;
import java.util.*;
import org.json.JSONObject;
import org.opensearch.sql.correctness.runner.resultset.DBResult;
import org.opensearch.sql.correctness.runner.resultset.Row;
Expand All @@ -29,6 +32,20 @@ public class JDBCConnection implements DBConnection {
private static final String DOUBLE_QUOTE = "''";
private static final String BACKTICK = "`";

/** Possible column type names for a date field. */
private static final Set<String> DATE_TIME_TYPES = ImmutableSet.of("DATE", "TIMESTAMP");

/** Formatter used to convert date-time values from H2 and SQLite to a date. */
private static final DateTimeFormatter DATE_OPTIONAL_TIME_NANO_FORMATTER =
new DateTimeFormatterBuilder()
.appendPattern("yyyy-MM-dd")
.optionalStart()
.appendPattern(" HH:mm:ss")
.optionalStart()
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
.optionalEnd()
.toFormatter();

/** Database name for display */
private final String databaseName;

Expand Down Expand Up @@ -170,16 +187,35 @@ private void populateMetaData(ResultSet resultSet, DBResult result) throws SQLEx
if (Strings.isNullOrEmpty(colName)) {
colName = metaData.getColumnName(i);
}
result.addColumn(colName, metaData.getColumnTypeName(i));

String type = metaData.getColumnTypeName(i);
// OpenSearch database returns datetime as DATE
if (type.equals("DATE")) {
result.addColumn(colName, mapToJDBCType(type));
} else {
result.addColumn(colName, type);
}
}
}

private void populateData(ResultSet resultSet, DBResult result) throws SQLException {

while (resultSet.next()) {
Row row = new Row();
ResultSetMetaData metaData = resultSet.getMetaData();

for (int i = 1; i <= result.columnSize(); i++) {
Object value = resultSet.getObject(i);
row.add(resultSet.wasNull() ? null : value);

// Even though the OpenSearch database returns the full date and time,
// the retrieved object from the result set contains only the date.
// We convert date-time values from other databases to a date format for precise comparison.
if (value != null && DATE_TIME_TYPES.contains(metaData.getColumnTypeName(i))) {
LocalDate dateTime = LocalDate.parse(value.toString(), DATE_OPTIONAL_TIME_NANO_FORMATTER);
row.add(dateTime.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")));
} else {
row.add(resultSet.wasNull() ? null : value);
}
}
result.addRow(row);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ public void groupByDateShouldPass() {
Index.BANK.getName()));

verifySchema(
response, schema("birthdate", null, "timestamp"), schema("count(*)", "count", "integer"));
response, schema("birthdate", null, "date"), schema("count(*)", "count", "integer"));
verifyDataRows(response, rows("2018-06-23 00:00:00", 1));
}

Expand All @@ -220,9 +220,7 @@ public void groupByDateWithAliasShouldPass() {
Index.BANK.getName()));

verifySchema(
response,
schema("birthdate", "birth", "timestamp"),
schema("count(*)", "count", "integer"));
response, schema("birthdate", "birth", "date"), schema("count(*)", "count", "integer"));
verifyDataRows(response, rows("2018-06-23 00:00:00", 1));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ public void test_nonnumeric_data_types() throws IOException {
schema("keyword_value", "string"),
schema("text_value", "string"),
schema("binary_value", "binary"),
schema("date_value", "timestamp"),
schema("date_nanos_value", "timestamp"),
schema("date_value", "date"),
schema("date_nanos_value", "date"),
schema("ip_value", "ip"),
schema("object_value", "struct"),
schema("nested_value", "array"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void testFieldsWildCard() throws IOException {
public void testSelectDateTypeField() throws IOException {
JSONObject result =
executeQuery(String.format("source=%s | fields birthdate", TEST_INDEX_BANK));
verifySchema(result, schema("birthdate", null, "timestamp"));
verifySchema(result, schema("birthdate", null, "date"));

verifyDataRows(
result,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,7 @@ public void testStatsTimeSpan() throws IOException {
executeQuery(
String.format("source=%s | stats count() by span(birthdate,1y)", TEST_INDEX_BANK));
verifySchema(
response,
schema("count()", null, "integer"),
schema("span(birthdate,1y)", null, "timestamp"));
response, schema("count()", null, "integer"), schema("span(birthdate,1y)", null, "date"));
verifyDataRows(response, rows(2, "2017-01-01 00:00:00"), rows(5, "2018-01-01 00:00:00"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,6 @@ public void typeof_opensearch_types() throws IOException {
TEST_INDEX_DATATYPE_NONNUMERIC));
verifyDataRows(
response,
rows(
"TEXT",
"TIMESTAMP",
"TIMESTAMP",
"BOOLEAN",
"OBJECT",
"KEYWORD",
"IP",
"BINARY",
"GEO_POINT"));
rows("TEXT", "DATE", "DATE", "BOOLEAN", "OBJECT", "KEYWORD", "IP", "BINARY", "GEO_POINT"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ public void testReadingDateFormats() throws IOException {
verifySchema(
result,
schema("weekyear_week_day", null, "date"),
schema("hour_minute_second_millis", null, "time"),
schema("strict_ordinal_date_time", null, "timestamp"));
schema("hour_minute_second_millis", null, "date"),
schema("strict_ordinal_date_time", null, "date"));
verifyDataRows(result, rows("1984-04-12", "09:07:42", "1984-04-12 09:07:42.000123456"));
}

Expand All @@ -68,11 +68,11 @@ public void testCustomFormats() {
JSONObject result = executeQuery(query);
verifySchema(
result,
schema("custom_time", null, "time"),
schema("custom_timestamp", null, "timestamp"),
schema("custom_time", null, "date"),
schema("custom_timestamp", null, "date"),
schema("custom_date_or_date", null, "date"),
schema("custom_date_or_custom_time", null, "timestamp"),
schema("custom_time_parser_check", null, "time"));
schema("custom_date_or_custom_time", null, "date"),
schema("custom_time_parser_check", null, "date"));
verifyDataRows(
result,
rows(
Expand All @@ -97,8 +97,8 @@ public void testCustomFormats2() {
verifySchema(
result,
schema("custom_no_delimiter_date", null, "date"),
schema("custom_no_delimiter_time", null, "time"),
schema("custom_no_delimiter_ts", null, "timestamp"));
schema("custom_no_delimiter_time", null, "date"),
schema("custom_no_delimiter_ts", null, "date"));
verifyDataRows(
result,
rows("1984-10-20", "10:20:30", "1984-10-20 15:35:48"),
Expand All @@ -116,10 +116,10 @@ public void testIncompleteFormats() {
JSONObject result = executeQuery(query);
verifySchema(
result,
schema("incomplete_1", null, "timestamp"),
schema("incomplete_1", null, "date"),
schema("incomplete_2", null, "date"),
schema("incorrect", null, "timestamp"),
schema("incomplete_custom_time", null, "time"),
schema("incorrect", null, "date"),
schema("incomplete_custom_time", null, "date"),
schema("incomplete_custom_date", null, "date"));
verifyDataRows(
result,
Expand All @@ -133,8 +133,7 @@ public void testNumericFormats() {
String query =
String.format("SELECT epoch_sec, epoch_milli" + " FROM %s", TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(
result, schema("epoch_sec", null, "timestamp"), schema("epoch_milli", null, "timestamp"));
verifySchema(result, schema("epoch_sec", null, "date"), schema("epoch_milli", null, "date"));
verifyDataRows(
result,
rows("1970-01-01 00:00:42", "1970-01-01 00:00:00.042"),
Expand All @@ -147,7 +146,7 @@ public void testDateNanosWithFormats() {
String query =
String.format("SELECT hour_minute_second_OR_t_time" + " FROM %s", TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "time"));
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "date"));
verifyDataRows(result, rows("09:07:42"), rows("07:07:42.123456789"));
}

Expand Down Expand Up @@ -182,15 +181,15 @@ public void testDateNanosWithFunctions() {
+ " FROM %s WHERE hour_minute_second_OR_t_time > TIME '08:07:00'",
TEST_INDEX_DATE_FORMATS);
result = executeQuery(query);
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "time"));
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "date"));
verifyDataRows(result, rows("09:07:42"));
query =
String.format(
"SELECT hour_minute_second_OR_t_time"
+ " FROM %s WHERE hour_minute_second_OR_t_time < TIME '08:07:00'",
TEST_INDEX_DATE_FORMATS);
result = executeQuery(query);
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "time"));
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "date"));
verifyDataRows(result, rows("07:07:42.123456789"));
}

Expand All @@ -203,7 +202,7 @@ public void testDateNanosOrderBy() {
+ " FROM %s ORDER BY hour_minute_second_OR_t_time ASC",
TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "time"));
verifySchema(result, schema("hour_minute_second_OR_t_time", null, "date"));
verifyDataRows(result, rows("07:07:42.123456789"), rows("09:07:42"));
}

Expand All @@ -225,7 +224,7 @@ public void testDateNanosWithNanos() {
String query =
String.format("SELECT date_nanos_value" + " FROM %s", TEST_INDEX_DATATYPE_NONNUMERIC);
JSONObject result = executeQuery(query);
verifySchema(result, schema("date_nanos_value", null, "timestamp"));
verifySchema(result, schema("date_nanos_value", null, "date"));
verifyDataRows(result, rows("2019-03-24 01:34:46.123456789"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void testSimpleDataTypesInSchema() {
schema("account_number", "long"),
schema("address", "text"),
schema("age", "integer"),
schema("birthdate", "timestamp"),
schema("birthdate", "date"),
schema("city", "keyword"),
schema("male", "boolean"),
schema("state", "text"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,6 @@ public void typeof_opensearch_types() {
TEST_INDEX_DATATYPE_NONNUMERIC));
verifyDataRows(
response,
rows(
"TEXT",
"TIMESTAMP",
"TIMESTAMP",
"BOOLEAN",
"OBJECT",
"KEYWORD",
"IP",
"BINARY",
"GEO_POINT"));
rows("TEXT", "DATE", "DATE", "BOOLEAN", "OBJECT", "KEYWORD", "IP", "BINARY", "GEO_POINT"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,23 @@ public String toString() {
@EqualsAndHashCode.Exclude @Getter protected MappingType mappingType;

// resolved ExprCoreType
protected ExprCoreType exprCoreType;
@Getter protected ExprCoreType exprCoreType;

/**
* Get a simplified type {@link ExprCoreType} if possible. To avoid returning `UNKNOWN` for
* `OpenSearch*Type`s, e.g. for IP, returns itself.
* `OpenSearch*Type`s, e.g. for IP, returns itself. If the `exprCoreType` is {@link
* ExprCoreType#DATE}, {@link ExprCoreType#TIMESTAMP}, {@link ExprCoreType#TIME}, or {@link
* ExprCoreType#UNKNOWN}, it returns the current instance; otherwise, it returns `exprCoreType`.
*
* @return An {@link ExprType}.
*/
public ExprType getExprType() {
if (exprCoreType != ExprCoreType.UNKNOWN) {
return exprCoreType;
}
return this;
return (exprCoreType == ExprCoreType.DATE
|| exprCoreType == ExprCoreType.TIMESTAMP
|| exprCoreType == ExprCoreType.TIME
|| exprCoreType == ExprCoreType.UNKNOWN)
? this
: exprCoreType;
}

/**
Expand Down
Loading

0 comments on commit 92dec43

Please sign in to comment.