diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java new file mode 100644 index 00000000000..6a9c0a28cd3 --- /dev/null +++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.queries; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.ColumnJsonParserException; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; +import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; +import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.GenericRow; +import org.apache.pinot.spi.utils.ReadMode; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class JsonMalformedIndexTest extends BaseQueriesTest { + private static final String RAW_TABLE_NAME = "testTable"; + private static final String SEGMENT_NAME = "testSegment"; + private static final String STRING_COLUMN = "stringColumn"; + private static final String JSON_COLUMN = "jsonColumn"; + private static final Schema SCHEMA = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME) + .addSingleValueDimension(STRING_COLUMN, FieldSpec.DataType.STRING) + .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.STRING).build(); + private static final TableConfig TABLE_CONFIG = + new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build(); + private IndexSegment _indexSegment; + private List _indexSegments; + private final List _records = new ArrayList<>(); + + @BeforeClass + public void setUp() + throws Exception { + _records.add(createRecord("ludwik von drake", + "{\"name\": {\"first\": \"ludwik\", \"last\": \"von drake\"}, \"id\": 181, " + + "\"data\": [\"l\", \"b\", \"c\", \"d\"]")); + } + + protected void checkResult(String query, Object[][] expectedResults) { + BrokerResponseNative brokerResponse = getBrokerResponseForOptimizedQuery(query, TABLE_CONFIG, SCHEMA); + QueriesTestUtils.testInterSegmentsResult(brokerResponse, Arrays.asList(expectedResults)); + } + + File indexDir() { + return new File(FileUtils.getTempDirectory(), getClass().getSimpleName()); + } + + GenericRow createRecord(String stringValue, String jsonValue) { + GenericRow record = new GenericRow(); + record.putValue(STRING_COLUMN, stringValue); + record.putValue(JSON_COLUMN, jsonValue); + return record; + } + + @Test(expectedExceptions = ColumnJsonParserException.class, + expectedExceptionsMessageRegExp = "Column: jsonColumn.*") + public void testJsonIndexBuild() + throws Exception { + File indexDir = indexDir(); + FileUtils.deleteDirectory(indexDir); + + List jsonIndexColumns = new ArrayList<>(); + jsonIndexColumns.add("jsonColumn"); + TABLE_CONFIG.getIndexingConfig().setJsonIndexColumns(jsonIndexColumns); + SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA); + segmentGeneratorConfig.setTableName(RAW_TABLE_NAME); + segmentGeneratorConfig.setSegmentName(SEGMENT_NAME); + segmentGeneratorConfig.setOutDir(indexDir.getPath()); + + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); + driver.init(segmentGeneratorConfig, new GenericRowRecordReader(_records)); + driver.build(); + + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + indexLoadingConfig.setTableConfig(TABLE_CONFIG); + indexLoadingConfig.setJsonIndexColumns(new HashSet<>(jsonIndexColumns)); + indexLoadingConfig.setReadMode(ReadMode.mmap); + + ImmutableSegment immutableSegment = + ImmutableSegmentLoader.load(new File(indexDir, SEGMENT_NAME), indexLoadingConfig); + _indexSegment = immutableSegment; + _indexSegments = Arrays.asList(immutableSegment, immutableSegment); + + Object[][] expecteds1 = {{"von drake"}, {"von drake"}, {"von drake"}, {"von drake"}}; + checkResult("SELECT jsonextractscalar(jsonColumn, '$.name.last', 'STRING') FROM testTable", expecteds1); + } + + @Override + protected String getFilter() { + return ""; + } + + @Override + protected IndexSegment getIndexSegment() { + return _indexSegment; + } + + @Override + protected List getIndexSegments() { + return _indexSegments; + } +} diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java new file mode 100644 index 00000000000..962bb539cc4 --- /dev/null +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.creator.impl; + +import com.fasterxml.jackson.core.JsonParseException; + +public class ColumnJsonParserException extends JsonParseException { + /** + * Exception type for parsing problems when + * processing JSON content in a column + * Sub-class of {@link com.fasterxml.jackson.core.JsonParseException}. + */ + private final String _columnName; + + public ColumnJsonParserException(String columnName, JsonParseException jpe) { + super(jpe.getProcessor(), jpe.getOriginalMessage(), jpe.getCause()); + _columnName = columnName; + } + + /** + * Default method overridden so that we can add column and location information + */ + @Override + public String getMessage() { + return "Column: " + _columnName + "\n" + super.getMessage(); + } + + @Override + public String toString() { + return getClass().getName() + ": " + getMessage(); + } +} diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java index 15ae19a9f3d..5f5c589f2f4 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.segment.local.segment.creator.impl; +import com.fasterxml.jackson.core.JsonParseException; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -315,11 +316,14 @@ public void indexRow(GenericRow row) FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName); SegmentDictionaryCreator dictionaryCreator = _dictionaryCreatorMap.get(columnName); - - if (fieldSpec.isSingleValueField()) { - indexSingleValueRow(dictionaryCreator, columnValueToIndex, creatorsByIndex); - } else { - indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex, creatorsByIndex); + try { + if (fieldSpec.isSingleValueField()) { + indexSingleValueRow(dictionaryCreator, columnValueToIndex, creatorsByIndex); + } else { + indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex, creatorsByIndex); + } + } catch (JsonParseException jpe) { + throw new ColumnJsonParserException(columnName, jpe); } }