From 43dadbfd96a70c19a9ac83bb6c0c35f3fa58bffb Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Fri, 9 Feb 2024 20:26:59 -0800 Subject: [PATCH] Fixing the backward compatible issue that existing metadata may contain unescaped characters (#12393) --- .../segment/index/ColumnMetadataTest.java | 18 ++++++ .../data/metadata-with-unescaped.properties | 61 +++++++++++++++++++ .../spi/env/CommonsConfigurationUtils.java | 7 ++- 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 pinot-segment-local/src/test/resources/data/metadata-with-unescaped.properties diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java index 053bc8dc55ef..e961ad50006d 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java @@ -19,12 +19,15 @@ package org.apache.pinot.segment.local.segment.index; import java.io.File; +import java.net.URL; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.commons.configuration2.PropertiesConfiguration; +import org.apache.commons.configuration2.ex.ConfigurationException; import org.apache.commons.io.FileUtils; import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; import org.apache.pinot.segment.local.segment.creator.SegmentTestUtils; @@ -34,11 +37,13 @@ import org.apache.pinot.segment.spi.SegmentMetadata; import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver; +import org.apache.pinot.segment.spi.index.metadata.ColumnMetadataImpl; import org.apache.pinot.segment.spi.partition.BoundedColumnValuePartitionFunction; import org.apache.pinot.spi.config.table.ColumnPartitionConfig; import org.apache.pinot.spi.config.table.SegmentPartitionConfig; import org.apache.pinot.spi.data.DimensionFieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.env.CommonsConfigurationUtils; import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.util.TestUtils; import org.testng.Assert; @@ -207,4 +212,17 @@ public void testSegmentPartitionedWithBoundedColumnValue() Assert.assertEquals(col3Meta.getPartitionFunction().getFunctionConfig(), functionConfig); Assert.assertEquals(col3Meta.getPartitions(), Stream.of(0, 1, 2, 3).collect(Collectors.toSet())); } + + @Test + public void testMetadataWithEscapedValue() + throws ConfigurationException { + // Reading metadata file: + ClassLoader classLoader = getClass().getClassLoader(); + URL resource = classLoader.getResource("data/metadata-with-unescaped.properties"); + File metadataFile = new File(resource.getFile()); + PropertiesConfiguration propertiesConfiguration = CommonsConfigurationUtils.fromFile(metadataFile); + ColumnMetadataImpl installationOutput = + ColumnMetadataImpl.fromPropertiesConfiguration("installation_output", propertiesConfiguration); + Assert.assertNotNull(installationOutput); + } } diff --git a/pinot-segment-local/src/test/resources/data/metadata-with-unescaped.properties b/pinot-segment-local/src/test/resources/data/metadata-with-unescaped.properties new file mode 100644 index 000000000000..e11479fa1b1e --- /dev/null +++ b/pinot-segment-local/src/test/resources/data/metadata-with-unescaped.properties @@ -0,0 +1,61 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +segment.padding.character = \u0000 +segment.name = mytable_segment_0 +segment.table.name = mytable +segment.dimension.column.names = installation_output +segment.datetime.column.names = createdAt +segment.time.column.name = created_at +segment.total.docs = 6478 +segment.start.time = 1707431311000 +segment.end.time = 1707517649000 +segment.time.unit = MILLISECONDS +column.installation_output.cardinality = 31 +column.installation_output.totalDocs = 6478 +column.installation_output.dataType = STRING +column.installation_output.bitsPerElement = 5 +column.installation_output.lengthOfEachEntry = 512 +column.installation_output.columnType = DIMENSION +column.installation_output.isSorted = false +column.installation_output.hasDictionary = true +column.installation_output.isSingleValues = true +column.installation_output.maxNumberOfMultiValues = -1 +column.installation_output.totalNumberOfEntries = 6478 +column.installation_output.isAutoGenerated = false +column.installation_output.minValue = \r\n\r\n utils em::C:\\dir\\utils\r\nPSParentPath : Mi +column.installation_output.maxValue = null +column.installation_output.defaultNullValue = null +column.createdAt.cardinality = 319 +column.createdAt.totalDocs = 6478 +column.createdAt.dataType = TIMESTAMP +column.createdAt.bitsPerElement = 9 +column.createdAt.lengthOfEachEntry = 0 +column.createdAt.columnType = DATE_TIME +column.createdAt.isSorted = false +column.createdAt.hasDictionary = true +column.createdAt.isSingleValues = true +column.createdAt.maxNumberOfMultiValues = -1 +column.createdAt.totalNumberOfEntries = 6478 +column.createdAt.isAutoGenerated = false +column.createdAt.datetimeFormat = 1:MILLISECONDS:TIMESTAMP +column.createdAt.datetimeGranularity = 1:MILLISECONDS +column.createdAt.minValue = 1634841705000 +column.createdAt.maxValue = 1707517574000 +column.createdAt.defaultNullValue = 0 diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java index ae79fe9d90fc..759fa0e3bde8 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java @@ -241,7 +241,12 @@ public static String replaceSpecialCharacterInPropertyValue(String value) { * {@link #replaceSpecialCharacterInPropertyValue(String)}. */ public static String recoverSpecialCharacterInPropertyValue(String value) { - value = StringEscapeUtils.unescapeJava(value); + try { + // This is for backward compatibility, to handle the old commons library escape character behavior. + value = StringEscapeUtils.unescapeJava(value); + } catch (Exception e) { + // If the value is not a valid escaped string, ignore the exception and continue + } if (value.isEmpty()) { return value; }