This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new f9df57a9e3 Add column name to JSONParsing exception message during
index build (#12151)
f9df57a9e3 is described below
commit f9df57a9e32d1fbd538fc37338245b06c440838b
Author: Rajat Venkatesh <[email protected]>
AuthorDate: Wed Jan 3 21:03:39 2024 +0530
Add column name to JSONParsing exception message during index build (#12151)
* Add a reproducer for a json error during index build
* Add column name to the error message and change the repro
* Use + operator to concatenate strings
Co-authored-by: Xiaotian (Jackie) Jiang
<[email protected]>
---------
Co-authored-by: Xiaotian (Jackie) Jiang
<[email protected]>
---
.../pinot/queries/JsonMalformedIndexTest.java | 131 +++++++++++++++++++++
.../creator/impl/ColumnJsonParserException.java | 48 ++++++++
.../creator/impl/SegmentColumnarIndexCreator.java | 14 ++-
3 files changed, 188 insertions(+), 5 deletions(-)
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java
new file mode 100644
index 0000000000..6a9c0a28cd
--- /dev/null
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.queries;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import
org.apache.pinot.segment.local.segment.creator.impl.ColumnJsonParserException;
+import
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.ImmutableSegment;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.utils.ReadMode;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class JsonMalformedIndexTest extends BaseQueriesTest {
+ private static final String RAW_TABLE_NAME = "testTable";
+ private static final String SEGMENT_NAME = "testSegment";
+ private static final String STRING_COLUMN = "stringColumn";
+ private static final String JSON_COLUMN = "jsonColumn";
+ private static final Schema SCHEMA = new
Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME)
+ .addSingleValueDimension(STRING_COLUMN, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(JSON_COLUMN,
FieldSpec.DataType.STRING).build();
+ private static final TableConfig TABLE_CONFIG =
+ new
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build();
+ private IndexSegment _indexSegment;
+ private List<IndexSegment> _indexSegments;
+ private final List<GenericRow> _records = new ArrayList<>();
+
+ @BeforeClass
+ public void setUp()
+ throws Exception {
+ _records.add(createRecord("ludwik von drake",
+ "{\"name\": {\"first\": \"ludwik\", \"last\": \"von drake\"},
\"id\": 181, "
+ + "\"data\": [\"l\", \"b\", \"c\", \"d\"]"));
+ }
+
+ protected void checkResult(String query, Object[][] expectedResults) {
+ BrokerResponseNative brokerResponse =
getBrokerResponseForOptimizedQuery(query, TABLE_CONFIG, SCHEMA);
+ QueriesTestUtils.testInterSegmentsResult(brokerResponse,
Arrays.asList(expectedResults));
+ }
+
+ File indexDir() {
+ return new File(FileUtils.getTempDirectory(),
getClass().getSimpleName());
+ }
+
+ GenericRow createRecord(String stringValue, String jsonValue) {
+ GenericRow record = new GenericRow();
+ record.putValue(STRING_COLUMN, stringValue);
+ record.putValue(JSON_COLUMN, jsonValue);
+ return record;
+ }
+
+ @Test(expectedExceptions = ColumnJsonParserException.class,
+ expectedExceptionsMessageRegExp = "Column: jsonColumn.*")
+ public void testJsonIndexBuild()
+ throws Exception {
+ File indexDir = indexDir();
+ FileUtils.deleteDirectory(indexDir);
+
+ List<String> jsonIndexColumns = new ArrayList<>();
+ jsonIndexColumns.add("jsonColumn");
+ TABLE_CONFIG.getIndexingConfig().setJsonIndexColumns(jsonIndexColumns);
+ SegmentGeneratorConfig segmentGeneratorConfig = new
SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA);
+ segmentGeneratorConfig.setTableName(RAW_TABLE_NAME);
+ segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);
+ segmentGeneratorConfig.setOutDir(indexDir.getPath());
+
+ SegmentIndexCreationDriverImpl driver = new
SegmentIndexCreationDriverImpl();
+ driver.init(segmentGeneratorConfig, new
GenericRowRecordReader(_records));
+ driver.build();
+
+ IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+ indexLoadingConfig.setTableConfig(TABLE_CONFIG);
+ indexLoadingConfig.setJsonIndexColumns(new
HashSet<>(jsonIndexColumns));
+ indexLoadingConfig.setReadMode(ReadMode.mmap);
+
+ ImmutableSegment immutableSegment =
+ ImmutableSegmentLoader.load(new File(indexDir, SEGMENT_NAME),
indexLoadingConfig);
+ _indexSegment = immutableSegment;
+ _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
+
+ Object[][] expecteds1 = {{"von drake"}, {"von drake"}, {"von drake"},
{"von drake"}};
+ checkResult("SELECT jsonextractscalar(jsonColumn, '$.name.last',
'STRING') FROM testTable", expecteds1);
+ }
+
+ @Override
+ protected String getFilter() {
+ return "";
+ }
+
+ @Override
+ protected IndexSegment getIndexSegment() {
+ return _indexSegment;
+ }
+
+ @Override
+ protected List<IndexSegment> getIndexSegments() {
+ return _indexSegments;
+ }
+}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java
new file mode 100644
index 0000000000..962bb539cc
--- /dev/null
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.creator.impl;
+
+import com.fasterxml.jackson.core.JsonParseException;
+
+public class ColumnJsonParserException extends JsonParseException {
+ /**
+ * Exception type for parsing problems when
+ * processing JSON content in a column
+ * Sub-class of {@link com.fasterxml.jackson.core.JsonParseException}.
+ */
+ private final String _columnName;
+
+ public ColumnJsonParserException(String columnName, JsonParseException
jpe) {
+ super(jpe.getProcessor(), jpe.getOriginalMessage(), jpe.getCause());
+ _columnName = columnName;
+ }
+
+ /**
+ * Default method overridden so that we can add column and location
information
+ */
+ @Override
+ public String getMessage() {
+ return "Column: " + _columnName + "\n" + super.getMessage();
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getName() + ": " + getMessage();
+ }
+}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 15ae19a9f3..5f5c589f2f 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.segment.local.segment.creator.impl;
+import com.fasterxml.jackson.core.JsonParseException;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
@@ -315,11 +316,14 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName);
SegmentDictionaryCreator dictionaryCreator =
_dictionaryCreatorMap.get(columnName);
-
- if (fieldSpec.isSingleValueField()) {
- indexSingleValueRow(dictionaryCreator, columnValueToIndex,
creatorsByIndex);
- } else {
- indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex,
creatorsByIndex);
+ try {
+ if (fieldSpec.isSingleValueField()) {
+ indexSingleValueRow(dictionaryCreator, columnValueToIndex,
creatorsByIndex);
+ } else {
+ indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex,
creatorsByIndex);
+ }
+ } catch (JsonParseException jpe) {
+ throw new ColumnJsonParserException(columnName, jpe);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]