This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new f9df57a9e3 Add column name to JSONParsing exception message during 
index build (#12151)
f9df57a9e3 is described below

commit f9df57a9e32d1fbd538fc37338245b06c440838b
Author: Rajat Venkatesh <[email protected]>
AuthorDate: Wed Jan 3 21:03:39 2024 +0530

    Add column name to JSONParsing exception message during index build (#12151)
    
    * Add a reproducer for a json error during index build
    * Add column name to the error message and change the repro
    * Use + operator to concatenate strings
    
    Co-authored-by: Xiaotian (Jackie) Jiang 
<[email protected]>
    
    ---------
    
    Co-authored-by: Xiaotian (Jackie) Jiang 
<[email protected]>
---
 .../pinot/queries/JsonMalformedIndexTest.java      | 131 +++++++++++++++++++++
 .../creator/impl/ColumnJsonParserException.java    |  48 ++++++++
 .../creator/impl/SegmentColumnarIndexCreator.java  |  14 ++-
 3 files changed, 188 insertions(+), 5 deletions(-)

diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java 
b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java
new file mode 100644
index 0000000000..6a9c0a28cd
--- /dev/null
+++ 
b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMalformedIndexTest.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.queries;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import 
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import 
org.apache.pinot.segment.local.segment.creator.impl.ColumnJsonParserException;
+import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.ImmutableSegment;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.utils.ReadMode;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class JsonMalformedIndexTest extends BaseQueriesTest {
+    private static final String RAW_TABLE_NAME = "testTable";
+    private static final String SEGMENT_NAME = "testSegment";
+    private static final String STRING_COLUMN = "stringColumn";
+    private static final String JSON_COLUMN = "jsonColumn";
+    private static final Schema SCHEMA = new 
Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME)
+            .addSingleValueDimension(STRING_COLUMN, FieldSpec.DataType.STRING)
+            .addSingleValueDimension(JSON_COLUMN, 
FieldSpec.DataType.STRING).build();
+    private static final TableConfig TABLE_CONFIG =
+            new 
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build();
+    private IndexSegment _indexSegment;
+    private List<IndexSegment> _indexSegments;
+    private final List<GenericRow> _records = new ArrayList<>();
+
+    @BeforeClass
+    public void setUp()
+            throws Exception {
+        _records.add(createRecord("ludwik von drake",
+                "{\"name\": {\"first\": \"ludwik\", \"last\": \"von drake\"}, 
\"id\": 181, "
+                        + "\"data\": [\"l\", \"b\", \"c\", \"d\"]"));
+    }
+
+    protected void checkResult(String query, Object[][] expectedResults) {
+        BrokerResponseNative brokerResponse = 
getBrokerResponseForOptimizedQuery(query, TABLE_CONFIG, SCHEMA);
+        QueriesTestUtils.testInterSegmentsResult(brokerResponse, 
Arrays.asList(expectedResults));
+    }
+
+    File indexDir() {
+        return new File(FileUtils.getTempDirectory(), 
getClass().getSimpleName());
+    }
+
+    GenericRow createRecord(String stringValue, String jsonValue) {
+        GenericRow record = new GenericRow();
+        record.putValue(STRING_COLUMN, stringValue);
+        record.putValue(JSON_COLUMN, jsonValue);
+        return record;
+    }
+
+    @Test(expectedExceptions = ColumnJsonParserException.class,
+          expectedExceptionsMessageRegExp = "Column: jsonColumn.*")
+    public void testJsonIndexBuild()
+            throws Exception {
+        File indexDir = indexDir();
+        FileUtils.deleteDirectory(indexDir);
+
+        List<String> jsonIndexColumns = new ArrayList<>();
+        jsonIndexColumns.add("jsonColumn");
+        TABLE_CONFIG.getIndexingConfig().setJsonIndexColumns(jsonIndexColumns);
+        SegmentGeneratorConfig segmentGeneratorConfig = new 
SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA);
+        segmentGeneratorConfig.setTableName(RAW_TABLE_NAME);
+        segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);
+        segmentGeneratorConfig.setOutDir(indexDir.getPath());
+
+        SegmentIndexCreationDriverImpl driver = new 
SegmentIndexCreationDriverImpl();
+        driver.init(segmentGeneratorConfig, new 
GenericRowRecordReader(_records));
+        driver.build();
+
+        IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+        indexLoadingConfig.setTableConfig(TABLE_CONFIG);
+        indexLoadingConfig.setJsonIndexColumns(new 
HashSet<>(jsonIndexColumns));
+        indexLoadingConfig.setReadMode(ReadMode.mmap);
+
+        ImmutableSegment immutableSegment =
+                ImmutableSegmentLoader.load(new File(indexDir, SEGMENT_NAME), 
indexLoadingConfig);
+        _indexSegment = immutableSegment;
+        _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
+
+        Object[][] expecteds1 = {{"von drake"}, {"von drake"}, {"von drake"}, 
{"von drake"}};
+        checkResult("SELECT jsonextractscalar(jsonColumn, '$.name.last', 
'STRING') FROM testTable", expecteds1);
+    }
+
+    @Override
+    protected String getFilter() {
+        return "";
+    }
+
+    @Override
+    protected IndexSegment getIndexSegment() {
+        return _indexSegment;
+    }
+
+    @Override
+    protected List<IndexSegment> getIndexSegments() {
+        return _indexSegments;
+    }
+}
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java
new file mode 100644
index 0000000000..962bb539cc
--- /dev/null
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/ColumnJsonParserException.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.creator.impl;
+
+import com.fasterxml.jackson.core.JsonParseException;
+
+public class ColumnJsonParserException extends JsonParseException {
+    /**
+     * Name of the column whose JSON content failed to parse. This exception
+     * type reports parsing problems when processing JSON content in a column.
+     * Sub-class of {@link com.fasterxml.jackson.core.JsonParseException}.
+     */
+    private final String _columnName;
+
+    public ColumnJsonParserException(String columnName, JsonParseException 
jpe) {
+        super(jpe.getProcessor(), jpe.getOriginalMessage(), jpe.getCause());
+        _columnName = columnName;
+    }
+
+    /**
+     * Default method overridden so that we can add column and location information
+     */
+    @Override
+    public String getMessage() {
+        return "Column: " + _columnName + "\n" + super.getMessage();
+    }
+
+    @Override
+    public String toString() {
+        return getClass().getName() + ": " + getMessage();
+    }
+}
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 15ae19a9f3..5f5c589f2f 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.segment.creator.impl;
 
+import com.fasterxml.jackson.core.JsonParseException;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
@@ -315,11 +316,14 @@ public class SegmentColumnarIndexCreator implements 
SegmentCreator {
 
       FieldSpec fieldSpec = _schema.getFieldSpecFor(columnName);
       SegmentDictionaryCreator dictionaryCreator = 
_dictionaryCreatorMap.get(columnName);
-
-      if (fieldSpec.isSingleValueField()) {
-        indexSingleValueRow(dictionaryCreator, columnValueToIndex, 
creatorsByIndex);
-      } else {
-        indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex, 
creatorsByIndex);
+      try {
+        if (fieldSpec.isSingleValueField()) {
+          indexSingleValueRow(dictionaryCreator, columnValueToIndex, 
creatorsByIndex);
+        } else {
+          indexMultiValueRow(dictionaryCreator, (Object[]) columnValueToIndex, 
creatorsByIndex);
+        }
+      } catch (JsonParseException jpe) {
+        throw new ColumnJsonParserException(columnName, jpe);
       }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to