jackluo923 commented on code in PR #12788:
URL: https://github.com/apache/pinot/pull/12788#discussion_r1556445579


##########
pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/SchemaConformingTransformerV2Config.java:
##########
@@ -0,0 +1,253 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.config.table.ingestion;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+import org.apache.pinot.spi.config.BaseJsonConfig;
+
+public class SchemaConformingTransformerV2Config extends BaseJsonConfig {
+  @JsonPropertyDescription("Enable indexable extras")
+  private boolean _enableIndexableExtras = true;
+
+  @JsonPropertyDescription("Name of the field that should contain extra fields 
that are not part of the schema.")
+  private String _indexableExtrasField = "json_data";
+
+  @JsonPropertyDescription("Enable unindexable extras")
+  private boolean _enableUnindexableExtras = true;
+
+  @JsonPropertyDescription(
+      "Like indexableExtrasField except it only contains fields with the 
suffix in unindexableFieldSuffix.")
+  private String _unindexableExtrasField = "json_data_no_idx";
+
+  @JsonPropertyDescription("The suffix of fields that must be stored in 
unindexableExtrasField")
+  private String _unindexableFieldSuffix = "_noindex";
+
+  @JsonPropertyDescription("Array of flattened (dot-delimited) object paths to 
drop")
+  private Set<String> _fieldPathsToDrop = new HashSet<>();
+
+  @JsonPropertyDescription("Array of flattened (dot-delimited) object paths 
not to traverse further and keep same as "
+      + "input. This will also skip building mergedTextIndex for the field.")
+  private Set<String> _fieldPathsToPreserveInput = new HashSet<>();
+
+  @JsonPropertyDescription("Map from customized meaningful column name to json 
key path")
+  private Map<String, String> _columnNameToJsonKeyPathMap = new HashMap<>();
+
+  @JsonPropertyDescription("mergedTextIndex field")
+  private String _mergedTextIndexField = "__mergedTextIndex";
+
+  @JsonPropertyDescription("mergedTextIndex document max length")
+  private int _mergedTextIndexDocumentMaxLength = 32766;

Review Comment:
   If we want to be 100% precise, we should rename this parameter from 
`_mergedTextIndexDocumentMaxLength` to `_mergedTextIndexDocumentMaxSize`, as 
32766 refers to the number of bytes rather than the number of characters. We 
should also change the implementation of the check so that it encodes the 
string to bytes and measures the byte length.
   
   Alternatively, we could simply remove this feature: it is not necessary 
for us, because in production we use a custom analyzer that already enforces 
this limit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to