KKcorps opened a new pull request, #8600:
URL: https://github.com/apache/pinot/pull/8600

   Currently, if someone specifies an empty `complexTypeConfig` in the table 
config, we still create a transformer for it. This can cause unintended issues 
in flows such as parsing JSON data in the schema, which starts returning `null` 
instead of the proper value. At present, the only workaround is to remove the 
config key from the table config when it is not required. This PR fixes the issue.
   
   Sample config that can be used to reproduce the issue:
   ```json
   {
       "tableName": "myObjects",
       "tableType": "REALTIME",
       "segmentsConfig": {
         "timeType": "MILLISECONDS",
         "schemaName": "myObjects",
         "retentionTimeUnit": "DAYS",
         "retentionTimeValue": "365",
         "timeColumnName": "lastModified",
         "allowNullTimeValue": false,
         "replicasPerPartition": "1"
       },
       "tenants": {
       },
       "tableIndexConfig": {
         "rangeIndexVersion": 2,
         "jsonIndexColumns": [
           "jsonObject"
         ],
         "autoGeneratedInvertedIndex": false,
         "createInvertedIndexDuringSegmentGeneration": false,
         "loadMode": "MMAP",
         "noDictionaryColumns": [
           "lastModified",
           "jsonObject"
         ],
         "enableDefaultStarTree": false,
         "enableDynamicStarTreeCreation": false,
         "segmentPartitionConfig": {
           "columnPartitionMap": {
             "objectId": {
               "functionName": "Murmur",
               "numPartitions": 2
             }
           }
         },
         "aggregateMetrics": false,
         "nullHandlingEnabled": false
       },
       "metadata": {
         "customConfigs": {}
       },
       "ingestionConfig": {
         "streamIngestionConfig": {
           "streamConfigMaps": [
             {
               "streamType": "kafka",
               "stream.kafka.consumer.type": "lowlevel",
               "stream.kafka.topic.name": "my_objects",
               "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
               "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
               "stream.kafka.broker.list": "localhost:19092",
               "realtime.segment.flush.threshold.rows": "0",
               "realtime.segment.flush.threshold.time": "24h",
               "realtime.segment.flush.threshold.segment.size": "200M",
               "realtime.segment.flush.autotune.initialRows": "2000000",
               "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
             }
           ]
         },
         "transformConfigs": [],
         "complexTypeConfig": {}
       },
       "isDimTable": false
     }
   ```
   
   Schema - 
   
   
   ```json
   {
     "schemaName": "myObjects",
     "dimensionFieldSpecs": [
       {
         "name": "objectId",
         "dataType": "STRING"
       },
       {
         "name": "jsonObject",
         "dataType": "JSON"
       }
     ],
     "dateTimeFieldSpecs": [
       {
         "name": "lastModified",
         "dataType": "LONG",
         "format": "1:MILLISECONDS:EPOCH",
         "granularity": "1:DAYS"
       }
     ]
   }
   ```
   
   Record
   
   ```
   { "lastModified":1651001043557, "objectId": "00000000-0000-0000-0000-000000000000",   "jsonObject": {"values": [ {"id": "bob", "names": ["a","b","c","d","e"] } ] }}
   
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to