KKcorps opened a new pull request, #8600:
URL: https://github.com/apache/pinot/pull/8600
Currently, if someone specifies an empty `complexTypeConfig` in the table config,
we still create a transformer for it. This can cause unintended issues in flows such
as parsing JSON data in the schema, which starts returning `null` instead of the proper
value. At present, the only workaround is to remove the config key from the
table config when it is not required. This PR fixes the issue.
Sample table config that can be used to reproduce the issue:
```json
{
"tableName": "myObjects",
"tableType": "REALTIME",
"segmentsConfig": {
"timeType": "MILLISECONDS",
"schemaName": "myObjects",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "365",
"timeColumnName": "lastModified",
"allowNullTimeValue": false,
"replicasPerPartition": "1"
},
"tenants": {
},
"tableIndexConfig": {
"rangeIndexVersion": 2,
"jsonIndexColumns": [
"jsonObject"
],
"autoGeneratedInvertedIndex": false,
"createInvertedIndexDuringSegmentGeneration": false,
"loadMode": "MMAP",
"noDictionaryColumns": [
"lastModified",
"jsonObject"
],
"enableDefaultStarTree": false,
"enableDynamicStarTreeCreation": false,
"segmentPartitionConfig": {
"columnPartitionMap": {
"objectId": {
"functionName": "Murmur",
"numPartitions": 2
}
}
},
"aggregateMetrics": false,
"nullHandlingEnabled": false
},
"metadata": {
"customConfigs": {}
},
"ingestionConfig": {
"streamIngestionConfig": {
"streamConfigMaps": [
{
"streamType": "kafka",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.topic.name": "my_objects",
"stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
"stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.broker.list": "localhost:19092",
"realtime.segment.flush.threshold.rows": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.threshold.segment.size": "200M",
"realtime.segment.flush.autotune.initialRows": "2000000",
"stream.kafka.consumer.prop.auto.offset.reset": "smallest"
}
]
},
"transformConfigs": [],
"complexTypeConfig": {}
},
"isDimTable": false
}
```
Schema -
```json
{
"schemaName": "myObjects",
"dimensionFieldSpecs": [
{
"name": "objectId",
"dataType": "STRING"
},
{
"name": "jsonObject",
"dataType": "JSON"
}
],
"dateTimeFieldSpecs": [
{
"name": "lastModified",
"dataType": "LONG",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:DAYS"
}
]
}
```
Record
```
{ "lastModified":1651001043557, "objectId":
"00000000-0000-0000-0000-000000000000", "jsonObject": {"values": [ {"id":
"bob", "names": ["a","b","c","d","e"] } ] }}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]