pabrahamusa opened a new issue #6295:
URL: https://github.com/apache/incubator-pinot/issues/6295
Hello,
Currently I have the following config for real-time ingestion, with the text
index enabled. When I try to query the data with TEXT_MATCH, an error is
thrown. Why is that? Is this the right way to enable text indexing?
Error:
`[
{
"errorCode": 200,
"message": "QueryExecutionError:\njava.lang.NullPointerException\n\tat
org.apache.pinot.core.operator.filter.TextMatchFilterOperator.getNextBlock(TextMatchFilterOperator.java:45)\n\tat
org.apache.pinot.core.operator.filter.TextMatchFilterOperator.getNextBlock(TextMatchFilterOperator.java:30)\n\tat
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
org.apache.pinot.core.operator.DocIdSetOperator.getNextBlock(DocIdSetOperator.java:62)\n\tat
org.apache.pinot.core.operator.DocIdSetOperator.getNextBlock(DocIdSetOperator.java:35)\n\tat
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
org.apache.pinot.core.operator.ProjectionOperator.getNextBlock(ProjectionOperator.java:57)\n\tat
org.apache.pinot.core.operator.ProjectionOperator.getNextBlock(ProjectionOperator.java:30)\n\tat
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
org.apache.pinot.core.operator.transform.TransformOperator.getNextBlock(TransformOperator.java:92)\n\tat
org.apache.pinot.core.operator.transform.TransformOperator.getNextBlock(TransformOperator.java:39)\n\tat
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
org.apache.pinot.core.operator.query.SelectionOnlyOperator.getNextBlock(SelectionOnlyOperator.java:77)\n\tat
org.apache.pinot.core.operator.query.SelectionOnlyOperator.getNextBlock(SelectionOnlyOperator.java:38)"
}
]`
Config:
```
{
"tableName": "log",
"tableType": "REALTIME",
"ingestionConfig": {
},
"segmentsConfig": {
"timeColumnName": "five_mins_epoch",
"timeType": "MINUTES",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "7",
"schemaName": "log",
"replication": "2",
"replicasPerPartition": "2",
"segmentPushType": "APPEND",
"completionConfig": {
"completionMode": "DOWNLOAD"
}
},
"tenants": {
},
"tableIndexConfig": {
"loadMode": "MMAP",
"sortedColumn": ["timemillis"],
"fieldConfigList": [
{
"name": "log",
"encodingType": "RAW",
"indexType": "TEXT",
"properties": {
"enableQueryCacheForTextIndex": "true",
"deriveNumDocsPerChunkForRawIndex": "true"
}
},
{
"name": "container_name",
"encodingType": "RAW",
"indexType": "TEXT",
"properties": {
"enableQueryCacheForTextIndex": "true",
"deriveNumDocsPerChunkForRawIndex": "true"
}
},
{
"name": "pod_name",
"encodingType": "RAW",
"indexType": "TEXT",
"properties": {
"enableQueryCacheForTextIndex": "true",
"deriveNumDocsPerChunkForRawIndex": "true"
}
},
{
"name": "namespace_name",
"encodingType": "RAW",
"indexType": "TEXT",
"properties": {
"enableQueryCacheForTextIndex": "true",
"deriveNumDocsPerChunkForRawIndex": "true"
}
},
{
"name": "host",
"encodingType": "RAW",
"indexType": "TEXT",
"properties": {
"enableQueryCacheForTextIndex": "true",
"deriveNumDocsPerChunkForRawIndex": "true"
}
},
{
"name": "cluster",
"encodingType": "RAW",
"indexType": "TEXT",
"properties": {
"enableQueryCacheForTextIndex": "true",
"deriveNumDocsPerChunkForRawIndex": "true"
}
}],
"streamConfigs": {
"streamType": "kafka",
"stream.kafka.consumer.type": "simple",
"stream.kafka.topic.name": "all_logs",
"stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.zk.broker.url":
"cp-zookeeper-headless.logging.svc.cluster.local:2181",
"stream.kafka.broker.list":
"cp-kafka-headless.logging.svc.cluster.local:9092",
"realtime.segment.flush.threshold.time": "12h",
"realtime.segment.flush.threshold.size": "100000",
"stream.kafka.consumer.prop.auto.offset.reset": "smallest"
}
},
"metadata": {
"customConfigs": {}
}
}
schema.json:
{
"schemaName": "log",
"dimensionFieldSpecs": [
{
"name": "log",
"dataType": "STRING",
"maxLength": "10000",
"defaultNullValue": ""
},
{
"name": "cluster",
"dataType": "STRING",
"defaultNullValue": ""
},
{
"name": "container_name",
"dataType": "STRING",
"defaultNullValue": ""
},
{
"name": "namespace_name",
"dataType": "STRING",
"defaultNullValue": ""
},
{
"name": "pod_name",
"dataType": "STRING",
"defaultNullValue": ""
},
{
"name": "host",
"dataType": "STRING",
"defaultNullValue": ""
},
{
"name": "timemillis",
"dataType": "LONG",
"defaultNullValue": "100000"
}
],
"metricFieldSpecs": [],
"timeFieldSpec": {
"incomingGranularitySpec": {
"timeType": "MINUTES",
"dataType": "LONG",
"timeFormat": "EPOCH",
"name": "five_mins_epoch"
},
"outgoingGranularitySpec": {
"dataType": "LONG",
"timeType": "MINUTES",
"timeFormat": "EPOCH",
"name": "five_mins_epoch"
}
}
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]