lksvenoy-r7 opened a new issue #7983:
URL: https://github.com/apache/pinot/issues/7983
The Pinot Recommendation Engine does not support the BOOLEAN type
Recommender Input (Using BOOLEAN type):
```
{
"schema":{
"dimensionFieldSpecs": [
{
"cardinality": 10000,
"dataType": "LONG",
"name": "studentID"
},
{
"averageLength": 8,
"cardinality": 2000,
"dataType": "STRING",
"name": "firstName"
},
{
"averageLength": 12,
"cardinality": 2000,
"dataType": "STRING",
"name": "lastName"
},
{
"averageLength": 6,
"cardinality": 2,
"dataType": "STRING",
"name": "gender"
},
{
"averageLength": 25,
"cardinality": 100,
"dataType": "STRING",
"name": "subject"
},
{
"cardinality": 2,
"dataType": "BOOLEAN",
"name": "isEmployed"
}
],
"metricFieldSpecs": [
{
"cardinality": 5000,
"dataType": "FLOAT",
"name": "score"
}
],
"schemaName": "transcript"
},
"queriesWithWeights":{
"select subject, count(*) from transcript where score > 3 and gender = 'MALE' group by subject": 0.5,
"select subject, score from transcript where firstName = 'Tsubasa' and lastName = 'Oozora'": 0.5
},
"tableType": "OFFLINE",
"numRecordsPerPush":100000000,
"qps": 5,
"latencySLA": 1000,
"rulesToExecute": {
"recommendRealtimeProvisioning": false
}
}
```
Output
```
{
"_code": 400,
"_error": "java.lang.RuntimeException: number generator can only accept a column of type number and this : BOOLEAN is not a supported number type"
}
```
Internally, BOOLEAN is treated as an integer, and the recommendation engine
should respect this. Here is the same request with the column declared as INT
instead of BOOLEAN, which works:
Recommender Input (Using INT type):
```
{
"schema":{
"dimensionFieldSpecs": [
{
"cardinality": 10000,
"dataType": "LONG",
"name": "studentID"
},
{
"averageLength": 8,
"cardinality": 2000,
"dataType": "STRING",
"name": "firstName"
},
{
"averageLength": 12,
"cardinality": 2000,
"dataType": "STRING",
"name": "lastName"
},
{
"averageLength": 6,
"cardinality": 2,
"dataType": "STRING",
"name": "gender"
},
{
"averageLength": 25,
"cardinality": 100,
"dataType": "STRING",
"name": "subject"
},
{
"cardinality": 2,
"dataType": "INT",
"name": "isEmployed"
}
],
"metricFieldSpecs": [
{
"cardinality": 5000,
"dataType": "FLOAT",
"name": "score"
}
],
"schemaName": "transcript"
},
"queriesWithWeights":{
"select subject, count(*) from transcript where score > 3 and gender = 'MALE' group by subject": 0.5,
"select subject, score from transcript where firstName = 'Tsubasa' and lastName = 'Oozora'": 0.5
},
"tableType": "OFFLINE",
"numRecordsPerPush":100000000,
"qps": 5,
"latencySLA": 1000,
"rulesToExecute": {
"recommendRealtimeProvisioning": false
}
}
```
Output
```
{
"realtimeProvisioningRecommendations": {},
"segmentSizeRecommendations": {
"message": null,
"numRowsPerSegment": 33333333,
"numSegments": 3,
"segmentSize": 488491328
},
"partitionConfig": {
"numKafkaPartitions": 0,
"numPartitionsRealtime": 1,
"partitionDimension": "",
"numPartitionsOffline": 1,
"numPartitionsOfflineOverwritten": false,
"numPartitionsRealtimeOverwritten": false,
"partitionDimensionOverwritten": false
},
"flaggedQueries": {
"flaggedQueries": {}
},
"indexConfig": {
"sortedColumnOverwritten": true,
"invertedIndexColumns": [
"gender"
],
"noDictionaryColumns": [
"studentID",
"score",
"isEmployed"
],
"onHeapDictionaryColumns": [],
"varLengthDictionaryColumns": [
"firstName",
"lastName",
"gender",
"subject"
],
"sortedColumn": "firstName",
"bloomFilterColumns": [],
"rangeIndexColumns": [
"score"
]
},
"aggregateMetrics": false
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]