lksvenoy-r7 opened a new issue #7983:
URL: https://github.com/apache/pinot/issues/7983


   The Pinot Recommendation Engine does not support the BOOLEAN type
   
   Recommender Input (Using BOOLEAN type):
   ```
   {
     "schema":{
       "dimensionFieldSpecs": [
         {
           "cardinality": 10000,
           "dataType": "LONG",
           "name": "studentID"
         },
         {
           "averageLength": 8,
           "cardinality": 2000,
           "dataType": "STRING",
           "name": "firstName"
         },
         {
           "averageLength": 12,
           "cardinality": 2000,
           "dataType": "STRING",
           "name": "lastName"
         },
         {
           "averageLength": 6,
           "cardinality": 2,
           "dataType": "STRING",
           "name": "gender"
         },
         {
           "averageLength": 25,
           "cardinality": 100,
           "dataType": "STRING",
           "name": "subject"
         },
         {
           "cardinality": 2,
           "dataType": "BOOLEAN",
           "name": "isEmployed"
         }
       ],
       "metricFieldSpecs": [
         {
           "cardinality": 5000,
           "dataType": "FLOAT",
           "name": "score"
         }
       ],
       "schemaName": "transcript"
     },
     "queriesWithWeights":{
       "select subject, count(*) from transcript where score > 3 and gender = 'MALE' group by subject": 0.5,
       "select subject, score from transcript where firstName = 'Tsubasa' and lastName = 'Oozora'": 0.5
     },
     "tableType": "OFFLINE",
     "numRecordsPerPush":100000000,
     "qps": 5,
     "latencySLA": 1000,
     "rulesToExecute": {
       "recommendRealtimeProvisioning": false
     }
   }
   ```
   Output
   ```
   {
     "_code": 400,
     "_error": "java.lang.RuntimeException: number generator can only accept a column of type number and this : BOOLEAN is not a supported number type"
   }
   ```
   
   Internally, BOOLEAN is treated as an integer, and the recommendation engine should respect this. Here is the same request using the INT type instead of BOOLEAN (which works):
   
   Recommender Input (Using INT type):
   ```
   {
     "schema":{
       "dimensionFieldSpecs": [
         {
           "cardinality": 10000,
           "dataType": "LONG",
           "name": "studentID"
         },
         {
           "averageLength": 8,
           "cardinality": 2000,
           "dataType": "STRING",
           "name": "firstName"
         },
         {
           "averageLength": 12,
           "cardinality": 2000,
           "dataType": "STRING",
           "name": "lastName"
         },
         {
           "averageLength": 6,
           "cardinality": 2,
           "dataType": "STRING",
           "name": "gender"
         },
         {
           "averageLength": 25,
           "cardinality": 100,
           "dataType": "STRING",
           "name": "subject"
         },
         {
           "cardinality": 2,
           "dataType": "INT",
           "name": "isEmployed"
         }
       ],
       "metricFieldSpecs": [
         {
           "cardinality": 5000,
           "dataType": "FLOAT",
           "name": "score"
         }
       ],
       "schemaName": "transcript"
     },
     "queriesWithWeights":{
       "select subject, count(*) from transcript where score > 3 and gender = 'MALE' group by subject": 0.5,
       "select subject, score from transcript where firstName = 'Tsubasa' and lastName = 'Oozora'": 0.5
     },
     "tableType": "OFFLINE",
     "numRecordsPerPush":100000000,
     "qps": 5,
     "latencySLA": 1000,
     "rulesToExecute": {
       "recommendRealtimeProvisioning": false
     }
   }
   ```
   Output
   ```
   {
     "realtimeProvisioningRecommendations": {},
     "segmentSizeRecommendations": {
       "message": null,
       "numRowsPerSegment": 33333333,
       "numSegments": 3,
       "segmentSize": 488491328
     },
     "partitionConfig": {
       "numKafkaPartitions": 0,
       "numPartitionsRealtime": 1,
       "partitionDimension": "",
       "numPartitionsOffline": 1,
       "numPartitionsOfflineOverwritten": false,
       "numPartitionsRealtimeOverwritten": false,
       "partitionDimensionOverwritten": false
     },
     "flaggedQueries": {
       "flaggedQueries": {}
     },
     "indexConfig": {
       "sortedColumnOverwritten": true,
       "invertedIndexColumns": [
         "gender"
       ],
       "noDictionaryColumns": [
         "studentID",
         "score",
         "isEmployed"
       ],
       "onHeapDictionaryColumns": [],
       "varLengthDictionaryColumns": [
         "firstName",
         "lastName",
         "gender",
         "subject"
       ],
       "sortedColumn": "firstName",
       "bloomFilterColumns": [],
       "rangeIndexColumns": [
         "score"
       ]
     },
     "aggregateMetrics": false
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to