shubhraGupta opened a new issue, #14326:
URL: https://github.com/apache/druid/issues/14326

   Please provide a detailed title (e.g. "Broker crashes when using TopN query 
with Bound filter" instead of just "Broker crashes").
   
   ### Affected Version
   
   Version 25
   ### Description
   
   We did rolling upgrade on our Staging Cluster from Version 22 and version 25 
and saw one of the Theta sketch queries are breaking in version 25. 
   
   with t1(merge_column, s1) as
       (
         SELECT 0 as merge_column,
                DS_THETA(c1)
           FROM table1
       )
   
   
         SELECT 
           gp_col1,
           sum(ts2),
           sum(ts1),
           sum(ts3)
         FROM (
                  SELECT gp_col1,
                         gp_multiplier,
                         THETA_SKETCH_ESTIMATE((DS_THETA(s2))) as ts2,
                         THETA_SKETCH_ESTIMATE((DS_THETA(s1))) as ts1,
                         gp_multiplier * 
THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(DS_THETA(s2), DS_THETA(s1))) as ts3
                    FROM (
                           SELECT 0 as merge_column,
                                  gp_col1,
                                  gp_multiplier,
                                  DS_THETA(c2) as s2
                             FROM table2
                            WHERE gp_multiplier != 0 
                            GROUP BY gp_col1,
                         gp_multiplier
                         ) t2
                    JOIN t1
                      ON t1.merge_column = t2.merge_column
                   GROUP BY gp_col1,
                         gp_multiplier
               )
         GROUP BY gp_col1
   
   
   In version 22, we get non zero values for sum(ts2), sum(ts1) and sum(ts3).
   In version 25, we are getting non zero value for only for sum(ts2), others 
sums are returning 0. Though ts2, ts1 and ts3 in subquery are returning correct 
values.
   
   
   We checked the Query Plan in v22 and v25 and find out query plan has changed 
in version 25. The same query comes under type 'TopN' in version 22 and is of 
type `GroupBy` in version 25.
   
   Explain Query in Version 22
   
   DruidOuterQueryRel(query=[{"queryType":"topN","dataSource":
     {"type":"table","name":"__subquery__"},
     "virtualColumns":[],
     
"dimension":{"type":"default","dimension":"gp_col1","outputName":"d0","outputType":"STRING"},
     
"metric":{"type":"dimension","previousStop":null,"ordering":{"type":"lexicographic"}},
     "threshold":101,"intervals":{"type":"intervals",
     
"intervals":["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"]},
     "filter":null,
     "granularity":{"type":"all"},
     
"aggregations":[{"type":"doubleSum","name":"a0","fieldName":"ts2","expression":null},
       {"type":"doubleSum","name":"a1","fieldName":"ts1","expression":null},
       {"type":"doubleSum","name":"a2","fieldName":"ts3","expression":null}],
     
"postAggregations":[],"context":{"sqlOuterLimit":101,"sqlQueryId":"ba9dcb86-9f4f-4c01-ab04-569ffd1cec08"},
   "descending":false}],
    signature=[{d0:STRING, a0:DOUBLE, a1:DOUBLE, a2:DOUBLE}]) 
   DruidJoinQueryRel(condition=[=($0, $4)], joinType=[inner], 
     query=[{"queryType":"groupBy",
       "dataSource":{
       "type":"table","name":"__join__"},
       "intervals":{
       
"type":"intervals","intervals":["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"]},
       "virtualColumns":[],"filter":null,"granularity":{"type":"all"},
       "dimensions":
       
[{"type":"default","dimension":"gp_col1","outputName":"d0","outputType":"STRING"},
         
{"type":"default","dimension":"gp_multiplier","outputName":"d1","outputType":"STRING"}],
       "aggregations":[
         
{"type":"thetaSketch","name":"a0","fieldName":"s2","size":16384,"shouldFinalize":true,"isInputThetaSketch":false,"errorBoundsStdDev":null},
     {"type":"thetaSketch","name":"a1","fieldName":"EXPR$1","size":16384,
     
"shouldFinalize":true,"isInputThetaSketch":false,"errorBoundsStdDev":null}],
       "postAggregations":[{"type":"thetaSketchEstimate","name":"p1",
         "field":{
         
"type":"fieldAccess","name":"p0","fieldName":"a0"},"errorBoundsStdDev":null},
         {"type":"thetaSketchEstimate","name":"p3",
           
"field":{"type":"fieldAccess","name":"p2","fieldName":"a1"},"errorBoundsStdDev":null},
           {"type":"thetaSketchEstimate","name":"p7",
             
"field":{"type":"thetaSketchSetOp","name":"p6","func":"INTERSECT","size":16384,
               "fields":[{"type":"fieldAccess","name":"p4","fieldName":"a0"},
                 
{"type":"fieldAccess","name":"p5","fieldName":"a1"}]},"errorBoundsStdDev":null},
               {"type":"expression","name":"p8","expression":"(CAST(\"d1\", 
'DOUBLE') * p7)","ordering":null}
             ],"having":null,"limitSpec":{"type":"NoopLimitSpec"},
       
"context":{"sqlOuterLimit":101,"sqlQueryId":"ba9dcb86-9f4f-4c01-ab04-569ffd1cec08"},"descending":false}],
 
     signature=[{d0:STRING, p1:DOUBLE, p3:DOUBLE, p8:DOUBLE}]) 
   
DruidQueryRel(query=[{"queryType":"groupBy","dataSource":{"type":"table","name":"table2"},
     
"intervals":{"type":"intervals","intervals":["2023-03-01T00:00:00.000Z/2023-03-01T00:00:00.001Z"]},
     "virtualColumns":[],"filter":{
       "type":"and","fields":[{
         "type":"not","field":{
           
"type":"bound","dimension":"gp_multiplier","lower":"0","upper":"0","lowerStrict":false,"upperStrict":false,"extractionFn":null,"ordering":{
           "type":"numeric"}}},{
             "type":"not","field":{
             
"type":"selector","dimension":"gp_col1","value":null,"extractionFn":null}}]},
       "granularity":{"type":"all"},
       
"dimensions":[{"type":"default","dimension":"gp_col1","outputName":"d0","outputType":"STRING"},
         
{"type":"default","dimension":"gp_multiplier","outputName":"d1","outputType":"STRING"}],
       
"aggregations":[{"type":"thetaSketch","name":"a0","fieldName":"c2","size":16384,"shouldFinalize":true,"isInputThetaSketch":false,"errorBoundsStdDev":null}],
       "postAggregations":[{
       "type":"expression","name":"p0","expression":"0","ordering":null},{
       "type":"fieldAccess","name":"p1","fieldName":"a0"}],"having":null,
       "limitSpec":{"type":"NoopLimitSpec"},"context":{
       
"sqlOuterLimit":101,"sqlQueryId":"ba9dcb86-9f4f-4c01-ab04-569ffd1cec08"},"descending":false}],
        signature=[{p0:LONG, d0:STRING, d1:STRING, p1:COMPLEX}]) 
   DruidQueryRel(query=[{"queryType":"timeseries","dataSource":{
   "type":"table","name":"table1"},
   "intervals":{
   "type":"intervals",
   "intervals":["2023-03-15T00:00:00.000Z/2023-03-16T00:00:00.000Z"]},
   
"descending":false,"virtualColumns":[],"filter":null,"granularity":{"type":"all"},
   "aggregations":[{
   
"type":"thetaSketch","name":"a0","fieldName":"c1","size":16384,"shouldFinalize":true,"isInputThetaSketch":false,"errorBoundsStdDev":null}],
   
"postAggregations":[{"type":"expression","name":"p0","expression":"0","ordering":null},
     
{"type":"fieldAccess","name":"p1","fieldName":"a0"}],"limit":2147483647,"context":{
   "sqlOuterLimit":101,"sqlQueryId":"ba9dcb86-9f4f-4c01-ab04-569ffd1cec08"}}], 
signature=[{p0:LONG, p1:COMPLEX}])
   
   
   
   Explain Query in version 25
   {
     "queryType": "groupBy",
     "dataSource": {
       "type": "query",
       "query": {
         "queryType": "groupBy",
         "dataSource": {
           "type": "join",
           "left": {
             "type": "query",
             "query": {
               "queryType": "groupBy",
               "dataSource": {
                 "type": "table",
                 "name": "table2"
               },
               "intervals": {
                 "type": "intervals",
                 "intervals": [
                   
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
                 ]
               },
               "filter": {
                 "type": "and",
                 "fields": [
                   {
                     "type": "not",
                     "field": {
                       "type": "bound",
                       "dimension": "gp_multiplier",
                       "lower": "0",
                       "upper": "0",
                       "ordering": {
                         "type": "numeric"
                       }
                     }
                   },
                   {
                     "type": "not",
                     "field": {
                       "type": "selector",
                       "dimension": "gp_col1",
                       "value": null
                     }
                   }
                 ]
               },
               "granularity": {
                 "type": "all"
               },
               "dimensions": [
                 {
                   "type": "default",
                   "dimension": "gp_col1",
                   "outputName": "d0",
                   "outputType": "STRING"
                 },
                 {
                   "type": "default",
                   "dimension": "gp_multiplier",
                   "outputName": "d1",
                   "outputType": "STRING"
                 }
               ],
               "aggregations": [
                 {
                   "type": "thetaSketch",
                   "name": "a0",
                   "fieldName": "c2",
                   "size": 16384,
                   "shouldFinalize": false
                 }
               ],
               "postAggregations": [
                 {
                   "type": "expression",
                   "name": "p0",
                   "expression": "0"
                 }
               ],
               "limitSpec": {
                 "type": "NoopLimitSpec"
               },
               "context": {
                 "queryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
                 "sqlOuterLimit": 1001,
                 "sqlQueryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
                 "useNativeQueryExplain": true
               }
             }
           },
           "right": {
             "type": "query",
             "query": {
               "queryType": "timeseries",
               "dataSource": {
                 "type": "table",
                 "name": "table1"
               },
               "intervals": {
                 "type": "intervals",
                 "intervals": [
                   
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
                 ]
               },
               "granularity": {
                 "type": "all"
               },
               "aggregations": [
                 {
                   "type": "thetaSketch",
                   "name": "a0",
                   "fieldName": "c1",
                   "size": 16384,
                   "shouldFinalize": false
                 }
               ],
               "postAggregations": [
                 {
                   "type": "expression",
                   "name": "p0",
                   "expression": "0"
                 }
               ],
               "context": {
                 "queryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
                 "sqlOuterLimit": 1001,
                 "sqlQueryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
                 "useNativeQueryExplain": true
               }
             }
           },
           "rightPrefix": "j0.",
           "condition": "(\"p0\" == \"j0.p0\")",
           "joinType": "INNER"
         },
         "intervals": {
           "type": "intervals",
           "intervals": [
             "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
           ]
         },
         "granularity": {
           "type": "all"
         },
         "dimensions": [
           {
             "type": "default",
             "dimension": "d0",
             "outputName": "_d0",
             "outputType": "STRING"
           },
           {
             "type": "default",
             "dimension": "d1",
             "outputName": "_d1",
             "outputType": "STRING"
           }
         ],
         "aggregations": [
           {
             "type": "thetaSketch",
             "name": "_a0:a",
             "fieldName": "a0",
             "size": 16384,
             "shouldFinalize": false
           },
           {
             "type": "thetaSketch",
             "name": "_a1:a",
             "fieldName": "j0.a0",
             "size": 16384,
             "shouldFinalize": false
           }
         ],
         "postAggregations": [
           {
             "type": "thetaSketchEstimate",
             "name": "p1",
             "field": {
               "type": "fieldAccess",
               "name": "p0",
               "fieldName": "_a0:a"
             }
           },
           {
             "type": "thetaSketchEstimate",
             "name": "p3",
             "field": {
               "type": "fieldAccess",
               "name": "p2",
               "fieldName": "_a1:a"
             }
           },
           {
             "type": "thetaSketchEstimate",
             "name": "p7",
             "field": {
               "type": "thetaSketchSetOp",
               "name": "p6",
               "func": "INTERSECT",
               "size": 16384,
               "fields": [
                 {
                   "type": "fieldAccess",
                   "name": "p4",
                   "fieldName": "_a0:a"
                 },
                 {
                   "type": "fieldAccess",
                   "name": "p5",
                   "fieldName": "_a1:a"
                 }
               ]
             }
           },
           {
             "type": "expression",
             "name": "p8",
             "expression": "(CAST(\"_d1\", 'DOUBLE') * \"p7\")"
           }
         ],
         "limitSpec": {
           "type": "NoopLimitSpec"
         },
         "context": {
           "queryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
           "sqlOuterLimit": 1001,
           "sqlQueryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
           "useNativeQueryExplain": true
         }
       }
     },
     "intervals": {
       "type": "intervals",
       "intervals": [
         "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
       ]
     },
     "granularity": {
       "type": "all"
     },
     "dimensions": [
       {
         "type": "default",
         "dimension": "_d0",
         "outputName": "d0",
         "outputType": "STRING"
       }
     ],
     "aggregations": [
       {
         "type": "doubleSum",
         "name": "a0",
         "fieldName": "p1"
       },
       {
         "type": "doubleSum",
         "name": "a1",
         "fieldName": "p3"
       },
       {
         "type": "doubleSum",
         "name": "a2",
         "fieldName": "p8"
       }
     ],
     "limitSpec": {
       "type": "default",
       "columns": [],
       "limit": 1001
     },
     "context": {
       "queryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
       "sqlOuterLimit": 1001,
       "sqlQueryId": "1b05327f-6904-4e6b-8876-225929d81ab9",
       "useNativeQueryExplain": true
     }
   }
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to