Zhiyong Liu created DRILL-801:
---------------------------------

             Summary: join queries with merge fail with ArrayIndexOutOfBoundsException en masse
                 Key: DRILL-801
                 URL: https://issues.apache.org/jira/browse/DRILL-801
             Project: Apache Drill
          Issue Type: Bug
          Components: Functions - Drill
            Reporter: Zhiyong Liu


Datasources: TPCH (10MB), three-way split parquet files
git.commit.id.abbrev=5d7e3d3
git.commit.id=5d7e3d3ab548eb2b23607df46ea843a9c1532b72

All of the join queries in the smoke test suite fail with
ArrayIndexOutOfBoundsException when merge join is used (i.e. with hash join
disabled).  An example follows:

0: jdbc:drill:schema=dfs.TpcHMulti> alter session set `planner.enable_hashjoin` = false;
+------------+------------+
|     ok     |  summary   |
+------------+------------+
| true       | planner.enable_hashjoin updated. |
+------------+------------+
1 row selected (0.024 seconds)
0: jdbc:drill:schema=dfs.TpcHMulti> select o.O_TOTALPRICE, c.C_NAME
. . . . . . . . . . . . . . . . . >    from orders o, customer c
. . . . . . . . . . . . . . . . . >    where o.C_CUSTKEY = c.C_CUSTKEY and o.O_TOTALPRICE > 400000.00
. . . . . . . . . . . . . . . . . >    order by o.O_TOTALPRICE;
Query failed: org.apache.drill.exec.rpc.RpcException: Remote failure while running query.[error_id: "3914508b-6c56-4598-a5aa-5d3f51885ded"
endpoint {
  address: "perfnode104.perf.lab"
  user_port: 31010
  control_port: 31011
  data_port: 31012
}
error_type: 0
message: "Failure while running fragment. < ArrayIndexOutOfBoundsException:[ 16666 ]"
]
Error: exception while executing query (state=,code=0)

Physical plan:

0: jdbc:drill:schema=dfs.TpcHMulti> explain plan for select o.O_TOTALPRICE, c.C_NAME
. . . . . . . . . . . . . . . . . >    from orders o, customer c
. . . . . . . . . . . . . . . . . >    where o.C_CUSTKEY = c.C_CUSTKEY and o.O_TOTALPRICE > 400000.00
. . . . . . . . . . . . . . . . . >    order by o.O_TOTALPRICE ;
+------------+------------+
|    text    |    json    |
+------------+------------+
| ScreenPrel
  SingleMergeExchangePrel(sort0=[0 ASC])
    SelectionVectorRemoverPrel
      SortPrel(sort0=[$0], dir0=[ASC])
        HashToRandomExchangePrel(dist0=[[$0]])
          ProjectPrel(O_TOTALPRICE=[$2], C_NAME=[$5])
            MergeJoinPrel(condition=[=($1, $4)], joinType=[inner])
              SelectionVectorRemoverPrel
                SortPrel(sort0=[$1], dir0=[ASC])
                  HashToRandomExchangePrel(dist0=[[$1]])
                    FilterPrel(condition=[>($2, 400000.00)])
                      ScanPrel(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/drill/testdata/tpch-multi/orders]], selectionRoot=/drill/testdata/tpch-multi/orders, columns=[SchemaPath [`C_CUSTKEY`], SchemaPath [`O_TOTALPRICE`]]]])
              SelectionVectorRemoverPrel
                SortPrel(sort0=[$1], dir0=[ASC])
                  HashToRandomExchangePrel(dist0=[[$1]])
                    ScanPrel(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/drill/testdata/tpch-multi/customer]], selectionRoot=/drill/testdata/tpch-multi/customer, columns=[SchemaPath [`C_CUSTKEY`], SchemaPath [`C_NAME`]]]])
 | {                                                                            
                         
  "head" : {                                                                    
                         
    "version" : 1,                                                              
                         
    "generator" : {                                                             
                         
      "type" : "ExplainHandler",                                                
                         
      "info" : ""                                                               
                         
    },                                                                          
                         
    "type" : "APACHE_DRILL_PHYSICAL",                                           
                         
    "options" : [ {                                                             
                         
      "name" : "planner.enable_hashjoin",                                       
                         
      "kind" : "BOOLEAN",                                                       
                         
      "type" : "SESSION",                                                       
                         
      "num_val" : null,                                                         
                         
      "string_val" : null,                                                      
                         
      "bool_val" : false,                                                       
                         
      "float_val" : null                                                        
                         
    } ],                                                                        
                         
    "resultMode" : "EXEC"                                                       
                         
  },                                                                            
                         
  "graph" : [ {                                                                 
                         
    "pop" : "parquet-scan",                                                     
                         
    "@id" : 1,                                                                  
                         
    "entries" : [ {                                                             
                         
      "path" : "maprfs:/drill/testdata/tpch-multi/customer"                     
                         
    } ],                                                                        
                         
    "storage" : {                                                               
                         
      "type" : "file",                                                          
                         
      "connection" : "maprfs:///",                                              
                         
      "workspaces" : {                                                          
                         
        "root" : {                                                              
                         
          "location" : "/",                                                     
                         
          "writable" : false,                                                   
                         
          "storageformat" : null                                                
                         
        },                                                                      
                         
        "TpcHMulti" : {                                                         
                         
          "location" : "/drill/testdata/tpch-multi",                            
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "TpcH001" : {                                                           
                         
          "location" : "/drill/testdata/tpch001",                               
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "P1Tests" : {                                                           
                         
          "location" : "/drill/testdata/p1tests",                               
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "Krystal" : {                                                           
                         
          "location" : "/drill/testdata/krystal",                               
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "Amplab" : {                                                            
                         
          "location" : "/drill/testdata/amplab",                                
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "tmp" : {                                                               
                         
          "location" : "/tmp",                                                  
                         
          "writable" : true,                                                    
                         
          "storageformat" : "csv"                                               
                         
        }                                                                       
                         
      },                                                                        
                         
      "formats" : {                                                             
                         
        "psv" : {                                                               
                         
          "type" : "text",                                                      
                         
          "extensions" : [ "tbl" ],                                             
                         
          "delimiter" : "|"                                                     
                         
        },                                                                      
                         
        "csv" : {                                                               
                         
          "type" : "text",                                                      
                         
          "extensions" : [ "csv" ],                                             
                         
          "delimiter" : ","                                                     
                         
        },                                                                      
                         
        "tsv" : {                                                               
                         
          "type" : "text",                                                      
                         
          "extensions" : [ "tsv" ],                                             
                         
          "delimiter" : "\t"                                                    
                         
        },                                                                      
                         
        "parquet" : {                                                           
                         
          "type" : "parquet"                                                    
                         
        },                                                                      
                         
        "json" : {                                                              
                         
          "type" : "json"                                                       
                         
        }                                                                       
                         
      }                                                                         
                         
    },                                                                          
                         
    "format" : {                                                                
                         
      "type" : "parquet"                                                        
                         
    },                                                                          
                         
    "columns" : [ "`C_CUSTKEY`", "`C_NAME`" ],                                  
                         
    "selectionRoot" : "/drill/testdata/tpch-multi/customer"                     
                         
  }, {                                                                          
                         
    "pop" : "hash-to-random-exchange",                                          
                         
    "@id" : 2,                                                                  
                         
    "child" : 1,                                                                
                         
    "expr" : "hash(`C_CUSTKEY`) ",                                              
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "external-sort",                                                    
                         
    "@id" : 3,                                                                  
                         
    "child" : 2,                                                                
                         
    "orderings" : [ {                                                           
                         
      "order" : "ASC",                                                          
                         
      "expr" : "`C_CUSTKEY`",                                                   
                         
      "nullDirection" : "UNSPECIFIED"                                           
                         
    } ],                                                                        
                         
    "reverse" : false,                                                          
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "selection-vector-remover",                                         
                         
    "@id" : 4,                                                                  
                         
    "child" : 3,                                                                
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "project",                                                          
                         
    "@id" : 5,                                                                  
                         
    "exprs" : [ {                                                               
                         
      "ref" : "`*0`",                                                           
                         
      "expr" : "`*`"                                                            
                         
    }, {                                                                        
                         
      "ref" : "`C_CUSTKEY0`",                                                   
                         
      "expr" : "`C_CUSTKEY`"                                                    
                         
    }, {                                                                        
                         
      "ref" : "`C_NAME`",                                                       
                         
      "expr" : "`C_NAME`"                                                       
                         
    } ],                                                                        
                         
    "child" : 4,                                                                
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "parquet-scan",                                                     
                         
    "@id" : 6,                                                                  
                         
    "entries" : [ {                                                             
                         
      "path" : "maprfs:/drill/testdata/tpch-multi/orders"                       
                         
    } ],                                                                        
                         
    "storage" : {                                                               
                         
      "type" : "file",                                                          
                         
      "connection" : "maprfs:///",                                              
                         
      "workspaces" : {                                                          
                         
        "root" : {                                                              
                         
          "location" : "/",                                                     
                         
          "writable" : false,                                                   
                         
          "storageformat" : null                                                
                         
        },                                                                      
                         
        "TpcHMulti" : {                                                         
                         
          "location" : "/drill/testdata/tpch-multi",                            
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "TpcH001" : {                                                           
                         
          "location" : "/drill/testdata/tpch001",                               
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "P1Tests" : {                                                           
                         
          "location" : "/drill/testdata/p1tests",                               
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "Krystal" : {                                                           
                         
          "location" : "/drill/testdata/krystal",                               
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "Amplab" : {                                                            
                         
          "location" : "/drill/testdata/amplab",                                
                         
          "writable" : true,                                                    
                         
          "storageformat" : "parquet"                                           
                         
        },                                                                      
                         
        "tmp" : {                                                               
                         
          "location" : "/tmp",                                                  
                         
          "writable" : true,                                                    
                         
          "storageformat" : "csv"                                               
                         
        }                                                                       
                         
      },                                                                        
                         
      "formats" : {                                                             
                         
        "psv" : {                                                               
                         
          "type" : "text",                                                      
                         
          "extensions" : [ "tbl" ],                                             
                         
          "delimiter" : "|"                                                     
                         
        },                                                                      
                         
        "csv" : {                                                               
                         
          "type" : "text",                                                      
                         
          "extensions" : [ "csv" ],                                             
                         
          "delimiter" : ","                                                     
                         
        },                                                                      
                         
        "tsv" : {                                                               
                         
          "type" : "text",                                                      
                         
          "extensions" : [ "tsv" ],                                             
                         
          "delimiter" : "\t"                                                    
                         
        },                                                                      
                         
        "parquet" : {                                                           
                         
          "type" : "parquet"                                                    
                         
        },                                                                      
                         
        "json" : {                                                              
                         
          "type" : "json"                                                       
                         
        }                                                                       
                         
      }                                                                         
                         
    },                                                                          
                         
    "format" : {                                                                
                         
      "type" : "parquet"                                                        
                         
    },                                                                          
                         
    "columns" : [ "`C_CUSTKEY`", "`O_TOTALPRICE`" ],                            
                         
    "selectionRoot" : "/drill/testdata/tpch-multi/orders"                       
                         
  }, {                                                                          
                         
    "pop" : "filter",                                                           
                         
    "@id" : 7,                                                                  
                         
    "child" : 6,                                                                
                         
    "expr" : "greater_than(`O_TOTALPRICE`, 400000.0) ",                         
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "hash-to-random-exchange",                                          
                         
    "@id" : 8,                                                                  
                         
    "child" : 7,                                                                
                         
    "expr" : "hash(`C_CUSTKEY`) ",                                              
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "external-sort",                                                    
                         
    "@id" : 9,                                                                  
                         
    "child" : 8,                                                                
                         
    "orderings" : [ {                                                           
                         
      "order" : "ASC",                                                          
                         
      "expr" : "`C_CUSTKEY`",                                                   
                         
      "nullDirection" : "UNSPECIFIED"                                           
                         
    } ],                                                                        
                         
    "reverse" : false,                                                          
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "selection-vector-remover",                                         
                         
    "@id" : 10,                                                                 
                         
    "child" : 9,                                                                
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "merge-join",                                                       
                         
    "@id" : 11,                                                                 
                         
    "left" : 10,                                                                
                         
    "right" : 5,                                                                
                         
    "conditions" : [ {                                                          
                         
      "relationship" : "==",                                                    
                         
      "left" : "`C_CUSTKEY`",                                                   
                         
      "right" : "`C_CUSTKEY0`"                                                  
                         
    } ],                                                                        
                         
    "joinType" : "INNER",                                                       
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "project",                                                          
                         
    "@id" : 12,                                                                 
                         
    "exprs" : [ {                                                               
                         
      "ref" : "`O_TOTALPRICE`",                                                 
                         
      "expr" : "`O_TOTALPRICE`"                                                 
                         
    }, {                                                                        
                         
      "ref" : "`C_NAME`",                                                       
                         
      "expr" : "`C_NAME`"                                                       
                         
    } ],                                                                        
                         
    "child" : 11,                                                               
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "hash-to-random-exchange",                                          
                         
    "@id" : 13,                                                                 
                         
    "child" : 12,                                                               
                         
    "expr" : "hash(`O_TOTALPRICE`) ",                                           
                         
    "initialAllocation" : 1000000,                                              
                         
    "maxAllocation" : 10000000000                                               
                         
  }, {                                                                          
                         
    "pop" : "external-sort",                                                    
                         
    "@id" : 14,                                                                 
                         
    "child" : 13,                                                               
                         
    "orderings" : [ {                                                           
                         
      "order" : "ASC",                                                          
                         
      "expr" : "`O_TOTALPRICE`",                                                
                         
      "nullDirection" : "UNSPECIFIED"                                           
                         
    } ],
    "reverse" : false,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 15,
    "child" : 14,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "single-merge-exchange",
    "@id" : 16,
    "child" : 15,
    "orderings" : [ {
      "order" : "ASC",
      "expr" : "`O_TOTALPRICE`",
      "nullDirection" : "UNSPECIFIED"
    } ],
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "screen",
    "@id" : 17,
    "child" : 16,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  } ]
} |
+------------+------------+
1 row selected (0.151 seconds)



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to