[
https://issues.apache.org/jira/browse/DRILL-801?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14005539#comment-14005539
]
Zhiyong Liu commented on DRILL-801:
-----------------------------------
Despite your observations, it should be pointed out that hash joins with the
same queries do NOT show the same symptom. In addition, there is a subtle
difference between the exception statements in this case and in yours.
> merge joins fail with ArrayIndexOutOfBoundsException en masse
> -------------------------------------------------------------
>
> Key: DRILL-801
> URL: https://issues.apache.org/jira/browse/DRILL-801
> Project: Apache Drill
> Issue Type: Bug
> Components: Functions - Drill
> Reporter: Zhiyong Liu
>
> Datasources: TPCH (10MB), three-way split parquet files
> git.commit.id.abbrev=5d7e3d3
> git.commit.id=5d7e3d3ab548eb2b23607df46ea843a9c1532b72
> All of the join queries in the smoke test suite with merge-join fail with
> ArrayIndexOutOfBoundsException. An example follows:
> 0: jdbc:drill:schema=dfs.TpcHMulti> alter session set
> `planner.enable_hashjoin` = false;
> +------------+------------+
> | ok | summary |
> +------------+------------+
> | true | planner.enable_hashjoin updated. |
> +------------+------------+
> 1 row selected (0.024 seconds)
> 0: jdbc:drill:schema=dfs.TpcHMulti> select o.O_TOTALPRICE, c.C_NAME
> . . . . . . . . . . . . . . . . . > from orders o, customer c
> . . . . . . . . . . . . . . . . . > where o.C_CUSTKEY = c.C_CUSTKEY and
> o.O_TOTALPRICE > 400000.00
> . . . . . . . . . . . . . . . . . > order by o.O_TOTALPRICE;
> Query failed: org.apache.drill.exec.rpc.RpcException: Remote failure while
> running query.[error_id: "3914508b-6c56-4598-a5aa-5d3f51885ded"
> endpoint {
> address: "perfnode104.perf.lab"
> user_port: 31010
> control_port: 31011
> data_port: 31012
> }
> error_type: 0
> message: "Failure while running fragment. < ArrayIndexOutOfBoundsException:[ 16666 ]"
> ]
> Error: exception while executing query (state=,code=0)
> Physical plan:
> 0: jdbc:drill:schema=dfs.TpcHMulti> explain plan for select o.O_TOTALPRICE,
> c.C_NAME
> . . . . . . . . . . . . . . . . . > from orders o, customer c
>
> . . . . . . . . . . . . . . . . . > where o.C_CUSTKEY = c.C_CUSTKEY and
> o.O_TOTALPRICE > 400000.00
> . . . . . . . . . . . . . . . . . > order by o.O_TOTALPRICE ;
>
> +------------+------------+
>
> | text | json |
>
> +------------+------------+
>
> | ScreenPrel
>
> SingleMergeExchangePrel(sort0=[0 ASC])
>
> SelectionVectorRemoverPrel
>
> SortPrel(sort0=[$0], dir0=[ASC])
>
> HashToRandomExchangePrel(dist0=[[$0]])
>
> ProjectPrel(O_TOTALPRICE=[$2], C_NAME=[$5])
>
> MergeJoinPrel(condition=[=($1, $4)], joinType=[inner])
>
> SelectionVectorRemoverPrel
>
> SortPrel(sort0=[$1], dir0=[ASC])
>
> HashToRandomExchangePrel(dist0=[[$1]])
>
> FilterPrel(condition=[>($2, 400000.00)])
>
> ScanPrel(groupscan=[ParquetGroupScan
> [entries=[ReadEntryWithPath [path=maprfs:/drill/testdata/tpch-multi/orders]],
> selectionRoot=/drill/testdata/tpch-multi/orders, columns=[SchemaPath
> [`C_CUSTKEY`], SchemaPath [`O_TOTALPRICE`]]]])
>
> SelectionVectorRemoverPrel
>
> SortPrel(sort0=[$1], dir0=[ASC])
>
> HashToRandomExchangePrel(dist0=[[$1]])
>
> ScanPrel(groupscan=[ParquetGroupScan
> [entries=[ReadEntryWithPath
> [path=maprfs:/drill/testdata/tpch-multi/customer]],
> selectionRoot=/drill/testdata/tpch-multi/customer, columns=[SchemaPath
> [`C_CUSTKEY`], SchemaPath [`C_NAME`]]]])
>
> | {
>
> "head" : {
>
> "version" : 1,
>
> "generator" : {
>
> "type" : "ExplainHandler",
>
> "info" : ""
>
> },
>
> "type" : "APACHE_DRILL_PHYSICAL",
>
> "options" : [ {
>
> "name" : "planner.enable_hashjoin",
>
> "kind" : "BOOLEAN",
>
> "type" : "SESSION",
>
> "num_val" : null,
>
> "string_val" : null,
>
> "bool_val" : false,
>
> "float_val" : null
>
> } ],
>
> "resultMode" : "EXEC"
>
> },
>
> "graph" : [ {
>
> "pop" : "parquet-scan",
>
> "@id" : 1,
>
> "entries" : [ {
>
> "path" : "maprfs:/drill/testdata/tpch-multi/customer"
>
> } ],
>
> "storage" : {
>
> "type" : "file",
>
> "connection" : "maprfs:///",
>
> "workspaces" : {
>
> "root" : {
>
> "location" : "/",
>
> "writable" : false,
>
> "storageformat" : null
>
> },
>
> "TpcHMulti" : {
>
> "location" : "/drill/testdata/tpch-multi",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "TpcH001" : {
>
> "location" : "/drill/testdata/tpch001",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "P1Tests" : {
>
> "location" : "/drill/testdata/p1tests",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "Krystal" : {
>
> "location" : "/drill/testdata/krystal",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "Amplab" : {
>
> "location" : "/drill/testdata/amplab",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "tmp" : {
>
> "location" : "/tmp",
>
> "writable" : true,
>
> "storageformat" : "csv"
>
> }
>
> },
>
> "formats" : {
>
> "psv" : {
>
> "type" : "text",
>
> "extensions" : [ "tbl" ],
>
> "delimiter" : "|"
>
> },
>
> "csv" : {
>
> "type" : "text",
>
> "extensions" : [ "csv" ],
>
> "delimiter" : ","
>
> },
>
> "tsv" : {
>
> "type" : "text",
>
> "extensions" : [ "tsv" ],
>
> "delimiter" : "\t"
>
> },
>
> "parquet" : {
>
> "type" : "parquet"
>
> },
>
> "json" : {
>
> "type" : "json"
>
> }
>
> }
>
> },
>
> "format" : {
>
> "type" : "parquet"
>
> },
>
> "columns" : [ "`C_CUSTKEY`", "`C_NAME`" ],
>
> "selectionRoot" : "/drill/testdata/tpch-multi/customer"
>
> }, {
>
> "pop" : "hash-to-random-exchange",
>
> "@id" : 2,
>
> "child" : 1,
>
> "expr" : "hash(`C_CUSTKEY`) ",
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "external-sort",
>
> "@id" : 3,
>
> "child" : 2,
>
> "orderings" : [ {
>
> "order" : "ASC",
>
> "expr" : "`C_CUSTKEY`",
>
> "nullDirection" : "UNSPECIFIED"
>
> } ],
>
> "reverse" : false,
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "selection-vector-remover",
>
> "@id" : 4,
>
> "child" : 3,
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "project",
>
> "@id" : 5,
>
> "exprs" : [ {
>
> "ref" : "`*0`",
>
> "expr" : "`*`"
>
> }, {
>
> "ref" : "`C_CUSTKEY0`",
>
> "expr" : "`C_CUSTKEY`"
>
> }, {
>
> "ref" : "`C_NAME`",
>
> "expr" : "`C_NAME`"
>
> } ],
>
> "child" : 4,
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "parquet-scan",
>
> "@id" : 6,
>
> "entries" : [ {
>
> "path" : "maprfs:/drill/testdata/tpch-multi/orders"
>
> } ],
>
> "storage" : {
>
> "type" : "file",
>
> "connection" : "maprfs:///",
>
> "workspaces" : {
>
> "root" : {
>
> "location" : "/",
>
> "writable" : false,
>
> "storageformat" : null
>
> },
>
> "TpcHMulti" : {
>
> "location" : "/drill/testdata/tpch-multi",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "TpcH001" : {
>
> "location" : "/drill/testdata/tpch001",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "P1Tests" : {
>
> "location" : "/drill/testdata/p1tests",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "Krystal" : {
>
> "location" : "/drill/testdata/krystal",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "Amplab" : {
>
> "location" : "/drill/testdata/amplab",
>
> "writable" : true,
>
> "storageformat" : "parquet"
>
> },
>
> "tmp" : {
>
> "location" : "/tmp",
>
> "writable" : true,
>
> "storageformat" : "csv"
>
> }
>
> },
>
> "formats" : {
>
> "psv" : {
>
> "type" : "text",
>
> "extensions" : [ "tbl" ],
>
> "delimiter" : "|"
>
> },
>
> "csv" : {
>
> "type" : "text",
>
> "extensions" : [ "csv" ],
>
> "delimiter" : ","
>
> },
>
> "tsv" : {
>
> "type" : "text",
>
> "extensions" : [ "tsv" ],
>
> "delimiter" : "\t"
>
> },
>
> "parquet" : {
>
> "type" : "parquet"
>
> },
>
> "json" : {
>
> "type" : "json"
>
> }
>
> }
>
> },
>
> "format" : {
>
> "type" : "parquet"
>
> },
>
> "columns" : [ "`C_CUSTKEY`", "`O_TOTALPRICE`" ],
>
> "selectionRoot" : "/drill/testdata/tpch-multi/orders"
>
> }, {
>
> "pop" : "filter",
>
> "@id" : 7,
>
> "child" : 6,
>
> "expr" : "greater_than(`O_TOTALPRICE`, 400000.0) ",
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "hash-to-random-exchange",
>
> "@id" : 8,
>
> "child" : 7,
>
> "expr" : "hash(`C_CUSTKEY`) ",
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "external-sort",
>
> "@id" : 9,
>
> "child" : 8,
>
> "orderings" : [ {
>
> "order" : "ASC",
>
> "expr" : "`C_CUSTKEY`",
>
> "nullDirection" : "UNSPECIFIED"
>
> } ],
>
> "reverse" : false,
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "selection-vector-remover",
>
> "@id" : 10,
>
> "child" : 9,
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "merge-join",
>
> "@id" : 11,
>
> "left" : 10,
>
> "right" : 5,
>
> "conditions" : [ {
>
> "relationship" : "==",
>
> "left" : "`C_CUSTKEY`",
>
> "right" : "`C_CUSTKEY0`"
>
> } ],
>
> "joinType" : "INNER",
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "project",
>
> "@id" : 12,
>
> "exprs" : [ {
>
> "ref" : "`O_TOTALPRICE`",
>
> "expr" : "`O_TOTALPRICE`"
>
> }, {
>
> "ref" : "`C_NAME`",
>
> "expr" : "`C_NAME`"
>
> } ],
>
> "child" : 11,
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "hash-to-random-exchange",
>
> "@id" : 13,
>
> "child" : 12,
>
> "expr" : "hash(`O_TOTALPRICE`) ",
>
> "initialAllocation" : 1000000,
>
> "maxAllocation" : 10000000000
>
> }, {
>
> "pop" : "external-sort",
>
> "@id" : 14,
>
> "child" : 13,
>
> "orderings" : [ {
>
> "order" : "ASC",
>
> "expr" : "`O_TOTALPRICE`",
>
> "nullDirection" : "UNSPECIFIED"
>
> } ],
> "reverse" : false,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000
> }, {
> "pop" : "selection-vector-remover",
> "@id" : 15,
> "child" : 14,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000
> }, {
> "pop" : "single-merge-exchange",
> "@id" : 16,
> "child" : 15,
> "orderings" : [ {
> "order" : "ASC",
> "expr" : "`O_TOTALPRICE`",
> "nullDirection" : "UNSPECIFIED"
> } ],
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000
> }, {
> "pop" : "screen",
> "@id" : 17,
> "child" : 16,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000
> } ]
> } |
> +------------+------------+
> 1 row selected (0.151 seconds)
--
This message was sent by Atlassian JIRA
(v6.2#6252)