[
https://issues.apache.org/jira/browse/DRILL-1397?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Abhishek Girish updated DRILL-1397:
-----------------------------------
Attachment: explain plan.txt
Attaching explain plan.
> Query with IN clause and correlation fails
> ------------------------------------------
>
> Key: DRILL-1397
> URL: https://issues.apache.org/jira/browse/DRILL-1397
> Project: Apache Drill
> Issue Type: Bug
> Reporter: Neeraja
> Assignee: Aman Sinha
> Priority: Critical
> Fix For: 0.7.0
>
> Attachments: clicks.json, explain plan.txt, orders.zip
>
>
> The following query fails. This could be related to
> https://issues.apache.org/jira/browse/DRILL-1396, but filing separate issue
> as the error is different.
> 0: jdbc:drill:> select t.trans_info.purch_flag,
> . . . . . . . > t.user_info.cust_id, t.trans_info.prod_id[0]
> . . . . . . . > from `Clickstream.clicks`.`/json/clicks.json` t
> . . . . . . . > where t.user_info.cust_id IN (select o.cust_id from
> hive.orders o where o.order_total >100 );
> Query failed: Failure while running fragment. Incoming batch has an empty
> schema. This is not allowed. [2b441a79-be49-4116-a459-513f97418738]
> Error: exception while executing query: Failure while trying to get next
> result batch. (state=,code=0)
> Below is the explain plan.
> +------------+------------+
> | text | json |
> +------------+------------+
> | 00-00 Screen
> 00-01 Project(EXPR$0=[$0], EXPR$1=[$1], EXPR$2=[$2])
> 00-02 Project(EXPR$0=[ITEM($1, 'purch_flag')], EXPR$1=[ITEM($0,
> 'cust_id')], EXPR$2=[ITEM(ITEM($1, 'prod_id'), 0)])
> 00-03 HashJoin(condition=[=($2, $3)], joinType=[inner])
> 00-05 Project(T27¦¦user_info=[$1], T27¦¦trans_info=[$2],
> $f3=[ITEM($1, 'cust_id')])
> 00-07 Project(T27¦¦*=[$0], T27¦¦user_info=[$1],
> T27¦¦trans_info=[$2])
> 00-09 Scan(groupscan=[EasyGroupScan
> [selectionRoot=/mapr/my.cluster.com/demo/clicks/json/clicks.json, columns =
> null]])
> 00-04 HashAgg(group=[{0}])
> 00-06 Project(cust_id=[$0])
> 00-08 SelectionVectorRemover
> 00-10 Filter(condition=[>($1, 100)])
> 00-11 Project(cust_id=[$1], order_total=[$0])
> 00-12 Scan(groupscan=[HiveScan
> [table=Table(tableName:orders, dbName:default, owner:root,
> createTime:1409956843, lastAccessTime:0, retention:0,
> sd:StorageDescriptor(cols:[FieldSchema(name:order_id, type:bigint,
> comment:null), FieldSchema(name:month, type:string, comment:null),
> FieldSchema(name:purchdate, type:timestamp, comment:null),
> FieldSchema(name:cust_id, type:bigint, comment:null), FieldSchema(name:state,
> type:string, comment:null), FieldSchema(name:prod_id, type:bigint,
> comment:null), FieldSchema(name:order_total, type:int, comment:null)],
> location:maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders,
> inputFormat:org.apache.hadoop.mapred.TextInputFormat,
> outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat,
> compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null,
> serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,
> parameters:{serialization.format=,, field.delim=,}), bucketCols:[],
> sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[],
> skewedColValues:[], skewedColValueLocationMaps:{}),
> storedAsSubDirectories:false), partitionKeys:[], parameters:{EXTERNAL=TRUE,
> transient_lastDdlTime=1409956843}, viewOriginalText:null,
> viewExpandedText:null, tableType:EXTERNAL_TABLE),
> inputSplits=[maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month2.agg.orders.csv:0+640155,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month5.agg.orders.csv:0+775506,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month6.agg.orders.csv:0+791685,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month8.agg.orders.csv:0+805072,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month4.agg.orders.csv:0+603886,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month9.agg.orders.csv:0+846270,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month1.agg.orders.csv:0+461090,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month7.agg.orders.csv:0+771399,
>
> maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month3.agg.orders.csv:0+806738],
> columns=[SchemaPath [`cust_id`], SchemaPath [`order_total`]]]])
> | {
> "head" : {
> "version" : 1,
> "generator" : {
> "type" : "ExplainHandler",
> "info" : ""
> },
> "type" : "APACHE_DRILL_PHYSICAL",
> "options" : [ ],
> "queue" : 0,
> "resultMode" : "EXEC"
> },
> "graph" : [ {
> "pop" : "hive-scan",
> "@id" : 12,
> "hive-table" : {
> "table" : {
> "tableName" : "orders",
> "dbName" : "default",
> "owner" : "root",
> "createTime" : 1409956843,
> "lastAccessTime" : 0,
> "retention" : 0,
> "sd" : {
> "cols" : [ {
> "name" : "order_id",
> "type" : "bigint",
> "comment" : null
> }, {
> "name" : "month",
> "type" : "string",
> "comment" : null
> }, {
> "name" : "purchdate",
> "type" : "timestamp",
> "comment" : null
> }, {
> "name" : "cust_id",
> "type" : "bigint",
> "comment" : null
> }, {
> "name" : "state",
> "type" : "string",
> "comment" : null
> }, {
> "name" : "prod_id",
> "type" : "bigint",
> "comment" : null
> }, {
> "name" : "order_total",
> "type" : "int",
> "comment" : null
> } ],
> "location" :
> "maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders",
> "inputFormat" : "org.apache.hadoop.mapred.TextInputFormat",
> "outputFormat" :
> "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
> "compressed" : false,
> "numBuckets" : -1,
> "serDeInfo" : {
> "name" : null,
> "serializationLib" :
> "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
> "parameters" : {
> "serialization.format" : ",",
> "field.delim" : ","
> }
> },
> "sortCols" : [ ],
> "parameters" : { }
> },
> "partitionKeys" : [ ],
> "parameters" : {
> "EXTERNAL" : "TRUE",
> "transient_lastDdlTime" : "1409956843"
> },
> "viewOriginalText" : null,
> "viewExpandedText" : null,
> "tableType" : "EXTERNAL_TABLE"
> },
> "partitions" : null,
> "hiveConfigOverride" : {
> "hive.metastore.uris" : "thrift://192.168.208.143:9083",
> "hive.metastore.sasl.enabled" : "false"
> }
> },
> "storage-plugin" : "hive",
> "columns" : [ "`cust_id`", "`order_total`" ],
> "cost" : 6349.0
> }, {
> "pop" : "project",
> "@id" : 11,
> "exprs" : [ {
> "ref" : "`cust_id`",
> "expr" : "`cust_id`"
> }, {
> "ref" : "`order_total`",
> "expr" : "`order_total`"
> } ],
> "child" : 12,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 6349.0
> }, {
> "pop" : "filter",
> "@id" : 10,
> "child" : 11,
> "expr" : "greater_than(`order_total`, 100) ",
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 3174.5
> }, {
> "pop" : "selection-vector-remover",
> "@id" : 8,
> "child" : 10,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 3174.5
> }, {
> "pop" : "project",
> "@id" : 6,
> "exprs" : [ {
> "ref" : "`cust_id`",
> "expr" : "`cust_id`"
> } ],
> "child" : 8,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 3174.5
> }, {
> "pop" : "hash-aggregate",
> "@id" : 4,
> "child" : 6,
> "cardinality" : 1.0,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 1587.25,
> "groupByExprs" : [ {
> "ref" : "`cust_id`",
> "expr" : "`cust_id`"
> } ],
> "aggrExprs" : [ ]
> }, {
> "pop" : "fs-scan",
> "@id" : 9,
> "files" : [ "maprfs:/mapr/my.cluster.com/demo/clicks/json/clicks.json" ],
> "storage" : {
> "type" : "file",
> "enabled" : true,
> "connection" : "maprfs:///",
> "workspaces" : {
> "root" : {
> "location" : "/mapr/my.cluster.com/demo",
> "writable" : false,
> "storageformat" : null
> },
> "clicks" : {
> "location" : "/mapr/my.cluster.com/demo/clicks",
> "writable" : true,
> "storageformat" : "parquet"
> },
> "views" : {
> "location" : "/mapr/my.cluster.com/demo/views",
> "writable" : true,
> "storageformat" : "parquet"
> }
> },
> "formats" : {
> "psv" : {
> "type" : "text",
> "extensions" : [ "tbl" ],
> "delimiter" : "|"
> },
> "csv" : {
> "type" : "text",
> "extensions" : [ "csv" ],
> "delimiter" : ","
> },
> "tsv" : {
> "type" : "text",
> "extensions" : [ "tsv" ],
> "delimiter" : "\t"
> },
> "parquet" : {
> "type" : "parquet"
> },
> "json" : {
> "type" : "json"
> }
> }
> },
> "format" : {
> "type" : "json"
> },
> "selectionRoot" : "/mapr/my.cluster.com/demo/clicks/json/clicks.json",
> "cost" : 5097.0
> }, {
> "pop" : "project",
> "@id" : 7,
> "exprs" : [ {
> "ref" : "`T27¦¦*`",
> "expr" : "`*`"
> } ],
> "child" : 9,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 5097.0
> }, {
> "pop" : "project",
> "@id" : 5,
> "exprs" : [ {
> "ref" : "`T27¦¦user_info`",
> "expr" : "`T27¦¦user_info`"
> }, {
> "ref" : "`T27¦¦trans_info`",
> "expr" : "`T27¦¦trans_info`"
> }, {
> "ref" : "`$f3`",
> "expr" : "`T27¦¦user_info`.`cust_id`"
> } ],
> "child" : 7,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 5097.0
> }, {
> "pop" : "hash-join",
> "@id" : 3,
> "left" : 5,
> "right" : 4,
> "conditions" : [ {
> "relationship" : "==",
> "left" : "`$f3`",
> "right" : "`cust_id`"
> } ],
> "joinType" : "INNER",
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 5097.0
> }, {
> "pop" : "project",
> "@id" : 2,
> "exprs" : [ {
> "ref" : "`EXPR$0`",
> "expr" : "`T27¦¦trans_info`.`purch_flag`"
> }, {
> "ref" : "`EXPR$1`",
> "expr" : "`T27¦¦user_info`.`cust_id`"
> }, {
> "ref" : "`EXPR$2`",
> "expr" : "`T27¦¦trans_info`.`prod_id`[0]"
> } ],
> "child" : 3,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 5097.0
> }, {
> "pop" : "project",
> "@id" : 1,
> "exprs" : [ {
> "ref" : "`EXPR$0`",
> "expr" : "`EXPR$0`"
> }, {
> "ref" : "`EXPR$1`",
> "expr" : "`EXPR$1`"
> }, {
> "ref" : "`EXPR$2`",
> "expr" : "`EXPR$2`"
> } ],
> "child" : 2,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 5097.0
> }, {
> "pop" : "screen",
> "@id" : 0,
> "child" : 1,
> "initialAllocation" : 1000000,
> "maxAllocation" : 10000000000,
> "cost" : 5097.0
> } ]
> } |
> +------------+------------+
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)