[jira] [Assigned] (SPARK-20094) Putting predicate with subquery into join condition in ReorderJoin fails RewritePredicateSubquery.rewriteExistentialExpr
[ https://issues.apache.org/jira/browse/SPARK-20094?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Apache Spark reassigned SPARK-20094: Assignee: Apache Spark > Putting predicate with subquery into join condition in ReorderJoin fails > RewritePredicateSubquery.rewriteExistentialExpr > > > Key: SPARK-20094 > URL: https://issues.apache.org/jira/browse/SPARK-20094 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.2.0 >Reporter: Zhenhua Wang >Assignee: Apache Spark > > ReorderJoin collects all predicates and tries to put them into join condition > when creating ordered join. If a predicate with a subquery is in a join > condition instead of a filter condition, > `RewritePredicateSubquery.rewriteExistentialExpr` would fail to convert the > subquery to an ExistenceJoin, and thus result in an error. > For example, tpcds q45 fails due to the above reason: > {noformat} > spark-sql> explain codegen > > SELECT > > ca_zip, > > ca_city, > > sum(ws_sales_price) > > FROM web_sales, customer, customer_address, date_dim, item > > WHERE ws_bill_customer_sk = c_customer_sk > > AND c_current_addr_sk = ca_address_sk > > AND ws_item_sk = i_item_sk > > AND (substr(ca_zip, 1, 5) IN > > ('85669', '86197', '88274', '83405', '86475', '85392', '85460', > '80348', '81792') > > OR > > i_item_id IN (SELECT i_item_id > > FROM item > > WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) > > ) > > ) > > AND ws_sold_date_sk = d_date_sk > > AND d_qoy = 2 AND d_year = 2001 > > GROUP BY ca_zip, ca_city > > ORDER BY ca_zip, ca_city > > LIMIT 100; > 17/03/25 15:27:02 ERROR SparkSQLDriver: Failed in [explain codegen > > SELECT > ca_zip, > ca_city, > sum(ws_sales_price) > FROM web_sales, customer, customer_address, date_dim, item > WHERE ws_bill_customer_sk = c_customer_sk > AND c_current_addr_sk = ca_address_sk > AND ws_item_sk = i_item_sk > AND (substr(ca_zip, 1, 5) IN > ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', > '81792') > OR > 
i_item_id IN (SELECT i_item_id > FROM item > WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) > ) > ) > AND ws_sold_date_sk = d_date_sk > AND d_qoy = 2 AND d_year = 2001 > GROUP BY ca_zip, ca_city > ORDER BY ca_zip, ca_city > LIMIT 100] > java.lang.UnsupportedOperationException: Cannot evaluate expression: list#1 [] > at > org.apache.spark.sql.catalyst.expressions.Unevaluable$class.doGenCode(Expression.scala:224) > at > org.apache.spark.sql.catalyst.expressions.ListQuery.doGenCode(subquery.scala:262) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.catalyst.expressions.In$$anonfun$3.apply(predicates.scala:199) > at > org.apache.spark.sql.catalyst.expressions.In$$anonfun$3.apply(predicates.scala:199) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.immutable.List.map(List.scala:285) > at > org.apache.spark.sql.catalyst.expressions.In.doGenCode(predicates.scala:199) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.catalyst.expressions.Or.doGenCode(predicates.scala:379) > at > 
org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sq
[jira] [Assigned] (SPARK-20094) Putting predicate with subquery into join condition in ReorderJoin fails RewritePredicateSubquery.rewriteExistentialExpr
[ https://issues.apache.org/jira/browse/SPARK-20094?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Apache Spark reassigned SPARK-20094: Assignee: (was: Apache Spark) > Putting predicate with subquery into join condition in ReorderJoin fails > RewritePredicateSubquery.rewriteExistentialExpr > > > Key: SPARK-20094 > URL: https://issues.apache.org/jira/browse/SPARK-20094 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.2.0 >Reporter: Zhenhua Wang > > ReorderJoin collects all predicates and tries to put them into join condition > when creating ordered join. If a predicate with a subquery is in a join > condition instead of a filter condition, > `RewritePredicateSubquery.rewriteExistentialExpr` would fail to convert the > subquery to an ExistenceJoin, and thus result in an error. > For example, tpcds q45 fails due to the above reason: > {noformat} > spark-sql> explain codegen > > SELECT > > ca_zip, > > ca_city, > > sum(ws_sales_price) > > FROM web_sales, customer, customer_address, date_dim, item > > WHERE ws_bill_customer_sk = c_customer_sk > > AND c_current_addr_sk = ca_address_sk > > AND ws_item_sk = i_item_sk > > AND (substr(ca_zip, 1, 5) IN > > ('85669', '86197', '88274', '83405', '86475', '85392', '85460', > '80348', '81792') > > OR > > i_item_id IN (SELECT i_item_id > > FROM item > > WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) > > ) > > ) > > AND ws_sold_date_sk = d_date_sk > > AND d_qoy = 2 AND d_year = 2001 > > GROUP BY ca_zip, ca_city > > ORDER BY ca_zip, ca_city > > LIMIT 100; > 17/03/25 15:27:02 ERROR SparkSQLDriver: Failed in [explain codegen > > SELECT > ca_zip, > ca_city, > sum(ws_sales_price) > FROM web_sales, customer, customer_address, date_dim, item > WHERE ws_bill_customer_sk = c_customer_sk > AND c_current_addr_sk = ca_address_sk > AND ws_item_sk = i_item_sk > AND (substr(ca_zip, 1, 5) IN > ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', > '81792') > OR > i_item_id IN 
(SELECT i_item_id > FROM item > WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) > ) > ) > AND ws_sold_date_sk = d_date_sk > AND d_qoy = 2 AND d_year = 2001 > GROUP BY ca_zip, ca_city > ORDER BY ca_zip, ca_city > LIMIT 100] > java.lang.UnsupportedOperationException: Cannot evaluate expression: list#1 [] > at > org.apache.spark.sql.catalyst.expressions.Unevaluable$class.doGenCode(Expression.scala:224) > at > org.apache.spark.sql.catalyst.expressions.ListQuery.doGenCode(subquery.scala:262) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.catalyst.expressions.In$$anonfun$3.apply(predicates.scala:199) > at > org.apache.spark.sql.catalyst.expressions.In$$anonfun$3.apply(predicates.scala:199) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.immutable.List.map(List.scala:285) > at > org.apache.spark.sql.catalyst.expressions.In.doGenCode(predicates.scala:199) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.catalyst.expressions.Or.doGenCode(predicates.scala:379) > at > 
org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.execution.joins.Broadca