[
https://issues.apache.org/jira/browse/HIVE-29146?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18016259#comment-18016259
]
Krisztian Kasa commented on HIVE-29146:
---------------------------------------
Merged to master. Thanks [~InvisibleProgrammer] and [~ayushtkn] for the review.
> Query with WITH clause fails during split generation when CTE materaliazation
> is enabled
> ----------------------------------------------------------------------------------------
>
> Key: HIVE-29146
> URL: https://issues.apache.org/jira/browse/HIVE-29146
> Project: Hive
> Issue Type: Bug
> Reporter: Stamatis Zampetakis
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Attachments: repro.q
>
>
> Queries with WITH clause over transactional tables fail at runtime during
> split generation when the CTE materialization is enabled.
> The problem can be reproduced by running TPC-DS query 11 and setting the
> following properties.
> {code:sql}
> set hive.optimize.cte.materialize.threshold=2;
> set hive.optimize.cte.materialize.full.aggregate.only=false;
> set hive.optimize.shared.work=true;
> with year_total as (
> select c_customer_id customer_id
> ,c_first_name customer_first_name
> ,c_last_name customer_last_name
> ,c_preferred_cust_flag customer_preferred_cust_flag
> ,c_birth_country customer_birth_country
> ,c_login customer_login
> ,c_email_address customer_email_address
> ,d_year dyear
> ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
> ,'s' sale_type
> from customer
> ,store_sales
> ,date_dim
> where c_customer_sk = ss_customer_sk
> and ss_sold_date_sk = d_date_sk
> group by c_customer_id
> ,c_first_name
> ,c_last_name
> ,c_preferred_cust_flag
> ,c_birth_country
> ,c_login
> ,c_email_address
> ,d_year
> union all
> select c_customer_id customer_id
> ,c_first_name customer_first_name
> ,c_last_name customer_last_name
> ,c_preferred_cust_flag customer_preferred_cust_flag
> ,c_birth_country customer_birth_country
> ,c_login customer_login
> ,c_email_address customer_email_address
> ,d_year dyear
> ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
> ,'w' sale_type
> from customer
> ,web_sales
> ,date_dim
> where c_customer_sk = ws_bill_customer_sk
> and ws_sold_date_sk = d_date_sk
> group by c_customer_id
> ,c_first_name
> ,c_last_name
> ,c_preferred_cust_flag
> ,c_birth_country
> ,c_login
> ,c_email_address
> ,d_year
> )
> select
> t_s_secyear.customer_id
> ,t_s_secyear.customer_first_name
> ,t_s_secyear.customer_last_name
> ,t_s_secyear.customer_birth_country
> from year_total t_s_firstyear
> ,year_total t_s_secyear
> ,year_total t_w_firstyear
> ,year_total t_w_secyear
> where t_s_secyear.customer_id = t_s_firstyear.customer_id
> and t_s_firstyear.customer_id = t_w_secyear.customer_id
> and t_s_firstyear.customer_id = t_w_firstyear.customer_id
> and t_s_firstyear.sale_type = 's'
> and t_w_firstyear.sale_type = 'w'
> and t_s_secyear.sale_type = 's'
> and t_w_secyear.sale_type = 'w'
> and t_s_firstyear.dyear = 1999
> and t_s_secyear.dyear = 1999+1
> and t_w_firstyear.dyear = 1999
> and t_w_secyear.dyear = 1999+1
> and t_s_firstyear.year_total > 0
> and t_w_firstyear.year_total > 0
> and case when t_w_firstyear.year_total > 0 then
> t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end
> > case when t_s_firstyear.year_total > 0 then
> t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end
> order by t_s_secyear.customer_id
> ,t_s_secyear.customer_first_name
> ,t_s_secyear.customer_last_name
> ,t_s_secyear.customer_birth_country
> limit 100;
> {code}
>
> Sample error is shown below. There are various occurrences of the same error
> each with a different table.
> {noformat}
> 2025-08-14T05:10:46,213 ERROR [Dispatcher thread {Central}] impl.VertexImpl:
> Vertex Input: date_dim initializer failed,
> vertex=vertex_1755173441322_0001_1_02 [Map 6]
> org.apache.tez.dag.app.dag.impl.AMUserCodeException: java.io.IOException:
> Acid table: default.date_dim is missing from the ValidWriteIdList config: null
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallback.onFailure(RootInputInitializerManager.java:330)
> ~[tez-dag-0.10.5.jar:0.10.5]
> at
> com.google.common.util.concurrent.Futures$CallbackListener.run(Futures.java:1228)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.MoreExecutors$DirectExecutor.execute(MoreExecutors.java:399)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.AbstractFuture.executeListener(AbstractFuture.java:911)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.AbstractFuture.complete(AbstractFuture.java:822)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:686)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:113)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:58)
> ~[guava-22.0.jar:?]
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:75)
> ~[guava-22.0.jar:?]
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
> ~[?:?]
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
> ~[?:?]
> at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]
> Caused by: java.io.IOException: Acid table: default.date_dim is missing from
> the ValidWriteIdList config: null
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.addSplitsForGroup(HiveInputFormat.java:536)
> ~[hive-exec-4.2.0-SNAPSHOT.jar:4.2.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getSplits(HiveInputFormat.java:880)
> ~[hive-exec-4.2.0-SNAPSHOT.jar:4.2.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator.initialize(HiveSplitGenerator.java:363)
> ~[hive-exec-4.2.0-SNAPSHOT.jar:4.2.0-SNAPSHOT]
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:280)
> ~[tez-dag-0.10.5.jar:0.10.5]
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable$1.run(RootInputInitializerManager.java:272)
> ~[tez-dag-0.10.5.jar:0.10.5]
> at
> java.base/java.security.AccessController.doPrivileged(AccessController.java:714)
> ~[?:?]
> at java.base/javax.security.auth.Subject.doAs(Subject.java:525) ~[?:?]
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953)
> ~[hadoop-common-3.4.1.jar:?]
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:272)
> ~[tez-dag-0.10.5.jar:0.10.5]
> at
> org.apache.tez.dag.app.dag.RootInputInitializerManager$InputInitializerCallable.call(RootInputInitializerManager.java:256)
> ~[tez-dag-0.10.5.jar:0.10.5]
> at
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:111)
> ~[guava-22.0.jar:?]
> ... 5 more
> {noformat}
> The problem is reproducible in master (commit
> 243bc97290f12c97a11b840f2723ec50458b198c) using [^repro.q]
> {code:java}
> mvn test -Dtest=TestMiniLlapLocalCliDriver -Dqfile=repro.q
> {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)