[ https://issues.apache.org/jira/browse/HIVE-27898?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17819491#comment-17819491 ]
Butao Zhang commented on HIVE-27898: ------------------------------------ *After disable cbo, the query is also fine.* // set conf in beeline: set hive.fetch.task.conversion=none; //enforce launching tez task set hive.explain.user=false; set hive.cbo.enable=false; {code:java} 0: jdbc:hive2://127.0.0.1:10000/default> show create table testdb.test_data_02; INFO : Compiling command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e): show create table testdb.test_data_02 INFO : Semantic Analysis Completed (retrial = false) INFO : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from deserializer)], properties:null) INFO : Completed compiling command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e); Time taken: 0.045 seconds INFO : Concurrency mode is disabled, not creating a lock manager INFO : Executing command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e): show create table testdb.test_data_02 INFO : Starting task [Stage-0:DDL] in serial mode INFO : Completed executing command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e); Time taken: 0.057 seconds +----------------------------------------------------+ | createtab_stmt | +----------------------------------------------------+ | CREATE EXTERNAL TABLE `testdb`.`test_data_02`( | | `id` string, | | `name` string) | | PARTITIONED BY SPEC ( | | `name`) | | ROW FORMAT SERDE | | 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' | | STORED BY | | 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' | | WITH SERDEPROPERTIES ( | | 'serialization.format'='1') | | LOCATION | | 'hdfs://127.0.0.1:8028/tmp/testiceberg/default/test_data_02' | | TBLPROPERTIES ( | | 'bucketing_version'='2', | | 'current-snapshot-id'='6130266132019241842', | | 'engine.hive.enabled'='true', | | 'format-version'='2', | | 'iceberg.catalog'='location_based_table', | | 'table_type'='ICEBERG', | | 'transient_lastDdlTime'='1708508832') | +----------------------------------------------------+ 21 rows selected (0.482 seconds) 0: jdbc:hive2://127.0.0.1:10000/default> 0: jdbc:hive2://127.0.0.1:10000/default> 0: jdbc:hive2://127.0.0.1:10000/default> explain select * from (select * from testdb.test_data_02 limit 10) s1; INFO : Compiling command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97): explain select * from (select * from testdb.test_data_02 limit 10) s1 INFO : Semantic Analysis Completed (retrial = false) INFO : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) INFO : Completed compiling command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97); Time taken: 0.09 seconds INFO : Concurrency mode is disabled, not creating a lock manager INFO : Executing command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97): explain select * from (select * from testdb.test_data_02 limit 10) s1 INFO : Starting task [Stage-3:EXPLAIN] in serial mode INFO : Completed executing command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97); Time taken: 0.008 seconds +----------------------------------------------------+ | Explain | +----------------------------------------------------+ | STAGE DEPENDENCIES: | | Stage-1 is a root stage | | Stage-0 depends on stages: Stage-1 | | | | STAGE PLANS: | | Stage: Stage-1 | | Tez | | DagId: hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97:18 | | Edges: | | Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) | | DagName: hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97:18 | | Vertices: | | Map 1 | | Map Operator Tree: | | TableScan | | alias: test_data_02 | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | Limit | | Number of rows: 10 | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | Select Operator | | expressions: id (type: string), name (type: string) | | outputColumnNames: _col0, _col1 | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | Reduce Output Operator | | null sort order: | | sort order: | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | TopN Hash Memory Usage: 0.1 | | value expressions: _col0 (type: string), _col1 (type: string) | | Execution mode: vectorized | | Reducer 2 | | Execution mode: vectorized | | Reduce Operator Tree: | | Limit | | Number of rows: 10 | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | Select Operator | | expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) | | outputColumnNames: _col0, _col1 | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | File Output Operator | | compressed: false | | Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE | | table: | | input format: org.apache.hadoop.mapred.SequenceFileInputFormat | | output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | | serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | | | | Stage: Stage-0 | | Fetch Operator | | limit: -1 | | Processor Tree: | | ListSink | | | +----------------------------------------------------+ 55 rows selected (0.171 seconds) 0: jdbc:hive2://127.0.0.1:10000/default> select * from (select * from testdb.test_data_02 limit 10) s1; INFO : Compiling command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd): select * from (select * from testdb.test_data_02 limit 10) s1 INFO : Semantic Analysis Completed (retrial = false) INFO : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:s1.id, type:string, comment:null), FieldSchema(name:s1.name, type:string, comment:null)], properties:null) INFO : Completed compiling command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd); Time taken: 0.119 seconds INFO : Concurrency mode is disabled, not creating a lock manager INFO : Executing command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd): select * from (select * from testdb.test_data_02 limit 10) s1 INFO : Query ID = hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd INFO : Total jobs = 1 INFO : Launching Job 1 out of 1 INFO : Starting task [Stage-1:MAPRED] in serial mode INFO : Subscribed to counters: [] for queryId: hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd INFO : Session is already open INFO : Dag name: select * from (select......a_04 limit 10) s1 (Stage-1) INFO : HS2 Host: [D01-303-D6-10], Query ID: [hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd], Dag ID: [dag_1706773972635_0009_7], DAG Session ID: [application_1706773972635_0009] INFO : Status: Running (Executing on YARN cluster with App id application_1706773972635_0009)---------------------------------------------------------------------------------------------- VERTICES MODE STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED ---------------------------------------------------------------------------------------------- Map 1 .......... container SUCCEEDED 1 1 0 0 0 0 Reducer 2 ...... container SUCCEEDED 1 1 0 0 0 0 ---------------------------------------------------------------------------------------------- VERTICES: 02/02 [==========================>>] 100% ELAPSED TIME: 21.09 s ---------------------------------------------------------------------------------------------- INFO : Completed executing command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd); Time taken: 5.694 seconds +--------+----------+ | s1.id | s1.name | +--------+----------+ | 1 | a | | 2 | b | +--------+----------+ 2 rows selected (5.885 seconds) {code} > When CBO is disabled, HIVE cannot query ICEBERG tables in nested subqueries > properly > ------------------------------------------------------------------------------------ > > Key: HIVE-27898 > URL: https://issues.apache.org/jira/browse/HIVE-27898 > Project: Hive > Issue Type: Bug > Components: Iceberg integration > Affects Versions: 4.0.0, 4.0.0-beta-1 > Reporter: yongzhi.shao > Priority: Critical > Attachments: hive-site.xml > > > Currently, we found that when using HIVE4-BETA1 version, if we use ICEBERG > table in the subquery, we can't get any data in the end. > I have used HIVE3-TEZ for cross validation and HIVE3 does not have this > problem when querying ICEBERG. > {code:java} > --spark3.4.1+iceberg 1.4.2 > CREATE TABLE datacenter.dwd.b_std_trade ( > uni_order_id STRING, > data_from BIGINT, > partner STRING, > plat_code STRING, > order_id STRING, > uni_shop_id STRING, > uni_id STRING, > guide_id STRING, > shop_id STRING, > plat_account STRING, > total_fee DOUBLE, > item_discount_fee DOUBLE, > trade_discount_fee DOUBLE, > adjust_fee DOUBLE, > post_fee DOUBLE, > discount_rate DOUBLE, > payment_no_postfee DOUBLE, > payment DOUBLE, > pay_time STRING, > product_num BIGINT, > order_status STRING, > is_refund STRING, > refund_fee DOUBLE, > insert_time STRING, > created STRING, > endtime STRING, > modified STRING, > trade_type STRING, > receiver_name STRING, > receiver_country STRING, > receiver_state STRING, > receiver_city STRING, > receiver_district STRING, > receiver_town STRING, > receiver_address STRING, > receiver_mobile STRING, > trade_source STRING, > delivery_type STRING, > consign_time STRING, > orders_num BIGINT, > is_presale BIGINT, > presale_status STRING, > first_fee_paytime STRING, > last_fee_paytime STRING, > first_paid_fee DOUBLE, > tenant STRING, > tidb_modified STRING, > step_paid_fee DOUBLE, > seller_flag STRING, > is_used_store_card BIGINT, > store_card_used DOUBLE, > store_card_basic_used DOUBLE, > store_card_expand_used DOUBLE, > order_promotion_num BIGINT, > item_promotion_num BIGINT, > buyer_remark STRING, > seller_remark STRING, > trade_business_type STRING) > USING iceberg > PARTITIONED BY (uni_shop_id, truncate(4, created)) > LOCATION '/iceberg-catalog/warehouse/dwd/b_std_trade' > TBLPROPERTIES ( > 'current-snapshot-id' = '7217819472703702905', > 'format' = 'iceberg/orc', > 'format-version' = '1', > 'hive.stored-as' = 'iceberg', > 'read.orc.vectorization.enabled' = 'true', > 'sort-order' = 'uni_shop_id ASC NULLS FIRST, created ASC NULLS FIRST', > 'write.distribution-mode' = 'hash', > 'write.format.default' = 'orc', > 'write.metadata.delete-after-commit.enabled' = 'true', > 'write.metadata.previous-versions-max' = '3', > 'write.orc.bloom.filter.columns' = 'order_id', > 'write.orc.compression-codec' = 'zstd') > --hive-iceberg > CREATE EXTERNAL TABLE iceberg_dwd.b_std_trade > STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' > LOCATION 'hdfs://xxxx/iceberg-catalog/warehouse/dwd/b_std_trade' > TBLPROPERTIES > ('iceberg.catalog'='location_based_table','engine.hive.enabled'='true'); > select * from iceberg_dwd.b_std_trade > where uni_shop_id = 'TEST|11111' limit 10 --10 rows > select * > from ( > select * from iceberg_dwd.b_std_trade > where uni_shop_id = 'TEST|11111' limit 10 > ) t1; --10 rows > select uni_shop_id > from ( > select * from iceberg_dwd.b_std_trade > where uni_shop_id = 'TEST|11111' limit 10 > ) t1; --0 rows > select uni_shop_id > from ( > select uni_shop_id as uni_shop_id from iceberg_dwd.b_std_trade > where uni_shop_id = 'TEST|11111' limit 10 > ) t1; --0 rows > --hive-orc > select uni_shop_id > from ( > select * from iceberg_dwd.trade_test > where uni_shop_id = 'TEST|11111' limit 10 > ) t1; --10 ROWS{code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)