[ 
https://issues.apache.org/jira/browse/IMPALA-11161?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17517341#comment-17517341
 ] 

Zoltán Borók-Nagy commented on IMPALA-11161:
--------------------------------------------

Yes, for Iceberg, the source of truth for table metadata is the Iceberg 
metadata files. This means that if we notice an inconsistency between 
Iceberg metadata and HMS metadata during table load, we update HMS to match:
https://github.com/apache/impala/blob/85ddd27b640bf42a61d8af238938e16618db537e/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java#L361

Btw, this behavior is similar to Kudu tables, where Kudu is the source of truth 
about table metadata:
https://github.com/apache/impala/blob/85ddd27b640bf42a61d8af238938e16618db537e/fe/src/main/java/org/apache/impala/catalog/KuduTable.java#L341

> TestIcebergTable.test_insert failed by InconsistentMetadataFetchException
> -------------------------------------------------------------------------
>
>                 Key: IMPALA-11161
>                 URL: https://issues.apache.org/jira/browse/IMPALA-11161
>             Project: IMPALA
>          Issue Type: Bug
>            Reporter: Quanlong Huang
>            Assignee: Quanlong Huang
>            Priority: Major
>              Labels: broken-build, flaky-test
>         Attachments: 
> catalogd.117a07c182d2.invalid-user.log.INFO.20220303-060304.1.bz2, 
> impalad.907651c26c29.invalid-user.log.INFO.20220303-060305.1.bz2, 
> ubuntu-16.04-dockerised-tests-5529-logs.tar.bz2
>
>
> Saw the test fail in an unrelated patch: 
> [https://jenkins.impala.io/job/ubuntu-16.04-dockerised-tests/5356]
> Stacktrace
> {code:java}
> query_test/test_iceberg.py:76: in test_insert
>     self.run_test_case('QueryTest/iceberg-insert', vector, 
> use_db=unique_database)
> common/impala_test_suite.py:687: in run_test_case
>     result = exec_fn(query, user=test_section.get('USER', '').strip() or None)
> common/impala_test_suite.py:625: in __exec_in_impala
>     result = self.__execute_query(target_impalad_client, query, user=user)
> common/impala_test_suite.py:961: in __execute_query
>     return impalad_client.execute(query, user=user)
> common/impala_connection.py:212: in execute
>     return self.__beeswax_client.execute(sql_stmt, user=user)
> beeswax/impala_beeswax.py:189: in execute
>     handle = self.__execute_query(query_string.strip(), user=user)
> beeswax/impala_beeswax.py:365: in __execute_query
>     handle = self.execute_query_async(query_string, user=user)
> beeswax/impala_beeswax.py:359: in execute_query_async
>     handle = self.__do_rpc(lambda: self.imp_service.query(query,))
> beeswax/impala_beeswax.py:522: in __do_rpc
>     raise ImpalaBeeswaxException(self.__build_error_message(b), b)
> E   ImpalaBeeswaxException: ImpalaBeeswaxException:
> E    INNER EXCEPTION: <class 'beeswaxd.ttypes.BeeswaxException'>
> E    MESSAGE: AnalysisException: Failed to load metadata for table: 
> 'iceberg_hive_cat'
> E   CAUSED BY: TableLoadingException: Error opening Iceberg table 
> 'test_insert_c97bfed.iceberg_hive_cat'
> E   CAUSED BY: InconsistentMetadataFetchException: Catalog object 
> TCatalogObject(type:TABLE, catalog_version:8745, 
> table:TTable(db_name:test_insert_c97bfed, tbl_name:iceberg_hive_cat)) changed 
> version between accesses.
>  {code}
> Standard Error
> {code:java}
> SET 
> client_identifier=query_test/test_iceberg.py::TestIcebergTable::()::test_insert[protocol:beeswax|exec_option:{'batch_size':0;'num_nodes':0;'disable_codegen_rows_threshold':0;'disable_codegen':False;'abort_on_error':1;'exec_single_node_rows_threshold':0}|table_format:parque;
> SET sync_ddl=False;
> -- executing against localhost:21000
> DROP DATABASE IF EXISTS `test_insert_c97bfed` CASCADE;
> -- 2022-03-03 06:53:51,764 INFO     MainThread: Started query 
> e54827913199d54b:81ef550a00000000
> SET 
> client_identifier=query_test/test_iceberg.py::TestIcebergTable::()::test_insert[protocol:beeswax|exec_option:{'batch_size':0;'num_nodes':0;'disable_codegen_rows_threshold':0;'disable_codegen':False;'abort_on_error':1;'exec_single_node_rows_threshold':0}|table_format:parque;
> SET sync_ddl=False;
> -- executing against localhost:21000
> CREATE DATABASE `test_insert_c97bfed`;
> -- 2022-03-03 06:53:57,134 INFO     MainThread: Started query 
> c74639986188e989:ffc8a33600000000
> -- 2022-03-03 06:53:57,196 INFO     MainThread: Created database 
> "test_insert_c97bfed" for test ID 
> "query_test/test_iceberg.py::TestIcebergTable::()::test_insert[protocol: 
> beeswax | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'exec_single_node_rows_threshold': 0} | table_format: 
> parquet/none]"
> SET 
> client_identifier=query_test/test_iceberg.py::TestIcebergTable::()::test_insert[protocol:beeswax|exec_option:{'batch_size':0;'num_nodes':0;'disable_codegen_rows_threshold':0;'disable_codegen':False;'abort_on_error':1;'exec_single_node_rows_threshold':0}|table_format:parque;
> -- executing against localhost:21000
> use test_insert_c97bfed;
> -- 2022-03-03 06:53:57,207 INFO     MainThread: Started query 
> 1b417dc9753deed1:ce0ebc0700000000
> SET 
> client_identifier=query_test/test_iceberg.py::TestIcebergTable::()::test_insert[protocol:beeswax|exec_option:{'batch_size':0;'num_nodes':0;'disable_codegen_rows_threshold':0;'disable_codegen':False;'abort_on_error':1;'exec_single_node_rows_threshold':0}|table_format:parque;
> SET batch_size=0;
> SET num_nodes=0;
> SET disable_codegen_rows_threshold=0;
> SET disable_codegen=False;
> SET abort_on_error=1;
> SET exec_single_node_rows_threshold=0;
> -- 2022-03-03 06:53:57,241 INFO     MainThread: Loading query test file: 
> /home/ubuntu/Impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
> -- executing against localhost:21000
> create table iceberg_alltypes(
>   id INT COMMENT 'Add a comment',
>   bool_col BOOLEAN,
>   int_col INT,
>   bigint_col BIGINT,
>   float_col FLOAT,
>   double_col DOUBLE,
>   date_col DATE,
>   string_col STRING,
>   timestamp_col TIMESTAMP
> )
> stored as iceberg
> tblproperties('iceberg.catalog'='hadoop.tables');
> -- 2022-03-03 06:54:00,136 INFO     MainThread: Started query 
> bd4a7bfc3c960f99:b2e546cb00000000
> -- executing against localhost:21000
> insert into iceberg_alltypes
> select id, bool_col, int_col, bigint_col, float_col, double_col,
> CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
> from functional.alltypes
> order by id
> limit 5;
> -- 2022-03-03 06:54:00,574 INFO     MainThread: Started query 
> 834eb2bcea190263:8a97217c00000000
> -- executing against localhost:21000
> select * from iceberg_alltypes;
> -- 2022-03-03 06:54:02,080 INFO     MainThread: Started query 
> 30409bce826387d3:c185434100000000
> -- executing against localhost:21000
> CREATE TABLE decimal_tbl (
>   d1 DECIMAL(9,0),
>   d2 DECIMAL(10,0),
>   d3 DECIMAL(20,10),
>   d4 DECIMAL(38,38),
>   d5 DECIMAL(10,5),
>   d6 DECIMAL(9,0)
> )
> STORED AS iceberg
> TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
> -- 2022-03-03 06:54:02,202 INFO     MainThread: Started query 
> 6546349f7c340ca7:155d92b400000000
> -- executing against localhost:21000
> insert into decimal_tbl select * from functional_parquet.decimal_tbl;
> -- 2022-03-03 06:54:02,350 INFO     MainThread: Started query 
> af421f7c7726321d:6da6059c00000000
> -- executing against localhost:21000
> select * from decimal_tbl;
> -- 2022-03-03 06:54:02,858 INFO     MainThread: Started query 
> 084c5fb372651844:6273982e00000000
> -- executing against localhost:21000
> create table int96_nanos (ts timestamp) stored as parquet;
> -- 2022-03-03 06:54:02,990 INFO     MainThread: Started query 
> 2c46567f0c984a5b:6fea63ad00000000
> -- executing against localhost:21000
> set parquet_timestamp_type=INT96_NANOS;
> -- 2022-03-03 06:54:03,048 INFO     MainThread: Started query 
> 914aaea52266e1b2:b77aeeb000000000
> -- executing against localhost:21000
> insert into int96_nanos values
> ("1400-01-01"),
> ("2019-01-18 00:00:00.000000001"),
> ("2019-01-18 00:00:00.000001"),
> ("2019-01-18 00:00:00.001"),
> ("2019-01-18 23:59:59.999"),
> ("2019-01-18 23:59:59.999999"),
> ("2019-01-18 23:59:59.999999999");
> -- 2022-03-03 06:54:03,158 INFO     MainThread: Started query 
> 1943c70feb174d5d:196228af00000000
> -- executing against localhost:21000
> SET PARQUET_TIMESTAMP_TYPE="INT96_NANOS";
> -- 2022-03-03 06:54:03,578 INFO     MainThread: Started query 
> f2447af500a15f6e:d46b11d700000000
> -- executing against localhost:21000
> create table ts_iceberg (ts timestamp) stored as iceberg
> tblproperties('iceberg.catalog'='hadoop.tables');
> -- 2022-03-03 06:54:03,589 INFO     MainThread: Started query 
> 2948694cfa519b93:d82cbf6300000000
> -- executing against localhost:21000
> insert into ts_iceberg select * from int96_nanos;
> -- 2022-03-03 06:54:04,269 INFO     MainThread: Started query 
> 6047d4ca6d5ff73c:89aebea800000000
> -- executing against localhost:21000
> select * from ts_iceberg;
> -- 2022-03-03 06:54:05,271 INFO     MainThread: Started query 
> 5d4eb15511b4381b:74493a7f00000000
> -- executing against localhost:21000
> create table iceberg_hadoop_cat (i int)
> stored as iceberg
> tblproperties('iceberg.catalog'='hadoop.catalog',
>   
> 'iceberg.catalog_location'='/test-warehouse/test_insert_c97bfed.db/hadoop_catalog_test');
> -- 2022-03-03 06:54:05,384 INFO     MainThread: Started query 
> de4667d242c22c5d:142bfede00000000
> -- executing against localhost:21000
> insert into iceberg_hadoop_cat values (1), (2), (3);
> -- 2022-03-03 06:54:05,526 INFO     MainThread: Started query 
> 8e4684f67e013bb6:6e4c83ec00000000
> -- executing against localhost:21000
> select * from iceberg_hadoop_cat;
> -- 2022-03-03 06:54:06,906 INFO     MainThread: Started query 
> 6743af5946d3cbde:59c85ae900000000
> -- executing against localhost:21000
> show files in iceberg_hadoop_cat;
> -- 2022-03-03 06:54:07,114 INFO     MainThread: Started query 
> 2f44905b4535cba1:f25eec7800000000
> -- executing against localhost:21000
> create table iceberg_hadoop_cat_ti (i int)
> stored as iceberg
> tblproperties('iceberg.catalog'='hadoop.catalog',
>   
> 'iceberg.catalog_location'='/test-warehouse/test_insert_c97bfed.db/hadoop_catalog_test',
>   'iceberg.table_identifier'='test.custom_db.int_table');
> -- 2022-03-03 06:54:07,162 INFO     MainThread: Started query 
> d4404c05d4b49a6b:b1170ccf00000000
> -- executing against localhost:21000
> insert into iceberg_hadoop_cat_ti values (1), (2), (3);
> -- 2022-03-03 06:54:07,674 INFO     MainThread: Started query 
> a6482139f3a258cd:d169a9c200000000
> -- executing against localhost:21000
> select * from iceberg_hadoop_cat_ti;
> -- 2022-03-03 06:54:08,863 INFO     MainThread: Started query 
> 1f48d05812600a2a:5710b56a00000000
> -- executing against localhost:21000
> show files in iceberg_hadoop_cat_ti;
> -- 2022-03-03 06:54:08,990 INFO     MainThread: Started query 
> 324d685844f88e47:61233d7d00000000
> -- executing against localhost:21000
> create table iceberg_hive_cat (i int)
> stored as iceberg
> tblproperties('iceberg.catalog'='hive.catalog');
> -- 2022-03-03 06:54:08,998 INFO     MainThread: Started query 
> 39470e44b824b40c:fbfd5e9e00000000
> -- executing against localhost:21000
> insert into iceberg_hive_cat values (7);
> -- 2022-03-03 06:54:09,136 INFO     MainThread: Started query 
> 6c4d6861ab54d44b:a04acaa400000000
> -- executing against localhost:21000
> select * from iceberg_hive_cat;
> -- 2022-03-03 06:54:09,621 INFO     MainThread: Started query 
> 564c0536732233d6:3f2391b600000000
> -- executing against localhost:21000
> create external table iceberg_hive_cat_ext (i int)
> stored as iceberg
> location '/test-warehouse/test_insert_c97bfed.db/iceberg_hive_cat'
> tblproperties('iceberg.catalog'='hive.catalog',
>     'iceberg.table_identifier'='test_insert_c97bfed.iceberg_hive_cat');
> -- 2022-03-03 06:54:09,771 INFO     MainThread: Started query 
> 3244dcc51751ddcb:f799fca200000000
> -- executing against localhost:21000
> select * from iceberg_hive_cat_ext;
> -- 2022-03-03 06:54:09,894 INFO     MainThread: Started query 
> 704d7a9cb5af192a:761f46ad00000000
> -- executing against localhost:21000
> insert into iceberg_hive_cat_ext values (8);
> -- 2022-03-03 06:54:10,015 INFO     MainThread: Started query 
> 75452f1c279534c6:c8db1e5400000000
> -- executing against localhost:21000
> select * from iceberg_hive_cat_ext;
> -- 2022-03-03 06:54:10,303 INFO     MainThread: Started query 
> 0540e1c28cca4f68:4ebd322c00000000
> -- executing against localhost:21000
> refresh iceberg_hive_cat;
> -- 2022-03-03 06:54:10,450 INFO     MainThread: Started query 
> b14f16cd1a7b959c:14ccf4d400000000
> -- executing against localhost:21000
> select * from iceberg_hive_cat;
> {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to