[ 
https://issues.apache.org/jira/browse/HIVE-26929?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Dharmik Thakkar updated HIVE-26929:
-----------------------------------
    Priority: Minor  (was: Critical)

> Allow creating iceberg tables without column definition when 
> 'metadata_location' tblproperties is set.
> ------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-26929
>                 URL: https://issues.apache.org/jira/browse/HIVE-26929
>             Project: Hive
>          Issue Type: Improvement
>          Components: Iceberg integration
>            Reporter: Dharmik Thakkar
>            Priority: Minor
>
> Allow creating iceberg tables without column definition when 
> 'metadata_location' tblproperties is set.
> Iceberg supports pointing to an external metadata.json file to infer the 
> table schema. Irrespective of the schema defined in the create table 
> statement, the metadata.json file is used to create the table. We should 
> therefore allow creating a table without a column definition when 
> metadata_location is defined in tblproperties.
> {code:java}
> create table test_meta (id int, name string, cgpa decimal) stored by iceberg 
> stored as orc;
> describe formatted test_meta;
> create table test_meta_copy(id int) stored by iceberg 
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
> As a result of the above SQL, we get test_meta_copy with the same schema as 
> test_meta, irrespective of the columns specified in the create table statement.
> |{color:#000000}*col_name*{color}|{color:#000000}*data_type*{color}|
> |{color:#000000}*id*{color}|{color:#000000}int{color}|
> |{color:#000000}*name*{color}|{color:#000000}string{color}|
> |{color:#000000}*cgpa*{color}|{color:#000000}decimal(10,0){color}|
> | |{color:#000000}NULL{color}|
> |{color:#000000}*# Detailed Table 
> Information*{color}|{color:#000000}NULL{color}|
> |{color:#000000}*Database:*           
> {color}|{color:#000000}iceberg_test_db_hive{color}|
> |{color:#000000}*OwnerType: *         {color}|{color:#000000}USER             
>    {color}|
> |{color:#000000}*Owner: *             {color}|{color:#000000}hive             
>    {color}|
> |{color:#000000}*CreateTime:*         {color}|{color:#000000}Tue Jan 10 
> 21:49:08 UTC 2023{color}|
> |{color:#000000}*LastAccessTime:*     {color}|{color:#000000}Fri Dec 12 
> 21:41:41 UTC 1969{color}|
> |{color:#000000}*Retention: *         
> {color}|{color:#000000}2147483647{color}|
> |{color:#000000}*Location:*           
> {color}|{color:#000000}+s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta+{color}|
> |{color:#000000}*Table Type:*         {color}|{color:#000000}EXTERNAL_TABLE   
>    {color}|
> |{color:#000000}*Table Parameters:*{color}|{color:#000000}NULL{color}|
> | |{color:#000000}EXTERNAL            {color}|
> | |{color:#000000}bucketing_version   {color}|
> | |{color:#000000}engine.hive.enabled{color}|
> | |{color:#000000}metadata_location   {color}|
> | |{color:#000000}numFiles            {color}|
> | |{color:#000000}numRows             {color}|
> | |{color:#000000}rawDataSize         {color}|
> | |{color:#000000}serialization.format{color}|
> | |{color:#000000}storage_handler     {color}|
> | |{color:#000000}table_type          {color}|
> | |{color:#000000}totalSize           {color}|
> | |{color:#000000}transient_lastDdlTime{color}|
> | |{color:#000000}uuid                {color}|
> | |{color:#000000}write.format.default{color}|
> | |{color:#000000}NULL{color}|
> |{color:#000000}*# Storage Information*{color}|{color:#000000}NULL{color}|
> |{color:#000000}*SerDe Library: *     
> {color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergSerDe{color}|
> |{color:#000000}*InputFormat: *       
> {color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergInputFormat{color}|
> |{color:#000000}*OutputFormat:*       
> {color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergOutputFormat{color}|
> |{color:#000000}*Compressed:*         {color}|{color:#000000}No               
>    {color}|
> |{color:#000000}*Sort Columns:*       {color}|{color:#000000}[]               
>    {color}|
> However, if we skip passing the column definition, the query fails:
> {code:java}
> create table test_meta_copy2 stored by iceberg 
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
> Error:
> {code:java}
> INFO  : Compiling 
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19): 
> create table test_meta_copy2 stored by iceberg 
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
> INFO  : Semantic Analysis Completed (retrial = false)
> INFO  : Created Hive schema: Schema(fieldSchemas:null, properties:null)
> INFO  : Completed compiling 
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); 
> Time taken: 0.019 seconds
> INFO  : Executing 
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19): 
> create table test_meta_copy2 stored by iceberg 
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
> INFO  : Starting task [Stage-0:DDL] in serial mode
> ERROR : Failed
> org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: 
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
> provide an existing table or a valid schema)
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
>       at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
>  ~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
>       at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
>       at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>  ~[?:?]
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>  ~[?:?]
>       at java.lang.Thread.run(Thread.java:829) [?:?]
> Caused by: java.lang.RuntimeException: 
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
> provide an existing table or a valid schema)
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       ... 28 more
> Caused by: org.apache.hadoop.hive.metastore.api.MetaException: 
> org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table 
> or a valid schema
>       at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       ... 28 more
> ERROR : DDLTask failed, DDL Operation: class 
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation
> org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: 
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
> provide an existing table or a valid schema)
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
>       at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
>  ~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
>  ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
>       at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
>       at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>  ~[?:?]
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>  ~[?:?]
>       at java.lang.Thread.run(Thread.java:829) [?:?]
> Caused by: java.lang.RuntimeException: 
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
> provide an existing table or a valid schema)
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       ... 28 more
> Caused by: org.apache.hadoop.hive.metastore.api.MetaException: 
> org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table 
> or a valid schema
>       at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
>  ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at 
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
>       ... 28 more
> ERROR : FAILED: Execution Error, return code 40000 from 
> org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException: 
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
> provide an existing table or a valid schema)
> INFO  : Completed executing 
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); 
> Time taken: 0.033 seconds
> INFO  : OK {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to