[ https://issues.apache.org/jira/browse/HIVE-26628?focusedWorklogId=825229&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-825229 ]

ASF GitHub Bot logged work on HIVE-26628:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 11/Nov/22 10:17
            Start Date: 11/Nov/22 10:17
    Worklog Time Spent: 10m 
      Work Description: deniskuzZ commented on code in PR #3745:
URL: https://github.com/apache/hive/pull/3745#discussion_r1020081943


##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -804,31 +806,59 @@ public static Schema schema(Configuration config) {
   @VisibleForTesting
   static void overlayTableProperties(Configuration configuration, TableDesc tableDesc, Map<String, String> map) {
     Properties props = tableDesc.getProperties();
-    Table table = IcebergTableUtil.getTable(configuration, props);
-    String schemaJson = SchemaParser.toJson(table.schema());
 
     Maps.fromProperties(props).entrySet().stream()
         .filter(entry -> !map.containsKey(entry.getKey())) // map overrides tableDesc properties
         .forEach(entry -> map.put(entry.getKey(), entry.getValue()));
 
-    map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
-    map.put(InputFormatConfig.TABLE_LOCATION, table.location());
-    map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
-    props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));
-
-    // serialize table object into config
-    Table serializableTable = SerializableTable.copyOf(table);
-    checkAndSkipIoConfigSerialization(configuration, serializableTable);
-    map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
-        SerializationUtil.serializeToBase64(serializableTable));
+    try {
+      Table table = IcebergTableUtil.getTable(configuration, props);
+      String schemaJson = SchemaParser.toJson(table.schema());
+
+      map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
+      map.put(InputFormatConfig.TABLE_LOCATION, table.location());
+      map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+      props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));
+
+      // serialize table object into config
+      Table serializableTable = SerializableTable.copyOf(table);
+      checkAndSkipIoConfigSerialization(configuration, serializableTable);
+      map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
+          SerializationUtil.serializeToBase64(serializableTable));
+
+      // We need to remove this otherwise the job.xml will be invalid as column comments are separated with '\0' and
+      // the serialization utils fail to serialize this character
+      map.remove("columns.comments");
+
+      // save schema into table props as well to avoid repeatedly hitting the HMS during serde initializations
+      // this is an exception to the interface documentation, but it's a safe operation to add this property
+      props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+    } catch (NoSuchTableException ex) {
+      if (!(StringUtils.isNotBlank(props.getProperty(hive_metastoreConstants.TABLE_IS_CTAS)) &&
+          Boolean.parseBoolean(props.getProperty(org.apache.hadoop.hive.conf.Constants.IS_EXPLAIN)))) {
+        throw ex;
+      }
 
-    // We need to remove this otherwise the job.xml will be invalid as column comments are separated with '\0' and
-    // the serialization utils fail to serialize this character
-    map.remove("columns.comments");
+      try {
+        map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
+        map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
+            SerializationUtil.serializeToBase64(null));
 
-    // save schema into table props as well to avoid repeatedly hitting the HMS during serde initializations
-    // this is an exception to the interface documentation, but it's a safe operation to add this property
-    props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+        String location = map.get(hive_metastoreConstants.META_TABLE_LOCATION);
+        if (StringUtils.isBlank(location)) {
+          location = props.getProperty(hive_metastoreConstants.TABLE_IS_CTAS);
+        }
+        map.put(InputFormatConfig.TABLE_LOCATION, location);
+
+        AbstractSerDe serDe = tableDesc.getDeserializer(configuration);
+        HiveIcebergSerDe icebergSerDe = (HiveIcebergSerDe) serDe;
+        String schemaJson = SchemaParser.toJson(icebergSerDe.getTableSchema());
+        map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+        props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+      } catch (Exception e) {
+        throw new RuntimeException(e);

Review Comment:
   should we throw MetaException?
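
A minimal, purely illustrative sketch of what the reviewer may be suggesting, not code from the PR: the fallback's "throw new RuntimeException(e)" could instead surface a MetaException (org.apache.hadoop.hive.metastore.api.MetaException). MetaException is a Thrift-generated, checked exception whose constructors only accept a message, so the cause has to be attached via initCause() and the enclosing method would need to declare or tunnel the checked exception; the helper class and method names below are hypothetical.

    import org.apache.hadoop.hive.metastore.api.MetaException;

    // Hypothetical helper, not part of HiveIcebergStorageHandler.
    final class MetaExceptionSketch {
      private MetaExceptionSketch() {
      }

      // Wraps an arbitrary failure into a checked MetaException while keeping the original cause.
      static MetaException asMetaException(String context, Exception e) {
        MetaException me = new MetaException(context + ": " + e.getMessage());
        me.initCause(e);  // MetaException has no (String, Throwable) constructor
        return me;
      }
    }

With a helper like this, the inner catch block in the diff could read "throw asMetaException("Failed to overlay Iceberg table properties for EXPLAIN CTAS", e)", assuming overlayTableProperties is changed to declare throws MetaException.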





Issue Time Tracking
-------------------

    Worklog Id:     (was: 825229)
    Time Spent: 4h 20m  (was: 4h 10m)

> Iceberg table is created when running explain ctas command
> ----------------------------------------------------------
>
>                 Key: HIVE-26628
>                 URL: https://issues.apache.org/jira/browse/HIVE-26628
>             Project: Hive
>          Issue Type: Bug
>          Components: StorageHandler
>            Reporter: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 4h 20m
>  Remaining Estimate: 0h
>
> {code}
> create table source(a int, b string, c int);
> explain
> create table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
> select a, b, c from source;
> create table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
> select a, b, c from source;
> {code}
> {code}
>  org.apache.hadoop.hive.ql.parse.SemanticException: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
>       at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13963)
>       at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:12528)
>       at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12693)
>       at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:460)
>       at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317)
>       at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
>       at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:106)
>       at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:522)
>       at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:474)
>       at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:439)
>       at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:433)
>       at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
>       at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:227)
>       at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255)
>       at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:200)
>       at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:126)
>       at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:421)
>       at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:352)
>       at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:727)
>       at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:697)
>       at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:114)
>       at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
>       at org.apache.hadoop.hive.cli.TestIcebergLlapLocalCliDriver.testCliDriver(TestIcebergLlapLocalCliDriver.java:60)
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:498)
>       at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
>       at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
>       at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
>       at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
>       at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
>       at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
>       at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
>       at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
>       at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
>       at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
>       at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
>       at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
>       at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
>       at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
>       at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
>       at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
>       at org.junit.runners.Suite.runChild(Suite.java:128)
>       at org.junit.runners.Suite.runChild(Suite.java:27)
>       at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
>       at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
>       at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
>       at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
>       at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
>       at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
>       at org.junit.rules.RunRules.evaluate(RunRules.java:20)
>       at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
>       at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
>       at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
>       at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
>       at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
>       at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
>       at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
>       at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
>       at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
>       at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
>       at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13960)
>       ... 61 more
> {code}
> The EXPLAIN ... command creates the Iceberg table default.tbl_ice, hence the CTAS command executed after it fails with "Table already exists".



--
This message was sent by Atlassian Jira
(v8.20.10#820010)
