[ https://issues.apache.org/jira/browse/HIVE-26628?focusedWorklogId=825226&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-825226 ]
ASF GitHub Bot logged work on HIVE-26628:
-----------------------------------------

            Author: ASF GitHub Bot
        Created on: 11/Nov/22 10:15
        Start Date: 11/Nov/22 10:15
Worklog Time Spent: 10m
  Work Description: deniskuzZ commented on code in PR #3745:
URL: https://github.com/apache/hive/pull/3745#discussion_r1020080504


##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########

@@ -804,31 +806,59 @@ public static Schema schema(Configuration config) {
   @VisibleForTesting
   static void overlayTableProperties(Configuration configuration, TableDesc tableDesc, Map<String, String> map) {
     Properties props = tableDesc.getProperties();
-    Table table = IcebergTableUtil.getTable(configuration, props);
-    String schemaJson = SchemaParser.toJson(table.schema());
     Maps.fromProperties(props).entrySet().stream()
         .filter(entry -> !map.containsKey(entry.getKey()))  // map overrides tableDesc properties
         .forEach(entry -> map.put(entry.getKey(), entry.getValue()));
-    map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
-    map.put(InputFormatConfig.TABLE_LOCATION, table.location());
-    map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
-    props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));
-
-    // serialize table object into config
-    Table serializableTable = SerializableTable.copyOf(table);
-    checkAndSkipIoConfigSerialization(configuration, serializableTable);
-    map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
-        SerializationUtil.serializeToBase64(serializableTable));
+    try {
+      Table table = IcebergTableUtil.getTable(configuration, props);
+      String schemaJson = SchemaParser.toJson(table.schema());
+
+      map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
+      map.put(InputFormatConfig.TABLE_LOCATION, table.location());
+      map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+      props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(table.spec()));
+
+      // serialize table object into config
+      Table serializableTable = SerializableTable.copyOf(table);
+      checkAndSkipIoConfigSerialization(configuration, serializableTable);
+      map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX + tableDesc.getTableName(),
+          SerializationUtil.serializeToBase64(serializableTable));
+
+      // We need to remove this otherwise the job.xml will be invalid as column comments are separated with '\0' and
+      // the serialization utils fail to serialize this character
+      map.remove("columns.comments");
+
+      // save schema into table props as well to avoid repeatedly hitting the HMS during serde initializations
+      // this is an exception to the interface documentation, but it's a safe operation to add this property
+      props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+    } catch (NoSuchTableException ex) {
+      if (!(StringUtils.isNotBlank(props.getProperty(hive_metastoreConstants.TABLE_IS_CTAS)) &&
+          Boolean.parseBoolean(props.getProperty(org.apache.hadoop.hive.conf.Constants.IS_EXPLAIN)))) {
+        throw ex;
+      }
-    // We need to remove this otherwise the job.xml will be invalid as column comments are separated with '\0' and
-    // the serialization utils fail to serialize this character
-    map.remove("columns.comments");
+      try {
+        map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));

Review Comment:
   try to extract common parts


Issue Time Tracking
-------------------

    Worklog Id:     (was: 825226)
    Time Spent: 4h 10m  (was: 4h)
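A minimal sketch of what "extract common parts" could look like (assuming the imports and constants already present in HiveIcebergStorageHandler.java; the helper name putCommonTableInfo and its null-tolerant parameters are hypothetical, not part of the actual patch). Both the regular path and the NoSuchTableException fallback would then populate the shared entries through one method:

{code}
// Hypothetical helper extracted from the duplicated map/props population above.
// The regular path passes table.location() and the schema JSON; the
// EXPLAIN-of-CTAS fallback passes nulls because no table object exists yet.
private static void putCommonTableInfo(Map<String, String> map, Properties props,
    String location, String schemaJson) {
  map.put(InputFormatConfig.TABLE_IDENTIFIER, props.getProperty(Catalogs.NAME));
  if (location != null) {
    map.put(InputFormatConfig.TABLE_LOCATION, location);
  }
  if (schemaJson != null) {
    map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
    // keep the schema in the table props too, to avoid repeated HMS lookups
    // during serde initialization
    props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
  }
  // column comments are '\0'-separated and break job.xml serialization
  map.remove("columns.comments");
}
{code}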
> Iceberg table is created when running explain ctas command
> ----------------------------------------------------------
>
>                 Key: HIVE-26628
>                 URL: https://issues.apache.org/jira/browse/HIVE-26628
>             Project: Hive
>          Issue Type: Bug
>          Components: StorageHandler
>            Reporter: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 4h 10m
>  Remaining Estimate: 0h
>
> {code}
> create table source(a int, b string, c int);
> explain
> create table tbl_ice stored by iceberg stored as orc tblproperties
> ('format-version'='2') as
> select a, b, c from source;
> create table tbl_ice stored by iceberg stored as orc tblproperties
> ('format-version'='2') as
> select a, b, c from source;
> {code}
> {code}
> org.apache.hadoop.hive.ql.parse.SemanticException: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
>   at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13963)
>   at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genResolvedParseTree(SemanticAnalyzer.java:12528)
>   at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12693)
>   at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:460)
>   at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317)
>   at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
>   at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:106)
>   at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:522)
>   at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:474)
>   at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:439)
>   at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:433)
>   at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
>   at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:227)
>   at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255)
>   at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:200)
>   at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:126)
>   at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:421)
>   at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:352)
>   at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:727)
>   at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:697)
>   at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:114)
>   at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
>   at org.apache.hadoop.hive.cli.TestIcebergLlapLocalCliDriver.testCliDriver(TestIcebergLlapLocalCliDriver.java:60)
>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>   at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>   at java.lang.reflect.Method.invoke(Method.java:498)
>   at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
>   at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
>   at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
>   at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
>   at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
>   at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
>   at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
>   at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
>   at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
>   at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
>   at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
>   at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
>   at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
>   at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
>   at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
>   at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
>   at org.junit.runners.Suite.runChild(Suite.java:128)
>   at org.junit.runners.Suite.runChild(Suite.java:27)
>   at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
>   at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
>   at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
>   at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
>   at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
>   at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
>   at org.junit.rules.RunRules.evaluate(RunRules.java:20)
>   at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
>   at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
>   at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
>   at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
>   at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
>   at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Table already exists: default.tbl_ice
>   at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeCreateTable(SemanticAnalyzer.java:13960)
>   ... 61 more
> {code}
> The EXPLAIN ... command already creates the Iceberg table default.tbl_ice, so the CTAS command executed after it fails with "Table already exists".
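For reference, the decision logic of the fix can be restated in isolation (a standalone sketch only; the string keys below are placeholders for hive_metastoreConstants.TABLE_IS_CTAS and org.apache.hadoop.hive.conf.Constants.IS_EXPLAIN, and the class and method names are hypothetical):

{code}
import java.util.Properties;

// A missing Iceberg table is tolerated only while compiling an EXPLAIN of a
// CTAS statement, because there the target table intentionally does not exist
// yet; in every other case the NoSuchTableException must be rethrown.
final class ExplainCtasGuard {

  static boolean shouldTolerateMissingTable(Properties props) {
    // mirrors StringUtils.isNotBlank(...) on the CTAS marker in the patch
    String ctasMarker = props.getProperty("TABLE_IS_CTAS", "");
    boolean isCtas = !ctasMarker.trim().isEmpty();
    boolean isExplain = Boolean.parseBoolean(props.getProperty("IS_EXPLAIN", "false"));
    return isCtas && isExplain;
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("TABLE_IS_CTAS", "true");
    props.setProperty("IS_EXPLAIN", "true");
    // prints true: swallow the exception instead of failing the EXPLAIN
    System.out.println(shouldTolerateMissingTable(props));
  }
}
{code}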