Daniel Voros created HIVE-17947:
-----------------------------------
Summary: Concurrent inserts might fail for ACID table since
HIVE-17526 on branch-1
Key: HIVE-17947
URL: https://issues.apache.org/jira/browse/HIVE-17947
Project: Hive
Issue Type: Bug
Components: Transactions
Affects Versions: 1.3.0
Reporter: Daniel Voros
Assignee: Daniel Voros
Priority: Blocker
HIVE-17526 (only on branch-1) disabled conversion to ACID if there are *_copy_N
files under the table, but the filesystem checks introduced there run on
every insert, because the MoveTask at the end of the insert eventually calls
alterTable.
The filename check also recurses into staging directories created by other
inserts. If those directories are removed while the files are being listed, the
insert fails with the following exception:
{code}
java.io.FileNotFoundException: File
hdfs://mycluster/apps/hive/warehouse/dvoros.db/concurrent_insert/.hive-staging_hive_2017-10-30_13-23-35_056_2844419018556002410-2/-ext-10001
does not exist.
at
org.apache.hadoop.hdfs.DistributedFileSystem$DirListingIterator.<init>(DistributedFileSystem.java:1081)
~[hadoop-hdfs-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem$DirListingIterator.<init>(DistributedFileSystem.java:1059)
~[hadoop-hdfs-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem$23.doCall(DistributedFileSystem.java:1004)
~[hadoop-hdfs-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem$23.doCall(DistributedFileSystem.java:1000)
~[hadoop-hdfs-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
~[hadoop-common-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem.listLocatedStatus(DistributedFileSystem.java:1018)
~[hadoop-hdfs-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.fs.FileSystem.listLocatedStatus(FileSystem.java:1735)
~[hadoop-common-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.fs.FileSystem$6.handleFileStat(FileSystem.java:1864)
~[hadoop-common-2.7.3.2.6.3.0-235.jar:?]
at org.apache.hadoop.fs.FileSystem$6.hasNext(FileSystem.java:1841)
~[hadoop-common-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hadoop.hive.metastore.TransactionalValidationListener.containsCopyNFiles(TransactionalValidationListener.java:226)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.TransactionalValidationListener.handleAlterTableTransactionalProp(TransactionalValidationListener.java:104)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.TransactionalValidationListener.handle(TransactionalValidationListener.java:63)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.TransactionalValidationListener.onEvent(TransactionalValidationListener.java:55)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.firePreEvent(HiveMetaStore.java:2478)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.alter_table_core(HiveMetaStore.java:4145)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.alter_table_with_environment_context(HiveMetaStore.java:4117)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at sun.reflect.GeneratedMethodAccessor107.invoke(Unknown Source) ~[?:?]
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
~[?:1.8.0_144]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_144]
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:148)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at com.sun.proxy.$Proxy32.alter_table_with_environment_context(Unknown
Source) [?:?]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table_with_environmentContext(HiveMetaStoreClient.java:299)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table_with_environmentContext(SessionHiveMetaStoreClient.java:325)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at sun.reflect.GeneratedMethodAccessor87.invoke(Unknown Source) ~[?:?]
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
~[?:1.8.0_144]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_144]
at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at com.sun.proxy.$Proxy33.alter_table_with_environmentContext(Unknown
Source) [?:?]
at sun.reflect.GeneratedMethodAccessor87.invoke(Unknown Source) ~[?:?]
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
~[?:1.8.0_144]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_144]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2227)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at com.sun.proxy.$Proxy33.alter_table_with_environmentContext(Unknown
Source) [?:?]
at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:626)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:608)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:2020)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:380)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1987)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1667)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1414)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1211)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1204)
[hive-exec-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:242)
[hive-service-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hive.service.cli.operation.SQLOperation.access$800(SQLOperation.java:91)
[hive-service-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:336)
[hive-service-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at java.security.AccessController.doPrivileged(Native Method)
~[?:1.8.0_144]
at javax.security.auth.Subject.doAs(Subject.java:422) [?:1.8.0_144]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
[hadoop-common-2.7.3.2.6.3.0-235.jar:?]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:350)
[hive-service-2.1.0.2.6.3.0-235.jar:2.1.0.2.6.3.0-235]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[?:1.8.0_144]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[?:1.8.0_144]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[?:1.8.0_144]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[?:1.8.0_144]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_144]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_144]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_144]
{code}
We should:
- run checks only when converting to ACID (as we do on master), not on every
alter
- skip hidden files to prevent recursing into staging directories
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)