[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15375933#comment-15375933 ] Sergio Peña commented on HIVE-14137: Patch looks good. +1 > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.5.patch, > HIVE-14137.6.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. 
> select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > 
at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at > org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) > at > org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1902) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1738) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1663) > at >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15375288#comment-15375288 ] Sahil Takiar commented on HIVE-14137: - Test failures look unrelated. [~spena] can you review? > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.5.patch, > HIVE-14137.6.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. 
> insert into empty2 select * from empty2 where false; > -- Same query succeeds. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at > org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) > at > org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1902) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1738) > at
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15374137#comment-15374137 ] Hive QA commented on HIVE-14137: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12817305/HIVE-14137.6.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 12 failed/errored test(s), 10315 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_acid_globallimit org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_masking_8 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_stats_list_bucket org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_subquery_multiinsert org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_vector_interval_arithmetic org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_vector_complex_all org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_vector_complex_join org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_acid_globallimit org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_interval_arithmetic org.apache.hadoop.hive.cli.TestMinimrCliDriver.org.apache.hadoop.hive.cli.TestMinimrCliDriver org.apache.hadoop.hive.llap.daemon.impl.TestLlapTokenChecker.testCheckPermissions org.apache.hadoop.hive.llap.daemon.impl.TestLlapTokenChecker.testGetToken {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/487/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/487/console Test logs: http://ec2-204-236-174-241.us-west-1.compute.amazonaws.com/logs/PreCommit-HIVE-MASTER-Build-487/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited 
with: TestsFailedException: 12 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12817305 - PreCommit-HIVE-MASTER-Build > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.5.patch, > HIVE-14137.6.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. 
> select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15371988#comment-15371988 ] Sahil Takiar commented on HIVE-14137: - Re-basing again > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.5.patch, > HIVE-14137.6.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. 
> select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > 
at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at > org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) > at > org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1902) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1738) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1663) > at >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15371945#comment-15371945 ] Hive QA commented on HIVE-14137: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12817191/HIVE-14137.5.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/473/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/473/console Test logs: http://ec2-204-236-174-241.us-west-1.compute.amazonaws.com/logs/PreCommit-HIVE-MASTER-Build-473/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Tests exited with: NonZeroExitCodeException Command 'bash /data/hive-ptest/working/scratch/source-prep.sh' failed with exit status 1 and output '+ [[ -n /usr/java/jdk1.8.0_25 ]] + export JAVA_HOME=/usr/java/jdk1.8.0_25 + JAVA_HOME=/usr/java/jdk1.8.0_25 + export PATH=/usr/java/jdk1.8.0_25/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + PATH=/usr/java/jdk1.8.0_25/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + export 'ANT_OPTS=-Xmx1g -XX:MaxPermSize=256m ' + ANT_OPTS='-Xmx1g -XX:MaxPermSize=256m ' + export 'M2_OPTS=-Xmx1g -XX:MaxPermSize=256m -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + M2_OPTS='-Xmx1g -XX:MaxPermSize=256m -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + cd /data/hive-ptest/working/ + tee /data/hive-ptest/logs/PreCommit-HIVE-MASTER-Build-473/source-prep.txt + [[ false == \t\r\u\e ]] + mkdir -p maven ivy + [[ git = \s\v\n ]] + [[ git = \g\i\t ]] + [[ -z master ]] + [[ -d apache-github-source-source ]] + [[ ! -d apache-github-source-source/.git ]] + [[ ! 
-d apache-github-source-source ]] + cd apache-github-source-source + git fetch origin From https://github.com/apache/hive c790391..a61c351 master -> origin/master + git reset --hard HEAD HEAD is now at c790391 HIVE-14151: Use of USE_DEPRECATED_CLI environment variable does not work (Vihang Karajgaonkar, reviewed by Sergio Pena) + git clean -f -d + git checkout master Already on 'master' Your branch is behind 'origin/master' by 1 commit, and can be fast-forwarded. (use "git pull" to update your local branch) + git reset --hard origin/master HEAD is now at a61c351 HIVE-14200: Tez: disable auto-reducer parallelism when reducer-count * min.partition.factor < 1.0 (Gopal V, reviewed by Gunther Hagleitner) + git merge --ff-only origin/master Already up-to-date. + git gc + patchCommandPath=/data/hive-ptest/working/scratch/smart-apply-patch.sh + patchFilePath=/data/hive-ptest/working/scratch/build.patch + [[ -f /data/hive-ptest/working/scratch/build.patch ]] + chmod +x /data/hive-ptest/working/scratch/smart-apply-patch.sh + /data/hive-ptest/working/scratch/smart-apply-patch.sh /data/hive-ptest/working/scratch/build.patch The patch does not appear to apply with p0, p1, or p2 + exit 1 ' {noformat} This message is automatically generated. 
ATTACHMENT ID: 12817191 - PreCommit-HIVE-MASTER-Build > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.5.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15371116#comment-15371116 ] Sahil Takiar commented on HIVE-14137: - Re-based patch > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.5.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. 
> select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > 
at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at > org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) > at > org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1902) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1738) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1663) > at >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15369057#comment-15369057 ] Hive QA commented on HIVE-14137: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12816881/HIVE-14137.4.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/438/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/438/console Test logs: http://ec2-204-236-174-241.us-west-1.compute.amazonaws.com/logs/PreCommit-HIVE-MASTER-Build-438/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Tests exited with: NonZeroExitCodeException Command 'bash /data/hive-ptest/working/scratch/source-prep.sh' failed with exit status 1 and output '+ [[ -n /usr/java/jdk1.8.0_25 ]] + export JAVA_HOME=/usr/java/jdk1.8.0_25 + JAVA_HOME=/usr/java/jdk1.8.0_25 + export PATH=/usr/java/jdk1.8.0_25/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + PATH=/usr/java/jdk1.8.0_25/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + export 'ANT_OPTS=-Xmx1g -XX:MaxPermSize=256m ' + ANT_OPTS='-Xmx1g -XX:MaxPermSize=256m ' + export 'M2_OPTS=-Xmx1g -XX:MaxPermSize=256m -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + M2_OPTS='-Xmx1g -XX:MaxPermSize=256m -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + cd /data/hive-ptest/working/ + tee /data/hive-ptest/logs/PreCommit-HIVE-MASTER-Build-438/source-prep.txt + [[ false == \t\r\u\e ]] + mkdir -p maven ivy + [[ git = \s\v\n ]] + [[ git = \g\i\t ]] + [[ -z master ]] + [[ -d apache-github-source-source ]] + [[ ! -d apache-github-source-source/.git ]] + [[ ! 
-d apache-github-source-source ]] + cd apache-github-source-source + git fetch origin From https://github.com/apache/hive 7a91bbf..0506161 master -> origin/master cec61d9..0ba089b branch-2.1 -> origin/branch-2.1 + git reset --hard HEAD HEAD is now at 7a91bbf HIVE-14114 Ensure RecordWriter in streaming API is using the same UserGroupInformation as StreamingConnection (Eugene Koifman, reviewed by Wei Zheng) + git clean -f -d Removing ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q Removing ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out + git checkout master Already on 'master' Your branch is behind 'origin/master' by 1 commit, and can be fast-forwarded. (use "git pull" to update your local branch) + git reset --hard origin/master HEAD is now at 0506161 HIVE-14176: CBO nesting windowing function within each other when merging Project operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) + git merge --ff-only origin/master Already up-to-date. + git gc + patchCommandPath=/data/hive-ptest/working/scratch/smart-apply-patch.sh + patchFilePath=/data/hive-ptest/working/scratch/build.patch + [[ -f /data/hive-ptest/working/scratch/build.patch ]] + chmod +x /data/hive-ptest/working/scratch/smart-apply-patch.sh + /data/hive-ptest/working/scratch/smart-apply-patch.sh /data/hive-ptest/working/scratch/build.patch The patch does not appear to apply with p0, p1, or p2 + exit 1 ' {noformat} This message is automatically generated. 
ATTACHMENT ID: 12816881 - PreCommit-HIVE-MASTER-Build > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, > HIVE-14137.3.patch, HIVE-14137.4.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15358042#comment-15358042 ] Sahil Takiar commented on HIVE-14137: - Updated diff and RB to use a UUID instead of the alias in the constructed {{Path}}. > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.2.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. 
> select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > 
at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at > org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) > at > org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1902) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1738) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1663) > at >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15357933#comment-15357933 ] Sahil Takiar commented on HIVE-14137: - It seems there are a bunch of test failures occurring because alias names may have special characters in them, such as ":". This causes problems since we are putting the alias name in a {{Path}} object. This eventually causes exceptions like {{java.net.URISyntaxException: Relative path in absolute URI: null-subquery1:$hdt$_0-subquery1:$hdt$_0:x}} when trying to perform an ls on the {{Path}} object. Instead of putting the alias in the {{Path}}, I am just going to place a UUID in the {{Path}}. > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. 
> insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at 
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15357628#comment-15357628 ] Hive QA commented on HIVE-14137: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12815273/HIVE-14137.1.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 48 failed/errored test(s), 10287 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_authorization_view_sqlstd org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_auto_join32 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_cbo_input26 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_cbo_rp_lineage2 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_cbo_stats org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_compute_stats_empty_table org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_diff_part_input_formats org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_fileformat_mix org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_groupby_constcolval org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_groupby_sort_6 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_hook_context_cs org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_index_auth org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_index_creation org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_input23 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_input24 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_input25 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_input26 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_input41 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_insert_into_with_schema org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_insertoverwrite_bucket org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_join_empty 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_lineage2 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_mergejoin org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_metadataonly1 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_nestedvirtual org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_nullgroup5 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_nullinput org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_nullinput2 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_optimize_nullscan org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_partition_boolexpr org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_pcs org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_ppd_join4 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_ppd_union_view org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_reduce_deduplicate_exclude_gby org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_semijoin5 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_stats_list_bucket org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_subquery_multiinsert org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_topn org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_unionall_join_nullconstant org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_unionall_unbalancedppd org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_vector_join org.apache.hadoop.hive.cli.TestHBaseCliDriver.testCliDriver_hbase_queries org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_vector_complex_all org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_vector_complex_join org.apache.hadoop.hive.llap.tezplugins.TestLlapTaskSchedulerService.testDelayedLocalityNodeCommErrorImmediateAllocation org.apache.hive.service.cli.TestEmbeddedThriftBinaryCLIService.testExecuteStatementParallel 
org.apache.hive.service.cli.TestEmbeddedThriftBinaryCLIService.testTaskStatus {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/327/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-MASTER-Build/327/console Test logs: http://ec2-50-18-27-0.us-west-1.compute.amazonaws.com/logs/PreCommit-HIVE-MASTER-Build-327/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 48 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12815273 - PreCommit-HIVE-MASTER-Build > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL:
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15356308#comment-15356308 ] Sahil Takiar commented on HIVE-14137: - [~vikram.dixit] based on the Git history, you may have some more context on the method {{Utilities.getInputPaths}}; let me know if you have any feedback on the bug or the fix. > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.1.patch, HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. > insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. 
> insert into empty2 select * from empty2 where false; > -- Same query succeeds. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at > org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) > at > org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1902) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1738) > at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1663) >
[jira] [Commented] (HIVE-14137) Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty tables
[ https://issues.apache.org/jira/browse/HIVE-14137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15356280#comment-15356280 ] Sahil Takiar commented on HIVE-14137: - The root cause is the {{Utilities.getInputPaths}} method. If it is ever invoked for two different aliases, both of which are empty tables, it will try to create the same temp file for the two tables. The first file creation will succeed, and then the second one will fail with a {{FileAlreadyExistsException}}. The solution is to ensure that each alias uses a unique file path; this is done by including the alias name in the path of the file to be created. > Hive on Spark throws FileAlreadyExistsException for jobs with multiple empty > tables > --- > > Key: HIVE-14137 > URL: https://issues.apache.org/jira/browse/HIVE-14137 > Project: Hive > Issue Type: Bug >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-14137.patch > > > The following queries: > {code} > -- Setup > drop table if exists empty1; > create table empty1 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > drop table if exists empty2; > create table empty2 (col1 bigint, col2 bigint) stored as parquet > tblproperties ('parquet.compress'='snappy'); > drop table if exists empty3; > create table empty3 (col1 bigint) stored as parquet tblproperties > ('parquet.compress'='snappy'); > -- All empty HDFS directories. > -- Fails with [08S01]: Error while processing statement: FAILED: Execution > Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- Two empty HDFS directories. > -- Create an empty file in HDFS. 
> insert into empty1 select * from empty1 where false; > -- Same query fails with [08S01]: Error while processing statement: FAILED: > Execution Error, return code 3 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > -- One empty HDFS directory. > -- Create an empty file in HDFS. > insert into empty2 select * from empty2 where false; > -- Same query succeeds. > select empty1.col1 > from empty1 > inner join empty2 > on empty2.col1 = empty1.col1 > inner join empty3 > on empty3.col1 = empty2.col2; > {code} > Will result in the following exception: > {code} > org.apache.hadoop.fs.FileAlreadyExistsException: > /tmp/hive/hive/1f3837aa-9407-4780-92b1-42a66d205139/hive_2016-06-24_15-45-23_206_79177714958655528-2/-mr-10004/0/emptyFile > for client 172.26.14.151 already exists > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2784) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593) > at > org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086) > at 
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) > at >