[jira] [Commented] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631683#comment-15631683 ] Hive QA commented on HIVE-14803: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836719/HIVE-14803.5.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1938/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1938/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1938/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Tests exited with: NonZeroExitCodeException Command 'bash /data/hiveptest/working/scratch/source-prep.sh' failed with exit status 1 and output '+ date '+%Y-%m-%d %T.%3N' 2016-11-03 05:37:49.039 + [[ -n /usr/lib/jvm/java-8-openjdk-amd64 ]] + export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + export PATH=/usr/lib/jvm/java-8-openjdk-amd64/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + PATH=/usr/lib/jvm/java-8-openjdk-amd64/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + export 'ANT_OPTS=-Xmx1g -XX:MaxPermSize=256m ' + ANT_OPTS='-Xmx1g -XX:MaxPermSize=256m ' + export 'MAVEN_OPTS=-Xmx1g -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + MAVEN_OPTS='-Xmx1g -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + cd /data/hiveptest/working/ + tee /data/hiveptest/logs/PreCommit-HIVE-Build-1938/source-prep.txt + [[ false == \t\r\u\e ]] + mkdir -p maven ivy + [[ git = \s\v\n ]] + [[ git = \g\i\t ]] + [[ -z master ]] + [[ -d apache-github-source-source ]] + [[ ! -d apache-github-source-source/.git ]] + [[ ! 
-d apache-github-source-source ]] + date '+%Y-%m-%d %T.%3N' 2016-11-03 05:37:49.041 + cd apache-github-source-source + git fetch origin + git reset --hard HEAD HEAD is now at 345353c HIVE-15039: A better job monitor console output for HoS (Rui reviewed by Xuefu and Ferdinand) + git clean -f -d + git checkout master Already on 'master' Your branch is up-to-date with 'origin/master'. + git reset --hard origin/master HEAD is now at 345353c HIVE-15039: A better job monitor console output for HoS (Rui reviewed by Xuefu and Ferdinand) + git merge --ff-only origin/master Already up-to-date. + date '+%Y-%m-%d %T.%3N' 2016-11-03 05:37:49.916 + patchCommandPath=/data/hiveptest/working/scratch/smart-apply-patch.sh + patchFilePath=/data/hiveptest/working/scratch/build.patch + [[ -f /data/hiveptest/working/scratch/build.patch ]] + chmod +x /data/hiveptest/working/scratch/smart-apply-patch.sh + /data/hiveptest/working/scratch/smart-apply-patch.sh /data/hiveptest/working/scratch/build.patch Going to apply patch with: patch -p0 patching file ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java + [[ maven == \m\a\v\e\n ]] + rm -rf /data/hiveptest/working/maven/org/apache/hive + mvn -B clean install -DskipTests -T 4 -q -Dmaven.repo.local=/data/hiveptest/working/maven ANTLR Parser Generator Version 3.4 org/apache/hadoop/hive/metastore/parser/Filter.g DataNucleus Enhancer (version 4.1.6) for API "JDO" DataNucleus Enhancer : Classpath >> /usr/share/maven/boot/plexus-classworlds-2.x.jar ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MDatabase ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MFieldSchema ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MType ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MTable ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MConstraint ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MSerDeInfo ENHANCED (Persistable) : 
org.apache.hadoop.hive.metastore.model.MOrder ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MColumnDescriptor ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MStringList ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MStorageDescriptor ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MPartition ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MIndex ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MRole ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MRoleMap ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MGlobalPrivilege ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MDBPrivilege ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MTablePrivilege ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MPartitionPrivilege ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege ENHANCED (Persistable) :
[jira] [Commented] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631646#comment-15631646 ] Hive QA commented on HIVE-14803: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836719/HIVE-14803.5.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1937/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1937/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1937/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Tests exited with: NonZeroExitCodeException Command 'bash /data/hiveptest/working/scratch/source-prep.sh' failed with exit status 1 and output '+ date '+%Y-%m-%d %T.%3N' 2016-11-03 05:22:58.735 + [[ -n /usr/lib/jvm/java-8-openjdk-amd64 ]] + export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + export PATH=/usr/lib/jvm/java-8-openjdk-amd64/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + PATH=/usr/lib/jvm/java-8-openjdk-amd64/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + export 'ANT_OPTS=-Xmx1g -XX:MaxPermSize=256m ' + ANT_OPTS='-Xmx1g -XX:MaxPermSize=256m ' + export 'MAVEN_OPTS=-Xmx1g -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + MAVEN_OPTS='-Xmx1g -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + cd /data/hiveptest/working/ + tee /data/hiveptest/logs/PreCommit-HIVE-Build-1937/source-prep.txt + [[ false == \t\r\u\e ]] + mkdir -p maven ivy + [[ git = \s\v\n ]] + [[ git = \g\i\t ]] + [[ -z master ]] + [[ -d apache-github-source-source ]] + [[ ! -d apache-github-source-source/.git ]] + [[ ! 
-d apache-github-source-source ]] + date '+%Y-%m-%d %T.%3N' 2016-11-03 05:22:58.738 + cd apache-github-source-source + git fetch origin + git reset --hard HEAD HEAD is now at 345353c HIVE-15039: A better job monitor console output for HoS (Rui reviewed by Xuefu and Ferdinand) + git clean -f -d Removing ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java Removing ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ Removing ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java Removing ql/src/test/queries/clientpositive/parquet_types_vectorization.q Removing ql/src/test/results/clientpositive/parquet_types_vectorization.q.out + git checkout master Already on 'master' Your branch is up-to-date with 'origin/master'. + git reset --hard origin/master HEAD is now at 345353c HIVE-15039: A better job monitor console output for HoS (Rui reviewed by Xuefu and Ferdinand) + git merge --ff-only origin/master Already up-to-date. + date '+%Y-%m-%d %T.%3N' 2016-11-03 05:22:59.621 + patchCommandPath=/data/hiveptest/working/scratch/smart-apply-patch.sh + patchFilePath=/data/hiveptest/working/scratch/build.patch + [[ -f /data/hiveptest/working/scratch/build.patch ]] + chmod +x /data/hiveptest/working/scratch/smart-apply-patch.sh + /data/hiveptest/working/scratch/smart-apply-patch.sh /data/hiveptest/working/scratch/build.patch Going to apply patch with: patch -p0 patching file ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java + [[ maven == \m\a\v\e\n ]] + rm -rf /data/hiveptest/working/maven/org/apache/hive + mvn -B clean install -DskipTests -T 4 -q -Dmaven.repo.local=/data/hiveptest/working/maven ANTLR Parser Generator Version 3.4 org/apache/hadoop/hive/metastore/parser/Filter.g DataNucleus Enhancer (version 4.1.6) for API "JDO" DataNucleus Enhancer : Classpath >> /usr/share/maven/boot/plexus-classworlds-2.x.jar ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MDatabase ENHANCED (Persistable) : 
org.apache.hadoop.hive.metastore.model.MFieldSchema ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MType ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MTable ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MConstraint ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MSerDeInfo ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MOrder ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MColumnDescriptor ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MStringList ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MStorageDescriptor ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MPartition ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MIndex ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MRole ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MRoleMap ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MGlobalPrivilege ENHANCED (Persistable) : org.apache.hadoop.hive.metastore.model.MDBPrivilege ENHANCED (Persistable)
[jira] [Updated] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rajesh Balamohan updated HIVE-14803: Status: Patch Available (was: Open) > S3: Stats gathering for insert queries can be expensive for partitioned > dataset > --- > > Key: HIVE-14803 > URL: https://issues.apache.org/jira/browse/HIVE-14803 > Project: Hive > Issue Type: Improvement > Components: Metastore >Affects Versions: 2.1.0 >Reporter: Rajesh Balamohan >Assignee: Rajesh Balamohan >Priority: Minor > Attachments: HIVE-14803.1.patch, HIVE-14803.2.patch, > HIVE-14803.3.patch, HIVE-14803.4.patch, HIVE-14803.5.patch > > > StatsTask's aggregateStats populates stats details for all partitions by > checking the file sizes which turns out to be expensive when larger number of > partitions are inserted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14815) Implement Parquet vectorization reader for Primitive types
[ https://issues.apache.org/jira/browse/HIVE-14815?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631499#comment-15631499 ] Hive QA commented on HIVE-14815: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836718/HIVE-14815.4.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1935/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1935/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1935/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Tests exited with: NonZeroExitCodeException Command 'bash /data/hiveptest/working/scratch/source-prep.sh' failed with exit status 1 and output '+ date '+%Y-%m-%d %T.%3N' 2016-11-03 04:14:52.111 + [[ -n /usr/lib/jvm/java-8-openjdk-amd64 ]] + export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + export PATH=/usr/lib/jvm/java-8-openjdk-amd64/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + PATH=/usr/lib/jvm/java-8-openjdk-amd64/bin/:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games + export 'ANT_OPTS=-Xmx1g -XX:MaxPermSize=256m ' + ANT_OPTS='-Xmx1g -XX:MaxPermSize=256m ' + export 'MAVEN_OPTS=-Xmx1g -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + MAVEN_OPTS='-Xmx1g -Dhttp.proxyHost=localhost -Dhttp.proxyPort=3128' + cd /data/hiveptest/working/ + tee /data/hiveptest/logs/PreCommit-HIVE-Build-1935/source-prep.txt + [[ false == \t\r\u\e ]] + mkdir -p maven ivy + [[ git = \s\v\n ]] + [[ git = \g\i\t ]] + [[ -z master ]] + [[ -d apache-github-source-source ]] + [[ ! -d apache-github-source-source/.git ]] + [[ ! 
-d apache-github-source-source ]] + date '+%Y-%m-%d %T.%3N' 2016-11-03 04:14:52.114 + cd apache-github-source-source + git fetch origin + git reset --hard HEAD HEAD is now at 345353c HIVE-15039: A better job monitor console output for HoS (Rui reviewed by Xuefu and Ferdinand) + git clean -f -d Removing common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java Removing common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig Removing common/src/test/org/apache/hive/common/util/MockFileSystem.java Removing metastore/scripts/upgrade/derby/037-HIVE-14637.derby.sql Removing metastore/scripts/upgrade/mssql/022-HIVE-14637.mssql.sql Removing metastore/scripts/upgrade/mysql/037-HIVE-14637.mysql.sql Removing metastore/scripts/upgrade/oracle/037-HIVE-14637.oracle.sql Removing metastore/scripts/upgrade/postgres/036-HIVE-14637.postgres.sql Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FinalizeWriteIdRequest.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FinalizeWriteIdResult.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetNextWriteIdRequest.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetNextWriteIdResult.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetValidWriteIdsRequest.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetValidWriteIdsResult.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/HeartbeatWriteIdRequest.java Removing metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/HeartbeatWriteIdResult.java Removing metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java Removing metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableWrite.java Removing ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java Removing 
ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java Removing ql/src/test/queries/clientnegative/mm_concatenate.q Removing ql/src/test/queries/clientnegative/mm_truncate_cols.q Removing ql/src/test/queries/clientpositive/mm_all.q Removing ql/src/test/queries/clientpositive/mm_all2.q Removing ql/src/test/queries/clientpositive/mm_conversions.q Removing ql/src/test/queries/clientpositive/mm_current.q Removing ql/src/test/queries/clientpositive/mm_insertonly_acid.q Removing ql/src/test/results/clientnegative/mm_concatenate.q.out Removing ql/src/test/results/clientnegative/mm_truncate_cols.q.out Removing ql/src/test/results/clientpositive/llap/mm_all.q.out Removing ql/src/test/results/clientpositive/llap/mm_all2.q.out Removing ql/src/test/results/clientpositive/llap/mm_conversions.q.out Removing ql/src/test/results/clientpositive/llap/mm_current.q.out Removing ql/src/test/results/clientpositive/mm_insertonly_acid.q.out + git checkout master Already on 'master' Your branch is up-to-date with 'origin/master'. + git reset --hard origin/master HEAD is now at 345353c HIVE-15039: A better job
[jira] [Updated] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rajesh Balamohan updated HIVE-14803: Attachment: HIVE-14803.5.patch > S3: Stats gathering for insert queries can be expensive for partitioned > dataset > --- > > Key: HIVE-14803 > URL: https://issues.apache.org/jira/browse/HIVE-14803 > Project: Hive > Issue Type: Improvement > Components: Metastore >Affects Versions: 2.1.0 >Reporter: Rajesh Balamohan >Assignee: Rajesh Balamohan >Priority: Minor > Attachments: HIVE-14803.1.patch, HIVE-14803.2.patch, > HIVE-14803.3.patch, HIVE-14803.4.patch, HIVE-14803.5.patch > > > StatsTask's aggregateStats populates stats details for all partitions by > checking the file sizes which turns out to be expensive when larger number of > partitions are inserted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14815) Implement Parquet vectorization reader for Primitive types
[ https://issues.apache.org/jira/browse/HIVE-14815?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ferdinand Xu updated HIVE-14815: Attachment: HIVE-14815.4.patch > Implement Parquet vectorization reader for Primitive types > --- > > Key: HIVE-14815 > URL: https://issues.apache.org/jira/browse/HIVE-14815 > Project: Hive > Issue Type: Sub-task >Reporter: Ferdinand Xu >Assignee: Ferdinand Xu > Attachments: HIVE-14815.1.patch, HIVE-14815.2.patch, > HIVE-14815.3.patch, HIVE-14815.4.patch, HIVE-14815.patch > > > Parquet doesn't provide a vectorized reader which can be used by Hive > directly. Also for Decimal Column batch, it consists of a batch of > HiveDecimal which is a Hive type which is unknown for Parquet. To support > Hive vectorization execution engine in Hive, we have to implement the > vectorized Parquet reader in Hive side. To limit the performance impacts, we > need to implement a page level vectorized reader. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15104) Hive on Spark generate more shuffle data than hive on mr
[ https://issues.apache.org/jira/browse/HIVE-15104?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15631376#comment-15631376 ] Xuefu Zhang commented on HIVE-15104: This is rather interesting. I know I originally reviewed HIVE-8017, but I didn't really know why ByteWritable works for MR while we need HiveKey for Spark. Since Spark is stable now, it would be interesting to find out at least why, whether we can optimize or not. [~ruili], since you originally discovered the problem, could you revisit the issue? Thanks. > Hive on Spark generate more shuffle data than hive on mr > > > Key: HIVE-15104 > URL: https://issues.apache.org/jira/browse/HIVE-15104 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 1.2.1 >Reporter: wangwenli >Assignee: Aihua Xu > > the same sql, running on spark and mr engine, will generate different size > of shuffle data. > i think it is because of hive on mr just serialize part of HiveKey, but hive > on spark which using kryo will serialize full of Hivekey object. > what is your opinion? -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631364#comment-15631364 ] Chaoyu Tang commented on HIVE-15109: If so, [~sseth], [~spena] could you +1 on this patch? Thanks > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14990) run all tests for MM tables and fix the issues that are found
[ https://issues.apache.org/jira/browse/HIVE-14990?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631360#comment-15631360 ] Hive QA commented on HIVE-14990: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836708/HIVE-14990.06.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1933/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1933/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1933/ Messages: {noformat} This message was trimmed, see log for full details patching file itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java patching file itests/hive-unit/src/test/java/org/apache/hive/service/server/TestHS2ClearDanglingScratchDir.java patching file itests/pom.xml patching file itests/qtest-spark/pom.xml patching file metastore/if/hive_metastore.thrift patching file metastore/scripts/upgrade/derby/037-HIVE-14637.derby.sql patching file metastore/scripts/upgrade/derby/hive-schema-2.2.0.derby.sql patching file metastore/scripts/upgrade/derby/upgrade-2.1.0-to-2.2.0.derby.sql patching file metastore/scripts/upgrade/mssql/022-HIVE-14637.mssql.sql patching file metastore/scripts/upgrade/mssql/hive-schema-2.2.0.mssql.sql patching file metastore/scripts/upgrade/mssql/upgrade-2.1.0-to-2.2.0.mssql.sql patching file metastore/scripts/upgrade/mysql/037-HIVE-14637.mysql.sql patching file metastore/scripts/upgrade/mysql/hive-schema-2.2.0.mysql.sql patching file metastore/scripts/upgrade/mysql/upgrade-2.1.0-to-2.2.0.mysql.sql patching file metastore/scripts/upgrade/oracle/037-HIVE-14637.oracle.sql patching file metastore/scripts/upgrade/oracle/hive-schema-2.2.0.oracle.sql patching file metastore/scripts/upgrade/oracle/upgrade-2.1.0-to-2.2.0.oracle.sql patching file metastore/scripts/upgrade/postgres/036-HIVE-14637.postgres.sql patching 
file metastore/scripts/upgrade/postgres/hive-schema-2.2.0.postgres.sql patching file metastore/scripts/upgrade/postgres/upgrade-2.1.0-to-2.2.0.postgres.sql patching file metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp patching file metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h patching file metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp patching file metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp patching file metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FinalizeWriteIdRequest.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/FinalizeWriteIdResult.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetAllFunctionsResponse.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetNextWriteIdRequest.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetNextWriteIdResult.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetValidWriteIdsRequest.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/GetValidWriteIdsResult.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/HeartbeatWriteIdRequest.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/HeartbeatWriteIdResult.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Table.java patching file metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java patching file metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php patching file metastore/src/gen/thrift/gen-php/metastore/Types.php patching file 
metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote patching file metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py patching file metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py patching file metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb patching file metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb patching file metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java patching file metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java patching file metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java patching file metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java patching file metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java patching file metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java patching file
[jira] [Updated] (HIVE-15039) A better job monitor console output for HoS
[ https://issues.apache.org/jira/browse/HIVE-15039?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rui Li updated HIVE-15039: -- Resolution: Fixed Fix Version/s: 2.2.0 Status: Resolved (was: Patch Available) Committed to master. Thanks [~xuefuz] and [~Ferd] for the review! > A better job monitor console output for HoS > --- > > Key: HIVE-15039 > URL: https://issues.apache.org/jira/browse/HIVE-15039 > Project: Hive > Issue Type: Improvement >Reporter: Rui Li >Assignee: Rui Li > Fix For: 2.2.0 > > Attachments: HIVE-15039.1.patch, HIVE-15039.2.patch, example > screenshot.png, with patch.png > > > When there're many stages, it's very difficult to read the console output of > job progress of HoS. Attached screenshot is an example. > We may learn from HoT as it does much better than HoS. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15039) A better job monitor console output for HoS
[ https://issues.apache.org/jira/browse/HIVE-15039?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rui Li updated HIVE-15039: -- Labels: TODOC2.2 (was: ) > A better job monitor console output for HoS > --- > > Key: HIVE-15039 > URL: https://issues.apache.org/jira/browse/HIVE-15039 > Project: Hive > Issue Type: Improvement >Reporter: Rui Li >Assignee: Rui Li > Labels: TODOC2.2 > Fix For: 2.2.0 > > Attachments: HIVE-15039.1.patch, HIVE-15039.2.patch, example > screenshot.png, with patch.png > > > When there're many stages, it's very difficult to read the console output of > job progress of HoS. Attached screenshot is an example. > We may learn from HoT as it does much better than HoS. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Comment Edited] (HIVE-14990) run all tests for MM tables and fix the issues that are found
[ https://issues.apache.org/jira/browse/HIVE-14990?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15631299#comment-15631299 ] Sergey Shelukhin edited comment on HIVE-14990 at 11/3/16 2:31 AM: -- Fixed a bunch more issues. The next one is that bucketmapjoin doesn't work. Overall the bucket usage that is not enforceable, like loading files one by one into bucketed table, won't work. It's already extremely fragile it seems. was (Author: sershe): Fixed a bunch more issues. The next one is that bucketmapjoin doesn't work. Overall the bucket usage that is not enforceable, like loading files one by one into bucketed table, won't work. > run all tests for MM tables and fix the issues that are found > - > > Key: HIVE-14990 > URL: https://issues.apache.org/jira/browse/HIVE-14990 > Project: Hive > Issue Type: Sub-task >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-14990.01.patch, HIVE-14990.02.patch, > HIVE-14990.03.patch, HIVE-14990.04.patch, HIVE-14990.04.patch, > HIVE-14990.05.patch, HIVE-14990.05.patch, HIVE-14990.06.patch, > HIVE-14990.patch > > > Expected failures (lack of support in MM tables for certain commands) > 1) All HCat tests > 2) Almost all merge tests (alter .. concat is not supported). > 3) Tests that run dfs commands with specific paths. > 4) Truncate column -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14990) run all tests for MM tables and fix the issues that are found
[ https://issues.apache.org/jira/browse/HIVE-14990?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergey Shelukhin updated HIVE-14990: Attachment: HIVE-14990.06.patch Fixed a bunch more issues. The next one is that bucketmapjoin doesn't work. Overall the bucket usage that is not enforceable, like loading files one by one into bucketed table, won't work. > run all tests for MM tables and fix the issues that are found > - > > Key: HIVE-14990 > URL: https://issues.apache.org/jira/browse/HIVE-14990 > Project: Hive > Issue Type: Sub-task >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-14990.01.patch, HIVE-14990.02.patch, > HIVE-14990.03.patch, HIVE-14990.04.patch, HIVE-14990.04.patch, > HIVE-14990.05.patch, HIVE-14990.05.patch, HIVE-14990.06.patch, > HIVE-14990.patch > > > Expected failures (lack of support in MM tables for certain commands) > 1) All HCat tests > 2) Almost all merge tests (alter .. concat is not supported). > 3) Tests that run dfs commands with specific paths. > 4) Truncate column -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14089) complex type support in LLAP IO is broken
[ https://issues.apache.org/jira/browse/HIVE-14089?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15631294#comment-15631294 ] Sergey Shelukhin commented on HIVE-14089: - The patch is very stale, will return to it shortly > complex type support in LLAP IO is broken > -- > > Key: HIVE-14089 > URL: https://issues.apache.org/jira/browse/HIVE-14089 > Project: Hive > Issue Type: Bug >Affects Versions: 2.2.0 >Reporter: Prasanth Jayachandran >Assignee: Sergey Shelukhin > Attachments: HIVE-14089.WIP.2.patch, HIVE-14089.WIP.3.patch, > HIVE-14089.WIP.patch > > > HIVE-13617 is causing MiniLlapCliDriver following test failures > {code} > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_vector_complex_all > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_vector_complex_join > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14990) run all tests for MM tables and fix the issues that are found
[ https://issues.apache.org/jira/browse/HIVE-14990?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergey Shelukhin updated HIVE-14990: Description: Expected failures (lack of support in MM tables for certain commands) 1) All HCat tests 2) Almost all merge tests (alter .. concat is not supported). 3) Tests that run dfs commands with specific paths. 4) Truncate column was: Expected failures (lack of support in MM tables for certain commands) 1) All HCat tests 2) Almost all merge tests (alter .. concat is not supported). 3) Truncate column > run all tests for MM tables and fix the issues that are found > - > > Key: HIVE-14990 > URL: https://issues.apache.org/jira/browse/HIVE-14990 > Project: Hive > Issue Type: Sub-task >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-14990.01.patch, HIVE-14990.02.patch, > HIVE-14990.03.patch, HIVE-14990.04.patch, HIVE-14990.04.patch, > HIVE-14990.05.patch, HIVE-14990.05.patch, HIVE-14990.patch > > > Expected failures (lack of support in MM tables for certain commands) > 1) All HCat tests > 2) Almost all merge tests (alter .. concat is not supported). > 3) Tests that run dfs commands with specific paths. > 4) Truncate column -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15082) Hive-1.2 cannot read data from complex data types with TIMESTAMP column, stored in Parquet
[ https://issues.apache.org/jira/browse/HIVE-15082?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631096#comment-15631096 ] Vaibhav Gumashta commented on HIVE-15082: - [~osayankin] Thanks for the patch. Can you submit this again for a QA run against branch-1.2 > Hive-1.2 cannot read data from complex data types with TIMESTAMP column, > stored in Parquet > -- > > Key: HIVE-15082 > URL: https://issues.apache.org/jira/browse/HIVE-15082 > Project: Hive > Issue Type: Bug >Affects Versions: 1.2.0 >Reporter: Oleksiy Sayankin >Assignee: Oleksiy Sayankin > Fix For: 1.2.2 > > Attachments: HIVE-15082-branch-1.2.patch, HIVE-15082-branch-1.patch > > > *STEP 1. Create test data* > {code:sql} > select * from dual; > {code} > *EXPECTED RESULT:* > {noformat} > Pretty_UnIQUe_StrinG > {noformat} > {code:sql} > create table test_parquet1(login timestamp) stored as parquet; > insert overwrite table test_parquet1 select from_unixtime(unix_timestamp()) > from dual; > select * from test_parquet1 limit 1; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp as result. > {noformat} > 2016-10-27 10:58:19 > {noformat} > *STEP 2. Store timestamp in array in parquet file* > {code:sql} > create table test_parquet2(x array) stored as parquet; > insert overwrite table test_parquet2 select array(login) from test_parquet1; > select * from test_parquet2; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp in brackets as result. 
> {noformat} > ["2016-10-27 10:58:19"] > {noformat} > *ACTUAL RESULT:* > {noformat} > ERROR [main]: CliDriver (SessionState.java:printError(963)) - Failed with > exception java.io.IOException:parquet.io.ParquetDecodingException: Can not > read value at 0 in block -1 in file > hdfs:///user/hive/warehouse/test_parquet2/00_0 > java.io.IOException: parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > *ROOT-CAUSE:* > Incorrect initialization of {{metadata}} {{HashMap}} causes that it has > {{null}} value in enumeration > {{org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter}} when > executing following line: > {code:java} > boolean skipConversion = > Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); > {code} > in element {{ETIMESTAMP_CONVERTER}}. > JVM throws NPE and parquet library can not read data from file and throws > {noformat} > java.io.IOException:parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > for its turn. > *SOLUTION:* > Perform initialization in separate method to skip overriding it with {{null}} > value in block of code > {code:java} > if (parent != null) { > setMetadata(parent.getMetadata()); > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15082) Hive-1.2 cannot read data from complex data types with TIMESTAMP column, stored in Parquet
[ https://issues.apache.org/jira/browse/HIVE-15082?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vaibhav Gumashta updated HIVE-15082: Fix Version/s: (was: 1.2.3) 1.2.2 > Hive-1.2 cannot read data from complex data types with TIMESTAMP column, > stored in Parquet > -- > > Key: HIVE-15082 > URL: https://issues.apache.org/jira/browse/HIVE-15082 > Project: Hive > Issue Type: Bug >Affects Versions: 1.2.0 >Reporter: Oleksiy Sayankin >Assignee: Oleksiy Sayankin > Fix For: 1.2.2 > > Attachments: HIVE-15082-branch-1.2.patch, HIVE-15082-branch-1.patch > > > *STEP 1. Create test data* > {code:sql} > select * from dual; > {code} > *EXPECTED RESULT:* > {noformat} > Pretty_UnIQUe_StrinG > {noformat} > {code:sql} > create table test_parquet1(login timestamp) stored as parquet; > insert overwrite table test_parquet1 select from_unixtime(unix_timestamp()) > from dual; > select * from test_parquet1 limit 1; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp as result. > {noformat} > 2016-10-27 10:58:19 > {noformat} > *STEP 2. Store timestamp in array in parquet file* > {code:sql} > create table test_parquet2(x array) stored as parquet; > insert overwrite table test_parquet2 select array(login) from test_parquet1; > select * from test_parquet2; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp in brackets as result. 
> {noformat} > ["2016-10-27 10:58:19"] > {noformat} > *ACTUAL RESULT:* > {noformat} > ERROR [main]: CliDriver (SessionState.java:printError(963)) - Failed with > exception java.io.IOException:parquet.io.ParquetDecodingException: Can not > read value at 0 in block -1 in file > hdfs:///user/hive/warehouse/test_parquet2/00_0 > java.io.IOException: parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > *ROOT-CAUSE:* > Incorrect initialization of {{metadata}} {{HashMap}} causes that it has > {{null}} value in enumeration > {{org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter}} when > executing following line: > {code:java} > boolean skipConversion = > Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); > {code} > in element {{ETIMESTAMP_CONVERTER}}. > JVM throws NPE and parquet library can not read data from file and throws > {noformat} > java.io.IOException:parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > for its turn. > *SOLUTION:* > Perform initialization in separate method to skip overriding it with {{null}} > value in block of code > {code:java} > if (parent != null) { > setMetadata(parent.getMetadata()); > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Issue Comment Deleted] (HIVE-15082) Hive-1.2 cannot read data from complex data types with TIMESTAMP column, stored in Parquet
[ https://issues.apache.org/jira/browse/HIVE-15082?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vaibhav Gumashta updated HIVE-15082: Comment: was deleted (was: Removing fix version 1.2.2 as this is not a blocker.) > Hive-1.2 cannot read data from complex data types with TIMESTAMP column, > stored in Parquet > -- > > Key: HIVE-15082 > URL: https://issues.apache.org/jira/browse/HIVE-15082 > Project: Hive > Issue Type: Bug >Affects Versions: 1.2.0 >Reporter: Oleksiy Sayankin >Assignee: Oleksiy Sayankin > Fix For: 1.2.2 > > Attachments: HIVE-15082-branch-1.2.patch, HIVE-15082-branch-1.patch > > > *STEP 1. Create test data* > {code:sql} > select * from dual; > {code} > *EXPECTED RESULT:* > {noformat} > Pretty_UnIQUe_StrinG > {noformat} > {code:sql} > create table test_parquet1(login timestamp) stored as parquet; > insert overwrite table test_parquet1 select from_unixtime(unix_timestamp()) > from dual; > select * from test_parquet1 limit 1; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp as result. > {noformat} > 2016-10-27 10:58:19 > {noformat} > *STEP 2. Store timestamp in array in parquet file* > {code:sql} > create table test_parquet2(x array) stored as parquet; > insert overwrite table test_parquet2 select array(login) from test_parquet1; > select * from test_parquet2; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp in brackets as result. 
> {noformat} > ["2016-10-27 10:58:19"] > {noformat} > *ACTUAL RESULT:* > {noformat} > ERROR [main]: CliDriver (SessionState.java:printError(963)) - Failed with > exception java.io.IOException:parquet.io.ParquetDecodingException: Can not > read value at 0 in block -1 in file > hdfs:///user/hive/warehouse/test_parquet2/00_0 > java.io.IOException: parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > *ROOT-CAUSE:* > Incorrect initialization of {{metadata}} {{HashMap}} causes that it has > {{null}} value in enumeration > {{org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter}} when > executing following line: > {code:java} > boolean skipConversion = > Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); > {code} > in element {{ETIMESTAMP_CONVERTER}}. > JVM throws NPE and parquet library can not read data from file and throws > {noformat} > java.io.IOException:parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > for its turn. > *SOLUTION:* > Perform initialization in separate method to skip overriding it with {{null}} > value in block of code > {code:java} > if (parent != null) { > setMetadata(parent.getMetadata()); > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15007) Hive 1.2.2 release planning
[ https://issues.apache.org/jira/browse/HIVE-15007?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631088#comment-15631088 ] Vaibhav Gumashta commented on HIVE-15007: - Test run analysis: *pass locally*: {code} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_udaf_corr org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_udf_from_utc_timestamp org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_udf_to_utc_timestamp org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_ivyDownload org.apache.hadoop.hive.cli.TestNegativeCliDriver.testNegativeCliDriver_authorization_uri_import org.apache.hive.minikdc.TestMiniHiveKdc.testLogin org.apache.hive.minikdc.TestHiveAuthFactory.testStartTokenManagerForMemoryTokenStore org.apache.hive.minikdc.TestHiveAuthFactory.testStartTokenManagerForDBTokenStore {code} *fail*: {code} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_join_merge_multi_expressions org.apache.hadoop.hive.cli.TestMinimrCliDriver.testCliDriver_exchgpartition2lel org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_auto_smb_mapjoin_14 org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_tez_smb_empty org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_bucket_map_join_tez1 org.apache.hive.beeline.TestBeelineArgParsing.testAddLocalJar[0] org.apache.hive.beeline.TestBeelineArgParsing.testAddLocalJarWithoutAddDriverClazz[0] org.apache.hive.beeline.TestBeelineArgParsing.testAddLocalJar[1] {code} I'll investigate the failed ones. 
> Hive 1.2.2 release planning > --- > > Key: HIVE-15007 > URL: https://issues.apache.org/jira/browse/HIVE-15007 > Project: Hive > Issue Type: Task >Affects Versions: 1.2.1 >Reporter: Vaibhav Gumashta >Assignee: Vaibhav Gumashta > Attachments: HIVE-15007-branch-1.2.patch, > HIVE-15007-branch-1.2.patch, HIVE-15007-branch-1.2.patch, > HIVE-15007-branch-1.2.patch, HIVE-15007-branch-1.2.patch, > HIVE-15007.branch-1.2.patch > > > Discussed with [~spena] about triggering unit test runs for 1.2.2 release and > creating a patch which will trigger precommits looks like a good way. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14910) Flaky test: TestSparkClient.testJobSubmission
[ https://issues.apache.org/jira/browse/HIVE-14910?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631077#comment-15631077 ] Rui Li commented on HIVE-14910: --- +1 > Flaky test: TestSparkClient.testJobSubmission > - > > Key: HIVE-14910 > URL: https://issues.apache.org/jira/browse/HIVE-14910 > Project: Hive > Issue Type: Sub-task >Reporter: Siddharth Seth >Assignee: Barna Zsombor Klara > Attachments: HIVE-14910.1.patch, HIVE-14910.2.patch, HIVE-14910.patch > > > Have seen this fail in multiple runs (not consistently) > e.g. https://builds.apache.org/job/PreCommit-HIVE-Build/1426/testReport/ -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15082) Hive-1.2 cannot read data from complex data types with TIMESTAMP column, stored in Parquet
[ https://issues.apache.org/jira/browse/HIVE-15082?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631068#comment-15631068 ] Vaibhav Gumashta commented on HIVE-15082: - Removing fix version 1.2.2 as this is not a blocker. > Hive-1.2 cannot read data from complex data types with TIMESTAMP column, > stored in Parquet > -- > > Key: HIVE-15082 > URL: https://issues.apache.org/jira/browse/HIVE-15082 > Project: Hive > Issue Type: Bug >Affects Versions: 1.2.0 >Reporter: Oleksiy Sayankin >Assignee: Oleksiy Sayankin > Fix For: 1.2.3 > > Attachments: HIVE-15082-branch-1.2.patch, HIVE-15082-branch-1.patch > > > *STEP 1. Create test data* > {code:sql} > select * from dual; > {code} > *EXPECTED RESULT:* > {noformat} > Pretty_UnIQUe_StrinG > {noformat} > {code:sql} > create table test_parquet1(login timestamp) stored as parquet; > insert overwrite table test_parquet1 select from_unixtime(unix_timestamp()) > from dual; > select * from test_parquet1 limit 1; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp as result. > {noformat} > 2016-10-27 10:58:19 > {noformat} > *STEP 2. Store timestamp in array in parquet file* > {code:sql} > create table test_parquet2(x array) stored as parquet; > insert overwrite table test_parquet2 select array(login) from test_parquet1; > select * from test_parquet2; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp in brackets as result. 
> {noformat} > ["2016-10-27 10:58:19"] > {noformat} > *ACTUAL RESULT:* > {noformat} > ERROR [main]: CliDriver (SessionState.java:printError(963)) - Failed with > exception java.io.IOException:parquet.io.ParquetDecodingException: Can not > read value at 0 in block -1 in file > hdfs:///user/hive/warehouse/test_parquet2/00_0 > java.io.IOException: parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > *ROOT-CAUSE:* > Incorrect initialization of {{metadata}} {{HashMap}} causes that it has > {{null}} value in enumeration > {{org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter}} when > executing following line: > {code:java} > boolean skipConversion = > Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); > {code} > in element {{ETIMESTAMP_CONVERTER}}. > JVM throws NPE and parquet library can not read data from file and throws > {noformat} > java.io.IOException:parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > for its turn. > *SOLUTION:* > Perform initialization in separate method to skip overriding it with {{null}} > value in block of code > {code:java} > if (parent != null) { > setMetadata(parent.getMetadata()); > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15101) Spark client can be stuck in RUNNING state
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631069#comment-15631069 ] Rui Li commented on HIVE-15101: --- Maybe RemoteDriver's log is more useful. For yarn-client, you should find it in hive.log on the node which runs the query. For yarn-cluster, it's in the AM log - usually the 1st container of the application. > Spark client can be stuck in RUNNING state > -- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima > Attachments: hadoop-yarn-nodemanager.log > > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > 
org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:239) > at 
org.apache.hadoop.util.RunJar.main(RunJar.java:153) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15082) Hive-1.2 cannot read data from complex data types with TIMESTAMP column, stored in Parquet
[ https://issues.apache.org/jira/browse/HIVE-15082?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vaibhav Gumashta updated HIVE-15082: Fix Version/s: (was: 1.2.2) 1.2.3 > Hive-1.2 cannot read data from complex data types with TIMESTAMP column, > stored in Parquet > -- > > Key: HIVE-15082 > URL: https://issues.apache.org/jira/browse/HIVE-15082 > Project: Hive > Issue Type: Bug >Affects Versions: 1.2.0 >Reporter: Oleksiy Sayankin >Assignee: Oleksiy Sayankin > Fix For: 1.2.3 > > Attachments: HIVE-15082-branch-1.2.patch, HIVE-15082-branch-1.patch > > > *STEP 1. Create test data* > {code:sql} > select * from dual; > {code} > *EXPECTED RESULT:* > {noformat} > Pretty_UnIQUe_StrinG > {noformat} > {code:sql} > create table test_parquet1(login timestamp) stored as parquet; > insert overwrite table test_parquet1 select from_unixtime(unix_timestamp()) > from dual; > select * from test_parquet1 limit 1; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp as result. > {noformat} > 2016-10-27 10:58:19 > {noformat} > *STEP 2. Store timestamp in array in parquet file* > {code:sql} > create table test_parquet2(x array) stored as parquet; > insert overwrite table test_parquet2 select array(login) from test_parquet1; > select * from test_parquet2; > {code} > *EXPECTED RESULT:* > No exceptions. Current timestamp in brackets as result. 
> {noformat} > ["2016-10-27 10:58:19"] > {noformat} > *ACTUAL RESULT:* > {noformat} > ERROR [main]: CliDriver (SessionState.java:printError(963)) - Failed with > exception java.io.IOException:parquet.io.ParquetDecodingException: Can not > read value at 0 in block -1 in file > hdfs:///user/hive/warehouse/test_parquet2/00_0 > java.io.IOException: parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > *ROOT-CAUSE:* > Incorrect initialization of {{metadata}} {{HashMap}} causes that it has > {{null}} value in enumeration > {{org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter}} when > executing following line: > {code:java} > boolean skipConversion = > Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); > {code} > in element {{ETIMESTAMP_CONVERTER}}. > JVM throws NPE and parquet library can not read data from file and throws > {noformat} > java.io.IOException:parquet.io.ParquetDecodingException: Can not read value > at 0 in block -1 in file hdfs:///user/hive/warehouse/test_parquet2/00_0 > {noformat} > for its turn. > *SOLUTION:* > Perform initialization in separate method to skip overriding it with {{null}} > value in block of code > {code:java} > if (parent != null) { > setMetadata(parent.getMetadata()); > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15104) Hive on Spark generate more shuffle data than hive on mr
[ https://issues.apache.org/jira/browse/HIVE-15104?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15631053#comment-15631053 ] Rui Li commented on HIVE-15104: --- We need to use HiveKey because it holds the proper hash code to be used for partitioning. MR also uses HiveKey, but in OutputCollector, seems it only serializes the BytesWritable part. [~wenli], is this what you mean? I suspect we'll need help from Spark if we want to do something similar. > Hive on Spark generate more shuffle data than hive on mr > > > Key: HIVE-15104 > URL: https://issues.apache.org/jira/browse/HIVE-15104 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 1.2.1 >Reporter: wangwenli >Assignee: Aihua Xu > > the same sql, running on spark and mr engine, will generate different size > of shuffle data. > i think it is because of hive on mr just serialize part of HiveKey, but hive > on spark which using kryo will serialize full of Hivekey object. > what is your opinion? -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630823#comment-15630823 ] Sahil Takiar commented on HIVE-15093: - [~ashutoshc] This occurs when running {{CREATE TABLE ... AS SELECT ...}} or {{INSERT OVERWRITE TABLE ... SELECT ...}} queries, when the scratch directory and destination directory are both S3. These queries will move a directory from the scratch dir to the final table location (e.g. {{/user/hive/warehouse/table1/}}). If the scratch dir is on S3 and the final table location is on S3, then the {{S3AFileSystem.rename}} method will be used to move the data. Since you cannot rename files on S3, this operation basically copies all the data from the source directory to the destination directory. The way it does this copy is pretty inefficient: for each file under the source directory, it issues a copy request to S3 (a copy request is basically just an HTTP request), waits for S3 to finish copying the data, and then moves on to the next file. > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, > HIVE-15093.6.patch, HIVE-15093.7.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. 
> If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threadpool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630761#comment-15630761 ] Ashutosh Chauhan commented on HIVE-15093: - [~stakiar] For clarity, what Hive statement may result in this issue? Is it insert into directory? or something else. > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, > HIVE-15093.6.patch, HIVE-15093.7.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. > If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threadpool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. 
-- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14970) repeated insert into is broken for buckets (incorrect results for tablesample)
[ https://issues.apache.org/jira/browse/HIVE-14970?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630619#comment-15630619 ] Sergey Shelukhin commented on HIVE-14970: - [~ashutoshc] [~gopalv] I believe this can also affect BucketingSortingReduceSinkOptimizer - storeBucketPathMapping stores it based on file order > repeated insert into is broken for buckets (incorrect results for tablesample) > -- > > Key: HIVE-14970 > URL: https://issues.apache.org/jira/browse/HIVE-14970 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Priority: Critical > > Running on a regular CLI driver > {noformat} > CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED > BY (key) INTO 2 BUCKETS; > insert into table src_bucket select key,value from srcpart limit 10; > dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src_bucket/; > select *, INPUT__FILE__NAME from src_bucket; > select * from src_bucket tablesample (bucket 1 out of 2) s; > select * from src_bucket tablesample (bucket 2 out of 2) s; > insert into table src_bucket select key,value from srcpart limit 10; > dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src_bucket/; > select *, INPUT__FILE__NAME from src_bucket; > select * from src_bucket tablesample (bucket 1 out of 2) s; > select * from src_bucket tablesample (bucket 2 out of 2) s; > {noformat} > Results in the following (with masking disabled and grepping away the noise). > Looks like bucket mapping completely breaks due to extra files, which may > have implications for all the optimizations that depend on them. > This should work or at least fail if this is not supported. 
> {noformat} > PREHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED > BY (key) SORTED BY (key) INTO 2 BUCKETS > PREHOOK: query: insert into table src_bucket select key,value from srcpart > limit 10 > Found 2 items > -rwxr-xr-x 1 sergey staff 46 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > -rwxr-xr-x 1 sergey staff 68 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > PREHOOK: query: select *, INPUT__FILE__NAME from src_bucket > 165 val_165 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 255 val_255 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 484 val_484 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 86val_86 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 238 val_238 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 27val_27 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 278 val_278 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 311 val_311 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 409 val_409 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 98val_98 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > PREHOOK: query: select * from src_bucket tablesample (bucket 1 out of 2) s > 165 val_165 > 255 val_255 > 484 val_484 > 86val_86 > PREHOOK: query: select * from src_bucket tablesample (bucket 2 out of 2) s > 238 val_238 > 27val_27 > 278 val_278 > 311 val_311 > 409 val_409 > 98val_98 > {noformat} > So far so good. 
> {noformat} > PREHOOK: query: insert into table src_bucket select key,value from srcpart > limit 10 > Found 4 items > -rwxr-xr-x 1 sergey staff 46 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > -rwxr-xr-x 1 sergey staff 46 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0_copy_1 > -rwxr-xr-x 1 sergey staff 68 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > -rwxr-xr-x 1 sergey staff 68 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0_copy_1 > PREHOOK: query: select *, INPUT__FILE__NAME from src_bucket > 165 val_165 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 255 val_255 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 484 val_484 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 86val_86 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 165 val_165 >
[jira] [Updated] (HIVE-14970) repeated insert into is broken for buckets (incorrect results for tablesample, BucketingSortingReduceSinkOptimizer)
[ https://issues.apache.org/jira/browse/HIVE-14970?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergey Shelukhin updated HIVE-14970: Summary: repeated insert into is broken for buckets (incorrect results for tablesample, BucketingSortingReduceSinkOptimizer) (was: repeated insert into is broken for buckets (incorrect results for tablesample)) > repeated insert into is broken for buckets (incorrect results for > tablesample, BucketingSortingReduceSinkOptimizer) > --- > > Key: HIVE-14970 > URL: https://issues.apache.org/jira/browse/HIVE-14970 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Priority: Critical > > Running on a regular CLI driver > {noformat} > CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED > BY (key) INTO 2 BUCKETS; > insert into table src_bucket select key,value from srcpart limit 10; > dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src_bucket/; > select *, INPUT__FILE__NAME from src_bucket; > select * from src_bucket tablesample (bucket 1 out of 2) s; > select * from src_bucket tablesample (bucket 2 out of 2) s; > insert into table src_bucket select key,value from srcpart limit 10; > dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src_bucket/; > select *, INPUT__FILE__NAME from src_bucket; > select * from src_bucket tablesample (bucket 1 out of 2) s; > select * from src_bucket tablesample (bucket 2 out of 2) s; > {noformat} > Results in the following (with masking disabled and grepping away the noise). > Looks like bucket mapping completely breaks due to extra files, which may > have implications for all the optimizations that depend on them. > This should work or at least fail if this is not supported. 
> {noformat} > PREHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED > BY (key) SORTED BY (key) INTO 2 BUCKETS > PREHOOK: query: insert into table src_bucket select key,value from srcpart > limit 10 > Found 2 items > -rwxr-xr-x 1 sergey staff 46 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > -rwxr-xr-x 1 sergey staff 68 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > PREHOOK: query: select *, INPUT__FILE__NAME from src_bucket > 165 val_165 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 255 val_255 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 484 val_484 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 86val_86 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 238 val_238 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 27val_27 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 278 val_278 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 311 val_311 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 409 val_409 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > 98val_98 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > PREHOOK: query: select * from src_bucket tablesample (bucket 1 out of 2) s > 165 val_165 > 255 val_255 > 484 val_484 > 86val_86 > PREHOOK: query: select * from src_bucket tablesample (bucket 2 out of 2) s > 238 val_238 > 27val_27 > 278 val_278 > 311 val_311 > 409 val_409 > 98val_98 > {noformat} > So far so good. 
> {noformat} > PREHOOK: query: insert into table src_bucket select key,value from srcpart > limit 10 > Found 4 items > -rwxr-xr-x 1 sergey staff 46 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > -rwxr-xr-x 1 sergey staff 46 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0_copy_1 > -rwxr-xr-x 1 sergey staff 68 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0 > -rwxr-xr-x 1 sergey staff 68 2016-10-14 16:09 > pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/01_0_copy_1 > PREHOOK: query: select *, INPUT__FILE__NAME from src_bucket > 165 val_165 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 255 val_255 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 484 val_484 > pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/00_0 > 86val_86 >
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630600#comment-15630600 ] Sahil Takiar commented on HIVE-15093: - [~yalovyyi] thanks for your comments, a few thoughts: {quote} This approach will block any possible optimization in a connector library. {quote} Not sure how this blocks optimizations in connector libraries. The connectors are external to Hive and are free to implement any optimizations they want. Once they do implement those optimizations, we can modify to Hive to pick them up. In this case, that just requires making a config change. {quote} File system level operation should not be re-implemented in application. {quote} Generally agree, but I would argue that this isn't re-implementing any native bloblstore operations. Blobstores don't allow renames in the first place. This patch just uses a different methodology for simulating renames. {quote} Thread pool is not the only option here, different blob stores have different ways to optimize move use case. {quote} I'm only familiar with the s3a connector, what do other blob store connectors do? {quote} An application should be blobstore agnostic. {quote} Generally agree, which is why this change is configurable. It can be turned off by default it that makes things better. If the community feels like this isn't a reasonable patch, then I'm open to discussing alternatives. 
> S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, > HIVE-15093.6.patch, HIVE-15093.7.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. > If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threapool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630478#comment-15630478 ] Siddharth Seth commented on HIVE-15109: --- This is just for tests. Not sure which version actually moved to Java8. Think 2.1 still supports Java7. [~ctang.ma] - the jira I was referring to is HIVE-15085. I meant to say that once this goes in, I will update that patch to also include the PermSize setting. Think this can go in independently. > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15104) Hive on Spark generate more shuffle data than hive on mr
[ https://issues.apache.org/jira/browse/HIVE-15104?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630435#comment-15630435 ] Aihua Xu commented on HIVE-15104: - This is changed by HIVE-8017. [~lirui] Do you recall what kind of issues it caused? > Hive on Spark generate more shuffle data than hive on mr > > > Key: HIVE-15104 > URL: https://issues.apache.org/jira/browse/HIVE-15104 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 1.2.1 >Reporter: wangwenli >Assignee: Aihua Xu > > the same sql, running on spark and mr engine, will generate different size > of shuffle data. > i think it is because of hive on mr just serialize part of HiveKey, but hive > on spark which using kryo will serialize full of Hivekey object. > what is your opinion? -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14943) Base Implementation
[ https://issues.apache.org/jira/browse/HIVE-14943?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630393#comment-15630393 ] Hive QA commented on HIVE-14943: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836614/HIVE-14943.8.patch {color:green}SUCCESS:{color} +1 due to 5 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 5 failed/errored test(s), 10685 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[join_acid_non_acid] (batchId=150) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[union_fast_stats] (batchId=145) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=91) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testMerge3Way02 (batchId=268) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testUpdateWithSubquery (batchId=268) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1932/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1932/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1932/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 5 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12836614 - PreCommit-HIVE-Build > Base Implementation > --- > > Key: HIVE-14943 > URL: https://issues.apache.org/jira/browse/HIVE-14943 > Project: Hive > Issue Type: Sub-task > Components: Transactions >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-14943.2.patch, HIVE-14943.3.patch, > HIVE-14943.4.patch, HIVE-14943.5.patch, HIVE-14943.6.patch, > HIVE-14943.7.patch, HIVE-14943.8.patch, HIVE-14943.patch > > > Create the 1st pass functional implementation of MERGE > This should run e2e and produce correct results. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15107) HiveLexer can throw NPE in allowQuoteId
[ https://issues.apache.org/jira/browse/HIVE-15107?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630383#comment-15630383 ] Ratandeep Ratti commented on HIVE-15107: Seems like this has been fixed here HIVE-13101. In HIVE-13101 by default {{false}} is returned if Configuration is null. But isn't the more appropriate default response {{true}} since the default value of HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT is {{column}}?? > HiveLexer can throw NPE in allowQuoteId > --- > > Key: HIVE-15107 > URL: https://issues.apache.org/jira/browse/HIVE-15107 > Project: Hive > Issue Type: Bug >Affects Versions: 1.1.1 >Reporter: Ratandeep Ratti >Assignee: Ratandeep Ratti > Attachments: HIVE-15107.patch > > > In HiveLexer.allowQuoteId we reference the HiveConf field, which may be null. > The configuration field is set in ParseDriver only if the hive.ql.Context > variable is not null. ParseDriver exposes API such as > org.apache.hadoop.hive.ql.parse.ParseDriver#parse(java.lang.String) which can > result in the hive.ql.Context field to be null. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630361#comment-15630361 ] Sergio Peña commented on HIVE-15109: If this happens only on JDK7, then do we want to support it on our current master branch? I think we moved to JDK8 since 2.1, and we want to start working with it only. Also, this is qtest, is the flag affecting Hive commands running in JDK7? or is it just on test? > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14910) Flaky test: TestSparkClient.testJobSubmission
[ https://issues.apache.org/jira/browse/HIVE-14910?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630232#comment-15630232 ] Hive QA commented on HIVE-14910: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836529/HIVE-14910.2.patch {color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 10628 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[join_acid_non_acid] (batchId=150) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[union_fast_stats] (batchId=145) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1931/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1931/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1931/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 2 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836529 - PreCommit-HIVE-Build > Flaky test: TestSparkClient.testJobSubmission > - > > Key: HIVE-14910 > URL: https://issues.apache.org/jira/browse/HIVE-14910 > Project: Hive > Issue Type: Sub-task >Reporter: Siddharth Seth >Assignee: Barna Zsombor Klara > Attachments: HIVE-14910.1.patch, HIVE-14910.2.patch, HIVE-14910.patch > > > Have seen this fail in multiple runs (not consistently) > e.g. https://builds.apache.org/job/PreCommit-HIVE-Build/1426/testReport/ -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630175#comment-15630175 ] Chaoyu Tang commented on HIVE-15109: Yeah, it looks like that Perm Generation has been removed from JDK8 (see http://openjdk.java.net/jeps/122). The test does not have the problem when running against JDK 8 without this setting. But I still think the MaxPermSize should be added back in order for the tests with JDK7 to run successfully. [~sseth] What is your another JIRA, could you link that to this JIRA and I will close this one? > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15630078#comment-15630078 ] Hive QA commented on HIVE-15093: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836600/HIVE-15093.7.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 10630 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[join_acid_non_acid] (batchId=150) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[union_fast_stats] (batchId=145) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1930/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1930/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1930/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 2 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836600 - PreCommit-HIVE-Build > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, > HIVE-15093.6.patch, HIVE-15093.7.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. 
> If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threapool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629879#comment-15629879 ] Hive QA commented on HIVE-15094: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836594/HIVE-15094-branch-2.1.patch {color:green}SUCCESS:{color} +1 due to 15 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 23 failed/errored test(s), 10462 tests executed *Failed tests:* {noformat} TestJdbcWithMiniHA - did not produce a TEST-*.xml file (likely timed out) (batchId=494) TestJdbcWithMiniMr - did not produce a TEST-*.xml file (likely timed out) (batchId=491) TestMsgBusConnection - did not produce a TEST-*.xml file (likely timed out) (batchId=362) TestOperationLoggingAPIWithTez - did not produce a TEST-*.xml file (likely timed out) (batchId=484) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_acid_table_stats (batchId=92) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_insert_values_orig_table_use_metadata (batchId=109) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_list_bucket_dml_12 (batchId=87) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_orc_ppd_schema_evol_3a (batchId=97) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_stats_list_bucket (batchId=118) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_stats_null_optimizer (batchId=154) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_vector_between_in (batchId=99) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_orc_ppd_basic (batchId=521) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part (batchId=521) org.apache.hadoop.hive.cli.TestMiniSparkOnYarnCliDriver.testCliDriver_constprog_partitioner (batchId=539) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_orc_ppd_basic 
(batchId=187) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_orc_ppd_schema_evol_3a (batchId=198) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part (batchId=193) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_between_in (batchId=199) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_cast_constant (batchId=183) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_complex_all (batchId=200) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver_vector_between_in (batchId=233) org.apache.hive.jdbc.TestJdbcWithMiniLlap.testLlapInputFormatEndToEnd (batchId=487) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1929/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1929/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1929/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 23 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836594 - PreCommit-HIVE-Build > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. 
> {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table >
[jira] [Updated] (HIVE-14943) Base Implementation
[ https://issues.apache.org/jira/browse/HIVE-14943?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-14943: -- Attachment: HIVE-14943.8.patch > Base Implementation > --- > > Key: HIVE-14943 > URL: https://issues.apache.org/jira/browse/HIVE-14943 > Project: Hive > Issue Type: Sub-task > Components: Transactions >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-14943.2.patch, HIVE-14943.3.patch, > HIVE-14943.4.patch, HIVE-14943.5.patch, HIVE-14943.6.patch, > HIVE-14943.7.patch, HIVE-14943.8.patch, HIVE-14943.patch > > > Create the 1st pass functional implementation of MERGE > This should run e2e and produce correct results. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Assigned] (HIVE-13966) DbNotificationListener: can lose DDL operation notifications
[ https://issues.apache.org/jira/browse/HIVE-13966?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Mohit Sabharwal reassigned HIVE-13966: -- Assignee: Mohit Sabharwal (was: Rahul Sharma) > DbNotificationListener: can lose DDL operation notifications > - > > Key: HIVE-13966 > URL: https://issues.apache.org/jira/browse/HIVE-13966 > Project: Hive > Issue Type: Bug > Components: HCatalog >Reporter: Nachiket Vaidya >Assignee: Mohit Sabharwal >Priority: Critical > Attachments: HIVE-13966.1.patch, HIVE-13966.2.patch, > HIVE-13966.3.patch, HIVE-13966.pdf > > > The code for each API in HiveMetaStore.java is like this: > 1. openTransaction() > 2. -- operation-- > 3. commit() or rollback() based on result of the operation. > 4. add entry to notification log (unconditionally) > If the operation is failed (in step 2), we still add entry to notification > log. Found this issue in testing. > It is still ok as this is the case of false positive. > If the operation is successful and adding to notification log failed, the > user will get a MetaException. It will not rollback the operation, as it is > already committed. We need to handle this case so that we will not have false > negatives. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (HIVE-14911) Notification entry should not be written for failed events.
[ https://issues.apache.org/jira/browse/HIVE-14911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Mohit Sabharwal resolved HIVE-14911. Resolution: Duplicate > Notification entry should not be written for failed events. > --- > > Key: HIVE-14911 > URL: https://issues.apache.org/jira/browse/HIVE-14911 > Project: Hive > Issue Type: Bug >Reporter: Sravya Tirukkovalur > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-11072) Add data validation between Hive metastore upgrades tests
[ https://issues.apache.org/jira/browse/HIVE-11072?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629702#comment-15629702 ] Naveen Gangam commented on HIVE-11072: -- [~aihuaxu] Thanks for the review and the comments. I had fixed this for the prepare.sh script but missed it for this script. I will fix the JAVA_HOME like I did for the prepare.sh (thats used in the schema upgrade testing) where its read from /etc/alternatives The table names are not hard-coded in the script. They are detected from the hive schema SQL files. So the script should work for any new tables in the future schema. However, the foreign keys are hardcoded in the script. This information is used in determining whether or not to generate a random column value. For foreign key references, it uses the already generated values that are stored in a map. There is no consistent way to determine the FK references from parsing the schema files across different DBs. For example, derby does it via alter table (PK/FK constraints not part of the create table statement) {code} ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK1" FOREIGN KEY ("ORIG_TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; {code} Oracle: {code} ALTER TABLE IDXS ADD CONSTRAINT IDXS_FK1 FOREIGN KEY (ORIG_TBL_ID) REFERENCES TBLS (TBL_ID) INITIALLY DEFERRED ; {code} I intend to remove this hardcoded FKs in the future, either via adding more logic in parsing the schema files or the simplest fix would be to add a static value for these columns in the dataload.properties file. This will eliminate the need to generate values for columns that have FK references. 
> Add data validation between Hive metastore upgrades tests > - > > Key: HIVE-11072 > URL: https://issues.apache.org/jira/browse/HIVE-11072 > Project: Hive > Issue Type: New Feature > Components: Tests >Reporter: Sergio Peña >Assignee: Naveen Gangam > Attachments: HIVE-11072.1.patch, HIVE-11072.2.patch, > HIVE-11072.3.patch, HIVE-11072.4.patch > > > An existing Hive metastore upgrade test is running on Hive jenkins. However, > these scripts do test only database schema upgrade, not data validation > between upgrades. > We should validate data between metastore version upgrades. Using data > validation, we may ensure that data won't be damaged, or corrupted when > upgrading the Hive metastore. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15035) Clean up Hive licenses for binary distribution
[ https://issues.apache.org/jira/browse/HIVE-15035?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629563#comment-15629563 ] Owen O'Malley commented on HIVE-15035: -- Revisions to previous comments: * Ok, I reconsidered and like the current license file per referenced library. Largely because the copyright notices are often incorporated in the license. * You should probably create a NOTICE file for the binary releases, even if it is the template one. > Clean up Hive licenses for binary distribution > -- > > Key: HIVE-15035 > URL: https://issues.apache.org/jira/browse/HIVE-15035 > Project: Hive > Issue Type: Bug > Components: distribution >Affects Versions: 2.1.0 >Reporter: Alan Gates >Assignee: Alan Gates > Attachments: HIVE-15035.2.patch, HIVE-15035.patch > > > Hive's current LICENSE file contains information not needed for the source > distribution. For the binary distribution we are missing many license files > as a number of jars included in Hive come with various licenses. This all > needs to be cleaned up. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-11072) Add data validation between Hive metastore upgrades tests
[ https://issues.apache.org/jira/browse/HIVE-11072?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629540#comment-15629540 ] Aihua Xu commented on HIVE-11072: - I notice that we are using absolute paths for java_home and derby home. Not sure if that could cause issue on different env? And also, we are hardcoding the table names in some places. That seems to cause maintenance issue. > Add data validation between Hive metastore upgrades tests > - > > Key: HIVE-11072 > URL: https://issues.apache.org/jira/browse/HIVE-11072 > Project: Hive > Issue Type: New Feature > Components: Tests >Reporter: Sergio Peña >Assignee: Naveen Gangam > Attachments: HIVE-11072.1.patch, HIVE-11072.2.patch, > HIVE-11072.3.patch, HIVE-11072.4.patch > > > An existing Hive metastore upgrade test is running on Hive jenkins. However, > these scripts do test only database schema upgrade, not data validation > between upgrades. > We should validate data between metastore version upgrades. Using data > validation, we may ensure that data won't be damaged, or corrupted when > upgrading the Hive metastore. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629525#comment-15629525 ] Siddharth Seth commented on HIVE-15109: --- Looks good to me. Had removed the permsize setting in a previous patch since it's not required with Java8. There's another jira which tries to reduce the memory to 1G instead of 2G for these tests - I'll update that patch with PermSize as well if this is needed. > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14910) Flaky test: TestSparkClient.testJobSubmission
[ https://issues.apache.org/jira/browse/HIVE-14910?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Barna Zsombor Klara updated HIVE-14910: --- Status: Patch Available (was: Open) > Flaky test: TestSparkClient.testJobSubmission > - > > Key: HIVE-14910 > URL: https://issues.apache.org/jira/browse/HIVE-14910 > Project: Hive > Issue Type: Sub-task >Reporter: Siddharth Seth >Assignee: Barna Zsombor Klara > Attachments: HIVE-14910.1.patch, HIVE-14910.2.patch, HIVE-14910.patch > > > Have seen this fail in multiple runs (not consistently) > e.g. https://builds.apache.org/job/PreCommit-HIVE-Build/1426/testReport/ -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629478#comment-15629478 ] Illya Yalovyy commented on HIVE-15093: -- -1 This approach will block any possible optimization in a connector library. File-system-level operations should not be re-implemented in the application. A thread pool is not the only option here; different blob stores have different ways to optimize the move use case. An application should be blobstore agnostic. Does it make sense? > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, > HIVE-15093.6.patch, HIVE-15093.7.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. > If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threadpool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. 
The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sahil Takiar updated HIVE-15093: Attachment: HIVE-15093.7.patch {{TestBlobStorageUtils.testValidAndInvalidFileSystems}} was failing because the {{FileSystem.getScheme()}} was being mocked. Since the {{BlobStorageUtils.isBlobStorageFileSystem()}} method was changed to use {{FileSystem.getURI()}} instead, I changed the mocking logic. > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, > HIVE-15093.6.patch, HIVE-15093.7.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. > If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threadpool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. 
The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Issue Comment Deleted] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sergio Peña updated HIVE-15094: --- Comment: was deleted (was: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836594/HIVE-15094-branch-2.1.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1928/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1928/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1928/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 923 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836594 - PreCommit-HIVE-Build) > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. 
> {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > 
org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table >
[jira] [Commented] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629390#comment-15629390 ] Hive QA commented on HIVE-15094: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836594/HIVE-15094-branch-2.1.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1928/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1928/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1928/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 923 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836594 - PreCommit-HIVE-Build > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. 
> {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > 
org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table >
[jira] [Commented] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629387#comment-15629387 ] Sergio Peña commented on HIVE-15094: Hopefully HiveQA will work this time. I had to make some configuration changes on ptest to make it work. > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. > {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > 
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table >
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629391#comment-15629391 ] Sergio Peña commented on HIVE-15109: LGTM, what do you think [~sseth]? You've been working on this memory changes. > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jesus Camacho Rodriguez updated HIVE-15094: --- Attachment: (was: HIVE-15094-branch-2.1.patch) > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. > {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > 
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table >
[jira] [Updated] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jesus Camacho Rodriguez updated HIVE-15094: --- Attachment: HIVE-15094-branch-2.1.patch > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. > {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > 
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table >
[jira] [Updated] (HIVE-15094) Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types
[ https://issues.apache.org/jira/browse/HIVE-15094?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jesus Camacho Rodriguez updated HIVE-15094: --- Attachment: (was: HIVE-15094-branch-2.1.patch) > Fix test failures for 2.1.1 regarding schema evolution with DECIMAL types > - > > Key: HIVE-15094 > URL: https://issues.apache.org/jira/browse/HIVE-15094 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sergio Peña >Assignee: Jesus Camacho Rodriguez > Attachments: HIVE-15094-branch-2.1.patch > > > Several tests failures related to schema evolution are happening on > branch-2.1 due to a patch reverted in the past. > {noformat} > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_text_nonvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_part > 
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acid_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_vec_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_acidvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_mapwork_table > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vecrow_mapwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_orc_nonvec_fetchwork_part > org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver_schema_evol_text_vec_mapwork_table >
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629290#comment-15629290 ] Chaoyu Tang commented on HIVE-15109: The failed tests seem not related. [~sseth], [~spena], Could you review the patch. It looks like to be related to a change from HIVE-14887. > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-11111) Insert on skewed table with STORED AS DIRECTORY is broken
[ https://issues.apache.org/jira/browse/HIVE-11111?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15629222#comment-15629222 ] Wojciech Meler commented on HIVE-11111: --- Did you manage to fetch data from skewed table? I get empty result with select * from testskew. > Insert on skewed table with STORED AS DIRECTORY is broken > - > > Key: HIVE-11111 > URL: https://issues.apache.org/jira/browse/HIVE-11111 > Project: Hive > Issue Type: Bug >Affects Versions: 1.2.0 >Reporter: Damien Carol > > Doing these queries fails: > {code:sql} > RESET; > DROP TABLE IF EXISTS testskew; > CREATE TABLE IF NOT EXISTS testskew (key int, value STRING) > SKEWED BY (key) ON (1,5,6) STORED AS DIRECTORIES > STORED AS ORC; > insert into testskew VALUES > (1, 'one'), > (1, 'one'), > (1, 'one'), > (1, 'one'), > (1, 'one'), > (1, 'one'), > (2, 'two'), > (3, 'three'), > (5, 'five'), > (5, 'five'), > (5, 'five'), > (5, 'five'), > (5, 'five'), > (6, 'six'), > (6, 'six'), > (6, 'six'), > (6, 'six'), > (6, 'six'), > (6, 'six'); > {code} > Stacktrace: > {noformat} > INFO : Session is already open > INFO : > INFO : Status: Running (Executing on YARN cluster with App id > application_1434957292922_0059) > INFO : Map 1: 0/1 > INFO : Map 1: 0(+1)/1 > INFO : Map 1: 1/1 > INFO : Loading data to table test.testskew from > hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-1 > ERROR : Failed with exception checkPaths: > hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-1 > has nested directory > hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME > org.apache.hadoop.hive.ql.metadata.HiveException: checkPaths: > hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-1 > has nested directory 
> hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME > at org.apache.hadoop.hive.ql.metadata.Hive.checkPaths(Hive.java:2466) > at org.apache.hadoop.hive.ql.metadata.Hive.copyFiles(Hive.java:2701) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1645) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:297) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1650) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1409) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1192) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1054) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:154) > at > org.apache.hive.service.cli.operation.SQLOperation.access$100(SQLOperation.java:71) > at > org.apache.hive.service.cli.operation.SQLOperation$1$1.run(SQLOperation.java:206) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628) > at > org.apache.hive.service.cli.operation.SQLOperation$1.run(SQLOperation.java:218) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:744) > Error: Error while processing statement: FAILED: Execution Error, return code > 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1) > 
{noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629157#comment-15629157 ] Hive QA commented on HIVE-14803: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836551/HIVE-14803.4.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 25 failed/errored test(s), 10628 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_8] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join14] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join9] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[explain_dependency2] (batchId=64) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_partitioned] (batchId=10) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part6] (batchId=24) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[join_map_ppr] (batchId=68) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[lateral_view_ppd] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[list_bucket_dml_3] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[louter_join_ppr] (batchId=39) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[merge3] (batchId=53) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ppd_udf_case] (batchId=39) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[transform_ppr1] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[transform_ppr2] (batchId=39) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[bucket6] (batchId=131) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[transform_ppr1] (batchId=131) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[join26] (batchId=100) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[join32] 
(batchId=100) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats12] (batchId=118) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_lateralview] (batchId=103) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_ppr] (batchId=100) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union_remove_25] (batchId=129) org.apache.hive.jdbc.TestJdbcWithMiniHS2.testConnectionSchemaAPIs (batchId=213) org.apache.hive.jdbc.TestJdbcWithMiniHS2.testHttpHeaderSize (batchId=213) org.apache.hive.spark.client.TestSparkClient.testJobSubmission (batchId=272) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1923/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1923/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1923/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 25 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836551 - PreCommit-HIVE-Build > S3: Stats gathering for insert queries can be expensive for partitioned > dataset > --- > > Key: HIVE-14803 > URL: https://issues.apache.org/jira/browse/HIVE-14803 > Project: Hive > Issue Type: Improvement > Components: Metastore >Affects Versions: 2.1.0 >Reporter: Rajesh Balamohan >Assignee: Rajesh Balamohan >Priority: Minor > Attachments: HIVE-14803.1.patch, HIVE-14803.2.patch, > HIVE-14803.3.patch, HIVE-14803.4.patch > > > StatsTask's aggregateStats populates stats details for all partitions by > checking the file sizes which turns out to be expensive when larger number of > partitions are inserted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15101) Spark client can be stuck in RUNNING state
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15629148#comment-15629148 ] Ferdinand Xu commented on HIVE-15101: - Thank you for attaching the log. Looks like container is exiting with error. Can you also attach application log as well? Thank you! > Spark client can be stuck in RUNNING state > -- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima > Attachments: hadoop-yarn-nodemanager.log > > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > 
org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:239) > at org.apache.hadoop.util.RunJar.main(RunJar.java:153) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rajesh Balamohan updated HIVE-14803: Status: Open (was: Patch Available) > S3: Stats gathering for insert queries can be expensive for partitioned > dataset > --- > > Key: HIVE-14803 > URL: https://issues.apache.org/jira/browse/HIVE-14803 > Project: Hive > Issue Type: Improvement > Components: Metastore >Affects Versions: 2.1.0 >Reporter: Rajesh Balamohan >Assignee: Rajesh Balamohan >Priority: Minor > Attachments: HIVE-14803.1.patch, HIVE-14803.2.patch, > HIVE-14803.3.patch, HIVE-14803.4.patch > > > StatsTask's aggregateStats populates stats details for all partitions by > checking the file sizes which turns out to be expensive when larger number of > partitions are inserted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628977#comment-15628977 ] Hive QA commented on HIVE-14803: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836551/HIVE-14803.4.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 28 failed/errored test(s), 10628 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_8] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join19] (batchId=58) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[dynpart_sort_optimization_acid2] (batchId=29) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[explain_logical] (batchId=58) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[fouter_join_ppr] (batchId=29) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_map_ppr_multi_distinct] (batchId=45) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[index_auto_unused] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part5] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part7] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[input_part8] (batchId=28) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[list_bucket_dml_6] (batchId=29) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[load_dyn_part2] (batchId=52) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[load_dyn_part9] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[mapjoin_mapjoin] (batchId=45) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[mapjoin_subquery] (batchId=45) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[nonmr_fetch_threshold] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[partition_boolexpr] (batchId=28) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[transform_ppr1] (batchId=13) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[load_dyn_part3] (batchId=137) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[sample1] (batchId=137) org.apache.hadoop.hive.cli.TestMiniSparkOnYarnCliDriver.testCliDriver[infer_bucket_sort_num_buckets] (batchId=157) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_4] (batchId=91) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby_ppr] (batchId=105) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[join32_lessSize] (batchId=95) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[mapjoin_distinct] (batchId=115) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[optimize_nullscan] (batchId=126) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[router_join_ppr] (batchId=126) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[sample8] (batchId=105) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1922/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1922/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1922/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 28 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12836551 - PreCommit-HIVE-Build > S3: Stats gathering for insert queries can be expensive for partitioned > dataset > --- > > Key: HIVE-14803 > URL: https://issues.apache.org/jira/browse/HIVE-14803 > Project: Hive > Issue Type: Improvement > Components: Metastore >Affects Versions: 2.1.0 >Reporter: Rajesh Balamohan >Assignee: Rajesh Balamohan >Priority: Minor > Attachments: HIVE-14803.1.patch, HIVE-14803.2.patch, > HIVE-14803.3.patch, HIVE-14803.4.patch > > > StatsTask's aggregateStats populates stats details for all partitions by > checking the file sizes which turns out to be expensive when larger number of > partitions are inserted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15076) Improve scalability of LDAP authentication provider group filter
[ https://issues.apache.org/jira/browse/HIVE-15076?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628913#comment-15628913 ] Illya Yalovyy commented on HIVE-15076: -- [~aihuaxu], [~ashutoshc], [~ctang.ma], [~szehon], Could you please take a look at this CR? https://reviews.apache.org/r/53204/ > Improve scalability of LDAP authentication provider group filter > > > Key: HIVE-15076 > URL: https://issues.apache.org/jira/browse/HIVE-15076 > Project: Hive > Issue Type: Improvement > Components: Authentication >Affects Versions: 2.1.0 >Reporter: Illya Yalovyy >Assignee: Illya Yalovyy > Attachments: HIVE-15076.1.patch, HIVE-15076.2.patch > > > Current implementation uses following algorithm: > # For a given user find all groups that user is a member of. (A list of > LDAP groups is constructed as a result of that request) > # Match this list of groups with provided group filter. > > Time/Memory complexity of this approach is O(N) on client side, where N – is > a number of groups the user has membership in. On a large directory (800+ > groups per user) we can observe up to 2x performance degradation and failures > because of size of LDAP response (LDAP: error code 4 - Sizelimit Exceeded). > > Some Directory Services (Microsoft Active Directory for instance) provide a > virtual attribute for User Object that contains a list of groups that user > belongs to. This attribute can be used to quickly determine whether this user > passes or fails the group filter. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14803) S3: Stats gathering for insert queries can be expensive for partitioned dataset
[ https://issues.apache.org/jira/browse/HIVE-14803?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rajesh Balamohan updated HIVE-14803: Attachment: HIVE-14803.4.patch > S3: Stats gathering for insert queries can be expensive for partitioned > dataset > --- > > Key: HIVE-14803 > URL: https://issues.apache.org/jira/browse/HIVE-14803 > Project: Hive > Issue Type: Improvement > Components: Metastore >Affects Versions: 2.1.0 >Reporter: Rajesh Balamohan >Assignee: Rajesh Balamohan >Priority: Minor > Attachments: HIVE-14803.1.patch, HIVE-14803.2.patch, > HIVE-14803.3.patch, HIVE-14803.4.patch > > > StatsTask's aggregateStats populates stats details for all partitions by > checking the file sizes which turns out to be expensive when larger number of > partitions are inserted. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15101) Spark client can be stuck in RUNNING state
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Satoshi Iijima updated HIVE-15101: -- Attachment: hadoop-yarn-nodemanager.log > Spark client can be stuck in RUNNING state > -- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima > Attachments: hadoop-yarn-nodemanager.log > > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > 
org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:239) > at org.apache.hadoop.util.RunJar.main(RunJar.java:153) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15101) Spark client can be stuck in RUNNING state
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628706#comment-15628706 ] Satoshi Iijima commented on HIVE-15101: --- I have added version info to environment and attached node manager log. > Spark client can be stuck in RUNNING state > -- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima > Attachments: hadoop-yarn-nodemanager.log > > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > 
org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:239) > at org.apache.hadoop.util.RunJar.main(RunJar.java:153) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15101) Spark client can be stuck in RUNNING state
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Satoshi Iijima updated HIVE-15101: -- Environment: Hive 2.1.0 Spark 1.6.2 > Spark client can be stuck in RUNNING state > -- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > 
org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:239) > at org.apache.hadoop.util.RunJar.main(RunJar.java:153) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15104) Hive on Spark generate more shuffle data than hive on mr
[ https://issues.apache.org/jira/browse/HIVE-15104?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628605#comment-15628605 ] Rui Li commented on HIVE-15104: --- Seems MR can just serialize the key as BytesWritable instead of HiveKey. We once hit some problem when only serializing the BytesWritable part. But I think it's worth investigating whether we can improve. > Hive on Spark generate more shuffle data than hive on mr > > > Key: HIVE-15104 > URL: https://issues.apache.org/jira/browse/HIVE-15104 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 1.2.1 >Reporter: wangwenli >Assignee: Aihua Xu > > the same sql, running on spark and mr engine, will generate different size > of shuffle data. > i think it is because of hive on mr just serialize part of HiveKey, but hive > on spark which using kryo will serialize full of Hivekey object. > what is your opionion? -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15057) Support other types of operators (other than SELECT)
[ https://issues.apache.org/jira/browse/HIVE-15057?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628506#comment-15628506 ] Hive QA commented on HIVE-15057: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836233/HIVE-15057.1.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 1086 failed/errored test(s), 10628 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_joins] (batchId=215) org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_queries] (batchId=215) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[acid_join] (batchId=14) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[acid_mapjoin] (batchId=9) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[allcolref_in_udf] (batchId=47) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ambiguous_col] (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[annotate_stats_join] (batchId=48) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[annotate_stats_join_pkfk] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[archive_excludeHadoop20] (batchId=59) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[archive_multi] (batchId=28) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_1] (batchId=17) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_3] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_4] (batchId=7) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_disable_cbo_1] (batchId=63) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_disable_cbo_3] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_disable_cbo_4] (batchId=11) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_9] (batchId=33) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join0] (batchId=78) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join10] (batchId=32) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join11] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join12] (batchId=22) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join13] (batchId=72) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join14] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join15] (batchId=14) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join16] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join17] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join18] (batchId=11) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join18_multi_distinct] (batchId=24) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join19] (batchId=58) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join19_inclause] (batchId=16) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join1] (batchId=69) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join20] (batchId=79) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join21] (batchId=72) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join22] (batchId=50) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join23] (batchId=17) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join24] (batchId=67) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join25] (batchId=65) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join26] (batchId=12) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join27] (batchId=80) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join28] (batchId=64) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join29] (batchId=49) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join2] (batchId=57) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join31] (batchId=40) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join32] (batchId=76) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join33] (batchId=11) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join3] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join4] (batchId=63) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join5] (batchId=65) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join6] (batchId=77) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join7] (batchId=24) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join8] (batchId=77) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join9] (batchId=68)
[jira] [Commented] (HIVE-14910) Flaky test: TestSparkClient.testJobSubmission
[ https://issues.apache.org/jira/browse/HIVE-14910?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628390#comment-15628390 ] Barna Zsombor Klara commented on HIVE-14910: Yes, currently the listener is only used in unit tests. > Flaky test: TestSparkClient.testJobSubmission > - > > Key: HIVE-14910 > URL: https://issues.apache.org/jira/browse/HIVE-14910 > Project: Hive > Issue Type: Sub-task >Reporter: Siddharth Seth >Assignee: Barna Zsombor Klara > Attachments: HIVE-14910.1.patch, HIVE-14910.2.patch, HIVE-14910.patch > > > Have seen this fail in multiple runs (not consistently) > e.g. https://builds.apache.org/job/PreCommit-HIVE-Build/1426/testReport/ -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14910) Flaky test: TestSparkClient.testJobSubmission
[ https://issues.apache.org/jira/browse/HIVE-14910?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Barna Zsombor Klara updated HIVE-14910: --- Attachment: HIVE-14910.2.patch Attaching new patch after review board review. Thank you for the review [~xuefuz] > Flaky test: TestSparkClient.testJobSubmission > - > > Key: HIVE-14910 > URL: https://issues.apache.org/jira/browse/HIVE-14910 > Project: Hive > Issue Type: Sub-task >Reporter: Siddharth Seth >Assignee: Barna Zsombor Klara > Attachments: HIVE-14910.1.patch, HIVE-14910.2.patch, HIVE-14910.patch > > > Have seen this fail in multiple runs (not consistently) > e.g. https://builds.apache.org/job/PreCommit-HIVE-Build/1426/testReport/ -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628369#comment-15628369 ] Hive QA commented on HIVE-15093: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836508/HIVE-15093.6.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 10630 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_3] (batchId=90) org.apache.hadoop.hive.ql.util.TestBlobStorageUtils.testValidAndInvalidFileSystems (batchId=236) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1920/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1920/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1920/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 2 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836508 - PreCommit-HIVE-Build > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, HIVE-15093.6.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. 
> If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threapool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15039) A better job monitor console output for HoS
[ https://issues.apache.org/jira/browse/HIVE-15039?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628317#comment-15628317 ] Rui Li commented on HIVE-15039: --- Failed tests cannot be reproduced. I'll commit this shortly if no one has further comments. > A better job monitor console output for HoS > --- > > Key: HIVE-15039 > URL: https://issues.apache.org/jira/browse/HIVE-15039 > Project: Hive > Issue Type: Improvement >Reporter: Rui Li >Assignee: Rui Li > Attachments: HIVE-15039.1.patch, HIVE-15039.2.patch, example > screenshot.png, with patch.png > > > When there're many stages, it's very difficult to read the console output of > job progress of HoS. Attached screenshot is an example. > We may learn from HoT as it does much better than HoS. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-14815) Implement Parquet vectorization reader for Primitive types
[ https://issues.apache.org/jira/browse/HIVE-14815?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ferdinand Xu updated HIVE-14815: Summary: Implement Parquet vectorization reader for Primitive types (was: Implement Parquet vectorization reader ) > Implement Parquet vectorization reader for Primitive types > --- > > Key: HIVE-14815 > URL: https://issues.apache.org/jira/browse/HIVE-14815 > Project: Hive > Issue Type: Sub-task >Reporter: Ferdinand Xu >Assignee: Ferdinand Xu > Attachments: HIVE-14815.1.patch, HIVE-14815.2.patch, > HIVE-14815.3.patch, HIVE-14815.patch > > > Parquet doesn't provide a vectorized reader which can be used by Hive > directly. Also for Decimal Column batch, it consists of a batch of > HiveDecimal which is a Hive type which is unknown for Parquet. To support > Hive vectorization execution engine in Hive, we have to implement the > vectorized Parquet reader in Hive side. To limit the performance impacts, we > need to implement a page level vectorized reader. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15039) A better job monitor console output for HoS
[ https://issues.apache.org/jira/browse/HIVE-15039?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628197#comment-15628197 ] Hive QA commented on HIVE-15039: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836499/HIVE-15039.2.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 10624 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=91) org.apache.hadoop.hive.cli.TestSparkNegativeCliDriver.org.apache.hadoop.hive.cli.TestSparkNegativeCliDriver (batchId=226) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1919/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1919/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1919/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 2 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12836499 - PreCommit-HIVE-Build > A better job monitor console output for HoS > --- > > Key: HIVE-15039 > URL: https://issues.apache.org/jira/browse/HIVE-15039 > Project: Hive > Issue Type: Improvement >Reporter: Rui Li >Assignee: Rui Li > Attachments: HIVE-15039.1.patch, HIVE-15039.2.patch, example > screenshot.png, with patch.png > > > When there're many stages, it's very difficult to read the console output of > job progress of HoS. Attached screenshot is an example. > We may learn from HoT as it does much better than HoS. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-13830) Hive on spark driver crash with Spark 1.6.1
[ https://issues.apache.org/jira/browse/HIVE-13830?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628158#comment-15628158 ] Alexandre Linte commented on HIVE-13830: I'm still using Spark 1.6.1, Hive 2.1.0 and Hadoop 2.7.2, the error remains valid. > Hive on spark driver crash with Spark 1.6.1 > --- > > Key: HIVE-13830 > URL: https://issues.apache.org/jira/browse/HIVE-13830 > Project: Hive > Issue Type: Bug > Components: Spark, spark-branch >Affects Versions: 2.0.0, 2.1.0 > Environment: Hadoop 2.7.2, Hive 2.1.0, Spark 1.6.1, Kerberos >Reporter: Alexandre Linte > > With Hive 1.2.1 I was able to use Hive on Spark successfully with the use of the > spark-assembly "spark-assembly-1.4.1-hadoop2.7.1.jar". > Today with Hive 2.0.0, I'm unable to use Hive on Spark whether it be with the > spark-assembly "spark-assembly-1.4.1-hadoop2.7.1.jar" or the spark-assembly > "spark-assembly-1.6.1-hadoop2.7.2.jar". > My configuration is the following: > * spark-default.conf available in HIVE_DIR/conf > * spark assembly available in HIVE_DIR/lib > I gathered several logs below: > - HQL commands > {noformat} > $ hive -v --database shfs3453 > SLF4J: Class path contains multiple SLF4J bindings. > SLF4J: Found binding in > [jar:file:/opt/application/Hive/apache-hive-2.0.0-bin/lib/hive-jdbc-2.0.0-standalone.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: Found binding in > [jar:file:/opt/application/Hive/apache-hive-2.0.0-bin/lib/log4j-slf4j-impl-2.4.1.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: Found binding in > [jar:file:/opt/application/Spark/spark-1.6.1/assembly/target/scala-2.10/spark-assembly-1.6.1-hadoop2.7.2.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: Found binding in > [jar:file:/opt/application/Hadoop/hadoop-2.7.2/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an > explanation. 
> SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] > Logging initialized using configuration in > file:/opt/application/Hive/apache-hive-2.0.0-bin/conf/hive-log4j2.properties > use shfs3453 > OK > Time taken: 1.425 seconds > Hive-on-MR is deprecated in Hive 2 and may not be available in the future > versions. Consider using a different execution engine (i.e. tez, spark) or using > Hive 1.X releases. > hive (shfs3453)> set hive.execution.engine=spark; > set hive.execution.engine=spark > hive (shfs3453)> set spark.master=yarn-client; > set spark.master=yarn-client > hive (shfs3453)> CREATE TABLE chicagoCrimes2 (ID BIGINT, CaseNumber STRING, > Day STRING, Block STRING, IUCR INT, PrimaryType STRING, Description STRING, > LocationDescription STRING, Arrest BOOLEAN, Domestic BOOLEAN, Beat INT, > District INT, Ward INT, CommunityArea INT, FBICode INT, XCoordinate BIGINT, > YCoordinate BIGINT, Year INT, UpdatedOn STRING, Latitude FLOAT, Longitude > FLOAT, Location STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED > AS TEXTFILE; > CREATE TABLE chicagoCrimes2 (ID BIGINT, CaseNumber STRING, Day STRING, Block > STRING, IUCR INT, PrimaryType STRING, Description STRING, LocationDescription > STRING, Arrest BOOLEAN, Domestic BOOLEAN, Beat INT, District INT, Ward INT, > CommunityArea INT, FBICode INT, XCoordinate BIGINT, YCoordinate BIGINT, Year > INT, UpdatedOn STRING, Latitude FLOAT, Longitude FLOAT, Location STRING) ROW > FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE > OK > Time taken: 0.408 seconds > hive (shfs3453)> INSERT OVERWRITE TABLE chicagocrimes2 SELECT * FROM > chicagocrimes WHERE Description = 'FIRST DEGREE MURDER'; > INSERT OVERWRITE TABLE chicagocrimes2 SELECT * FROM chicagocrimes WHERE > Description = 'FIRST DEGREE MURDER' > Query ID = shfs3453_20160524092714_41c89aec-2c6f-49e9-98c7-d227ca144f73 > Total jobs = 1 > Launching Job 1 out of 1 > In order to change the average load for a reducer (in bytes): > set 
hive.exec.reducers.bytes.per.reducer=<number> > In order to limit the maximum number of reducers: > set hive.exec.reducers.max=<number> > In order to set a constant number of reducers: > set mapreduce.job.reduces=<number> > Starting Spark Job = 79484279-8e75-4b13-8e71-7de463f4d51e > Status: SENT > Failed to execute task, with exception 'java.lang.IllegalStateException(RPC > channel is closed.)' > FAILED: Execution Error, return code 1 from > org.apache.hadoop.hive.ql.exec.spark.SparkTask > {noformat} > - Client logs > {noformat} > May 24 09:32:19 hive-cli - org.apache.hive.spark.client.rpc.RpcDispatcher Received > message:io.netty.handler.codec.DecoderException: > java.lang.NoClassDefFoundError: org/apache/hive/spark/client/Job > at > io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:358) > at > io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:230) > at >
[jira] [Commented] (HIVE-15109) Set MaxPermSize to 256M for maven tests
[ https://issues.apache.org/jira/browse/HIVE-15109?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15628049#comment-15628049 ] Hive QA commented on HIVE-15109: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836497/HIVE-15109.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 10628 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[columnstats_part_coltype] (batchId=148) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=91) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1918/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1918/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1918/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 2 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12836497 - PreCommit-HIVE-Build > Set MaxPermSize to 256M for maven tests > --- > > Key: HIVE-15109 > URL: https://issues.apache.org/jira/browse/HIVE-15109 > Project: Hive > Issue Type: Test > Components: Test >Reporter: Chaoyu Tang >Assignee: Chaoyu Tang >Priority: Minor > Attachments: HIVE-15109.patch > > > Trying to run the qtests, for example, > mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainanalyze_1.q > and got > {code} > Running org.apache.hadoop.hive.cli.TestMiniTezCliDriver > Tests run: 0, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 29.591 sec - > in org.apache.hadoop.hive.cli.TestMiniTezCliDriver > {code} > Looking into the hive.log, and found that it was due to too small PermGen > space: > {code} > 2016-11-01T19:52:19,039 ERROR > [org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f] > server.NIOServerCnxnFactory: Thread > Thread[org.apache.hadoop.util.JvmPauseMonitor$Monitor@261e733f,5,main] died > java.lang.OutOfMemoryError: PermGen space > {code} > Setting env MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=256M" would not help. > We can set MaxPermSize to maven.test.jvm.args in pom.xml instead: > {code} > -Xmx2048m -XX:MaxPermSize=256M > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Issue Comment Deleted] (HIVE-14837) JDBC: standalone jar is missing hadoop core dependencies
[ https://issues.apache.org/jira/browse/HIVE-14837?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Gopal V updated HIVE-14837: --- Comment: was deleted (was: [~garydgregory]: can you please send out a mail to the dev@ lists on Apache? That looks like it is not pushing any hive build jar to the public snapshots dir since Oct 13th.) > JDBC: standalone jar is missing hadoop core dependencies > > > Key: HIVE-14837 > URL: https://issues.apache.org/jira/browse/HIVE-14837 > Project: Hive > Issue Type: Bug > Components: JDBC >Affects Versions: 2.2.0 >Reporter: Gopal V >Assignee: Tao Li > Fix For: 2.2.0 > > Attachments: HIVE-14837.1.patch > > > {code} > 2016/09/24 00:31:57 ERROR - jmeter.threads.JMeterThread: Test failed! > java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration > at > org.apache.hive.jdbc.HiveConnection.createUnderlyingTransport(HiveConnection.java:418) > at > org.apache.hive.jdbc.HiveConnection.createBinaryTransport(HiveConnection.java:438) > at > org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:225) > at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:182) > at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:107) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14837) JDBC: standalone jar is missing hadoop core dependencies
[ https://issues.apache.org/jira/browse/HIVE-14837?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15627927#comment-15627927 ] Gopal V commented on HIVE-14837: [~garydgregory]: can you please send out a mail to the dev@ lists on Apache? That looks like it is not pushing any hive build jar to the public snapshots dir since Oct 13th. > JDBC: standalone jar is missing hadoop core dependencies > > > Key: HIVE-14837 > URL: https://issues.apache.org/jira/browse/HIVE-14837 > Project: Hive > Issue Type: Bug > Components: JDBC >Affects Versions: 2.2.0 >Reporter: Gopal V >Assignee: Tao Li > Fix For: 2.2.0 > > Attachments: HIVE-14837.1.patch > > > {code} > 2016/09/24 00:31:57 ERROR - jmeter.threads.JMeterThread: Test failed! > java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration > at > org.apache.hive.jdbc.HiveConnection.createUnderlyingTransport(HiveConnection.java:418) > at > org.apache.hive.jdbc.HiveConnection.createBinaryTransport(HiveConnection.java:438) > at > org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:225) > at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:182) > at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:107) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15108) allow Hive script to skip hadoop version check and HBase classpath
[ https://issues.apache.org/jira/browse/HIVE-15108?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15627925#comment-15627925 ] Hive QA commented on HIVE-15108: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12836489/HIVE-15108.patch {color:red}ERROR:{color} -1 due to build exiting with an error Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/1917/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/1917/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-1917/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Tests exited with: ExecutionException: java.util.concurrent.ExecutionException: org.apache.hive.ptest.execution.ssh.SSHExecutionException: RSyncResult [localFile=/data/hiveptest/logs/PreCommit-HIVE-Build-1917/succeeded/83-TestNegativeCliDriver-nopart_insert.q-input41.q-having1.q-and-770-more, remoteFile=/home/hiveptest/104.154.188.198-hiveptest-1/logs/, getExitCode()=255, getException()=null, getUser()=hiveptest, getHost()=104.154.188.198, getInstance()=1]: 'Warning: Permanently added '104.154.188.198' (ECDSA) to the list of known hosts. receiving incremental file list ./ TEST-83-TestNegativeCliDriver-nopart_insert.q-input41.q-having1.q-and-770-more-TEST-org.apache.hadoop.hive.cli.TestNegativeCliDriver.xml 0 0%0.00kB/s0:00:00 146,113 100%3.98MB/s0:00:00 (xfr#1, to-chk=5/7) maven-test.txt 0 0%0.00kB/s0:00:00 38,702 100%1.05MB/s0:00:00 (xfr#2, to-chk=4/7) logs/ logs/derby.log 0 0%0.00kB/s0:00:00 1,001 100% 27.93kB/s0:00:00 (xfr#3, to-chk=1/7) logs/hive.log 0 0%0.00kB/s0:00:00 40,501,248 37% 38.17MB/s0:00:01 Timeout, server 104.154.188.198 not responding. 
rsync: connection unexpectedly closed (85649145 bytes received so far) [receiver] rsync error: error in rsync protocol data stream (code 12) at io.c(226) [receiver=3.1.1] rsync: connection unexpectedly closed (446 bytes received so far) [generator] rsync error: unexplained error (code 255) at io.c(226) [generator=3.1.1] ssh: connect to host 104.154.188.198 port 22: Connection timed out rsync: connection unexpectedly closed (0 bytes received so far) [Receiver] rsync error: unexplained error (code 255) at io.c(226) [Receiver=3.1.1] ssh: connect to host 104.154.188.198 port 22: Connection timed out rsync: connection unexpectedly closed (0 bytes received so far) [Receiver] rsync error: unexplained error (code 255) at io.c(226) [Receiver=3.1.1] ssh: connect to host 104.154.188.198 port 22: Connection timed out rsync: connection unexpectedly closed (0 bytes received so far) [Receiver] rsync error: unexplained error (code 255) at io.c(226) [Receiver=3.1.1] ssh: connect to host 104.154.188.198 port 22: Connection timed out rsync: connection unexpectedly closed (0 bytes received so far) [Receiver] rsync error: unexplained error (code 255) at io.c(226) [Receiver=3.1.1] ' {noformat} This message is automatically generated. ATTACHMENT ID: 12836489 - PreCommit-HIVE-Build > allow Hive script to skip hadoop version check and HBase classpath > -- > > Key: HIVE-15108 > URL: https://issues.apache.org/jira/browse/HIVE-15108 > Project: Hive > Issue Type: Bug >Reporter: Sergey Shelukhin >Assignee: Sergey Shelukhin > Attachments: HIVE-15108.patch > > > Both will be performed by default, as before -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14837) JDBC: standalone jar is missing hadoop core dependencies
[ https://issues.apache.org/jira/browse/HIVE-14837?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15627928#comment-15627928 ] Gopal V commented on HIVE-14837: [~garydgregory]: can you please send out a mail to the dev@ lists on Apache? That looks like it is not pushing any hive build jar to the public snapshots dir since Oct 13th. > JDBC: standalone jar is missing hadoop core dependencies > > > Key: HIVE-14837 > URL: https://issues.apache.org/jira/browse/HIVE-14837 > Project: Hive > Issue Type: Bug > Components: JDBC >Affects Versions: 2.2.0 >Reporter: Gopal V >Assignee: Tao Li > Fix For: 2.2.0 > > Attachments: HIVE-14837.1.patch > > > {code} > 2016/09/24 00:31:57 ERROR - jmeter.threads.JMeterThread: Test failed! > java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration > at > org.apache.hive.jdbc.HiveConnection.createUnderlyingTransport(HiveConnection.java:418) > at > org.apache.hive.jdbc.HiveConnection.createBinaryTransport(HiveConnection.java:438) > at > org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:225) > at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:182) > at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:107) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14837) JDBC: standalone jar is missing hadoop core dependencies
[ https://issues.apache.org/jira/browse/HIVE-14837?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15627919#comment-15627919 ] Gary Gregory commented on HIVE-14837: - Hi, Also, I do not see a SNAPSHOT build since this commit in https://repository.apache.org/content/groups/snapshots/org/apache/hive/hive-jdbc/2.2.0-SNAPSHOT/ And, I cannot build locally per [HIVE-15111]. Thank you, Gary > JDBC: standalone jar is missing hadoop core dependencies > > > Key: HIVE-14837 > URL: https://issues.apache.org/jira/browse/HIVE-14837 > Project: Hive > Issue Type: Bug > Components: JDBC >Affects Versions: 2.2.0 >Reporter: Gopal V >Assignee: Tao Li > Fix For: 2.2.0 > > Attachments: HIVE-14837.1.patch > > > {code} > 2016/09/24 00:31:57 ERROR - jmeter.threads.JMeterThread: Test failed! > java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration > at > org.apache.hive.jdbc.HiveConnection.createUnderlyingTransport(HiveConnection.java:418) > at > org.apache.hive.jdbc.HiveConnection.createBinaryTransport(HiveConnection.java:438) > at > org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:225) > at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:182) > at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:107) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15627872#comment-15627872 ] Sahil Takiar commented on HIVE-15093: - [~spena] comments addressed, updated the RB. Bunch of failed tests because {{ProxyLocalFileSystem}} does not support the {{FileSystem.getScheme()}}, which is used by {{BlobStorageUtils.isBlobStorageFileSystem}}. Updated the code so that it uses {{FileSystem.getURI().getScheme()}} instead. > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, HIVE-15093.6.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. > If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threapool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. 
The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-14837) JDBC: standalone jar is missing hadoop core dependencies
[ https://issues.apache.org/jira/browse/HIVE-14837?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15627870#comment-15627870 ] Gary Gregory commented on HIVE-14837: - Hi, Do you have a time frame for 2.2.0? Thank you, Gary > JDBC: standalone jar is missing hadoop core dependencies > > > Key: HIVE-14837 > URL: https://issues.apache.org/jira/browse/HIVE-14837 > Project: Hive > Issue Type: Bug > Components: JDBC >Affects Versions: 2.2.0 >Reporter: Gopal V >Assignee: Tao Li > Fix For: 2.2.0 > > Attachments: HIVE-14837.1.patch > > > {code} > 2016/09/24 00:31:57 ERROR - jmeter.threads.JMeterThread: Test failed! > java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration > at > org.apache.hive.jdbc.HiveConnection.createUnderlyingTransport(HiveConnection.java:418) > at > org.apache.hive.jdbc.HiveConnection.createBinaryTransport(HiveConnection.java:438) > at > org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:225) > at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:182) > at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:107) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (HIVE-15093) S3-to-S3 Renames: Files should be moved individually rather than at a directory level
[ https://issues.apache.org/jira/browse/HIVE-15093?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sahil Takiar updated HIVE-15093: Summary: S3-to-S3 Renames: Files should be moved individually rather than at a directory level (was: For S3-to-S3 renames, files should be moved individually rather than at a directory level) > S3-to-S3 Renames: Files should be moved individually rather than at a > directory level > - > > Key: HIVE-15093 > URL: https://issues.apache.org/jira/browse/HIVE-15093 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 2.1.0 >Reporter: Sahil Takiar >Assignee: Sahil Takiar > Attachments: HIVE-15093.1.patch, HIVE-15093.2.patch, > HIVE-15093.3.patch, HIVE-15093.4.patch, HIVE-15093.5.patch, HIVE-15093.6.patch > > > Hive's MoveTask uses the Hive.moveFile method to move data within a > distributed filesystem as well as blobstore filesystems. > If the move is done within the same filesystem: > 1: If the source path is a subdirectory of the destination path, files will > be moved one by one using a threapool of workers > 2: If the source path is not a subdirectory of the destination path, a single > rename operation is used to move the entire directory > The second option may not work well on blobstores such as S3. Renames are not > metadata operations and require copying all the data. Client connectors to > blobstores may not efficiently rename directories. Worst case, the connector > will copy each file one by one, sequentially rather than using a threadpool > of workers to copy the data (e.g. HADOOP-13600). > Hive already has code to rename files using a threadpool of workers, but this > only occurs in case number 1. > This JIRA aims to modify the code so that case 1 is triggered when copying > within a blobstore. The focus is on copies within a blobstore because > needToCopy will return true if the src and target filesystems are different, > in which case a different code path is triggered. 
-- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (HIVE-15110) JDBC standalone jar is missing classes from hadoop-commons jar.
[ https://issues.apache.org/jira/browse/HIVE-15110?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Gopal V resolved HIVE-15110. Resolution: Duplicate > JDBC standalone jar is missing classes from hadoop-commons jar. > --- > > Key: HIVE-15110 > URL: https://issues.apache.org/jira/browse/HIVE-15110 > Project: Hive > Issue Type: Bug > Components: JDBC >Affects Versions: 2.0.0, 2.1.0 > Environment: JBoss Developer Studio > Version: 9.1.0.GA > Build id: GA-v20160414-0124-B497 > Build date: 20160414-0124 > Oracle jdk1.8.0_91 >Reporter: Gary Gregory > > - Create a Generic JDBC Driver in Eclipse DTP or JBoss Dev Studio. > - Connect to a Hive server. > - You get the error: > {noformat} > java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration > at > org.apache.hive.jdbc.HiveConnection.createUnderlyingTransport(HiveConnection.java:432) > at > org.apache.hive.jdbc.HiveConnection.createBinaryTransport(HiveConnection.java:452) > at > org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:193) > at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:157) > at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:107) > at > org.eclipse.datatools.connectivity.drivers.jdbc.JDBCConnection.createConnection(JDBCConnection.java:328) > at > org.eclipse.datatools.connectivity.DriverConnectionBase.internalCreateConnection(DriverConnectionBase.java:105) > at > org.eclipse.datatools.connectivity.DriverConnectionBase.open(DriverConnectionBase.java:54) > at > org.eclipse.datatools.connectivity.drivers.jdbc.JDBCConnection.open(JDBCConnection.java:96) > at > org.eclipse.datatools.connectivity.drivers.jdbc.JDBCConnectionFactory.createConnection(JDBCConnectionFactory.java:53) > at > org.eclipse.datatools.connectivity.internal.ConnectionFactoryProvider.createConnection(ConnectionFactoryProvider.java:83) > at > org.eclipse.datatools.connectivity.internal.ConnectionProfile.createConnection(ConnectionProfile.java:359) > at > 
org.eclipse.datatools.connectivity.internal.ManagedConnection.createConnection(ManagedConnection.java:166) > at > org.eclipse.datatools.connectivity.internal.CreateConnectionJob.run(CreateConnectionJob.java:56) > at org.eclipse.core.internal.jobs.Worker.run(Worker.java:55) > Caused by: java.lang.ClassNotFoundException: > org.apache.hadoop.conf.Configuration > at java.net.URLClassLoader.findClass(URLClassLoader.java:381) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at java.net.FactoryURLClassLoader.loadClass(URLClassLoader.java:814) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > ... 15 more > {noformat} > For 2.0.0, I exploded the standalone jar, added the contents of > hadoop-common-2.7.2.jar, and repacked the jar. That works. > Please fix this as I do not want to do this for all new versions. I have to > do this nonesense because one of the runtime containers I deal with only > deals with JDBC drivers that are all in one jar. -- This message was sent by Atlassian JIRA (v6.3.4#6332)