Damien Carol created HIVE-11111:
-----------------------------------
Summary: Insert on skewed table with STORED AS DIRECTORIES is broken
Key: HIVE-11111
URL: https://issues.apache.org/jira/browse/HIVE-11111
Project: Hive
Issue Type: Bug
Affects Versions: 1.2.0
Reporter: Damien Carol
Running the following queries fails:
{code:sql}
RESET;
DROP TABLE IF EXISTS testskew;
CREATE TABLE IF NOT EXISTS testskew (key int, value STRING)
SKEWED BY (key) ON (1,5,6) STORED AS DIRECTORIES
STORED AS ORC;
insert into testskew VALUES
(1, 'one'),
(1, 'one'),
(1, 'one'),
(1, 'one'),
(1, 'one'),
(1, 'one'),
(2, 'two'),
(3, 'three'),
(5, 'five'),
(5, 'five'),
(5, 'five'),
(5, 'five'),
(5, 'five'),
(6, 'six'),
(6, 'six'),
(6, 'six'),
(6, 'six'),
(6, 'six'),
(6, 'six');
{code}
Stacktrace:
{noformat}
INFO : Session is already open
INFO :
INFO : Status: Running (Executing on YARN cluster with App id application_1434957292922_0059)
INFO : Map 1: 0/1
INFO : Map 1: 0(+1)/1
INFO : Map 1: 1/1
INFO : Loading data to table test.testskew from
hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-10000
ERROR : Failed with exception checkPaths:
hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-10000
has nested directory
hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-10000/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
org.apache.hadoop.hive.ql.metadata.HiveException: checkPaths:
hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-10000
has nested directory
hdfs://nc-h07/user/hive/warehouse/test.db/testskew/.hive-staging_hive_2015-06-25_17-29-34_385_4424227988595852796-14/-ext-10000/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
at org.apache.hadoop.hive.ql.metadata.Hive.checkPaths(Hive.java:2466)
at org.apache.hadoop.hive.ql.metadata.Hive.copyFiles(Hive.java:2701)
at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1645)
at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:297)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1650)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1409)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1192)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1054)
at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:154)
at org.apache.hive.service.cli.operation.SQLOperation.access$100(SQLOperation.java:71)
at org.apache.hive.service.cli.operation.SQLOperation$1$1.run(SQLOperation.java:206)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hive.service.cli.operation.SQLOperation$1.run(SQLOperation.java:218)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
Error: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask (state=08S01,code=1)
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)