Chetan Bhat created SPARK-35635:
-----------------------------------

             Summary: Concurrent INSERT statements from multiple Beeline sessions fail with a "Job aborted" exception
                 Key: SPARK-35635
                 URL: https://issues.apache.org/jira/browse/SPARK-35635
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 3.1.1
         Environment: Spark 3.1.1
            Reporter: Chetan Bhat


Create the tables:
CREATE TABLE J1_TBL (
 i integer,
 j integer,
 t string
) USING parquet;

CREATE TABLE J2_TBL (
 i integer,
 k integer
) USING parquet;

From 4 concurrent Beeline sessions, execute the following INSERT statements (a scripted equivalent is sketched after the statements):
INSERT INTO J1_TBL VALUES (1, 4, 'one');
INSERT INTO J1_TBL VALUES (2, 3, 'two');
INSERT INTO J1_TBL VALUES (3, 2, 'three');
INSERT INTO J1_TBL VALUES (4, 1, 'four');
INSERT INTO J1_TBL VALUES (5, 0, 'five');
INSERT INTO J1_TBL VALUES (6, 6, 'six');
INSERT INTO J1_TBL VALUES (7, 7, 'seven');
INSERT INTO J1_TBL VALUES (8, 8, 'eight');
INSERT INTO J1_TBL VALUES (0, NULL, 'zero');
INSERT INTO J1_TBL VALUES (NULL, NULL, 'null');
INSERT INTO J1_TBL VALUES (NULL, 0, 'zero');

INSERT INTO J2_TBL VALUES (1, -1);
INSERT INTO J2_TBL VALUES (2, 2);
INSERT INTO J2_TBL VALUES (3, -3);
INSERT INTO J2_TBL VALUES (2, 4);
INSERT INTO J2_TBL VALUES (5, -5);
INSERT INTO J2_TBL VALUES (5, -5);
INSERT INTO J2_TBL VALUES (0, NULL);
INSERT INTO J2_TBL VALUES (NULL, NULL);
INSERT INTO J2_TBL VALUES (NULL, 0);
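
For reference, a scripted equivalent of the manual repro is sketched below. Beeline is a thin wrapper over the Hive JDBC driver, so four concurrent JDBC sessions against the Thrift server exercise the same code path; the JDBC URL is taken from the error output below, while the user name, the hive-jdbc jar on the classpath, and the object name ConcurrentInsertRepro are illustrative assumptions.

import java.sql.DriverManager

object ConcurrentInsertRepro {
  def main(args: Array[String]): Unit = {
    // Register the Hive JDBC driver (assumed to be on the classpath).
    Class.forName("org.apache.hive.jdbc.HiveDriver")
    // Thrift server endpoint taken from this report; adjust for your cluster.
    val url = "jdbc:hive2://10.19.89.222:23040/"
    val sessions = (1 to 4).map { _ =>
      new Thread(() => {
        // Each thread plays the role of one Beeline session.
        val conn = DriverManager.getConnection(url, "spark", "")
        try {
          val stmt = conn.createStatement()
          stmt.execute("INSERT INTO J1_TBL VALUES (8, 8, 'eight')")
          stmt.close()
        } finally conn.close()
      })
    }
    sessions.foreach(_.start())
    sessions.foreach(_.join())
  }
}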

 

Issue: Concurrent INSERT statements from multiple Beeline sessions fail with a "Job aborted" exception.

0: jdbc:hive2://10.19.89.222:23040/> INSERT INTO J1_TBL VALUES (8, 8, 'eight');
Error: org.apache.hive.service.cli.HiveSQLException: Error running query: org.apache.spark.SparkException: Job aborted.
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:366)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3$$Lambda$1781/750578465.apply$mcV$sp(Unknown Source)
 at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
 at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78)
 at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:45)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258)
 at java.security.AccessController.doPrivileged(Native Method)
 at javax.security.auth.Subject.doAs(Subject.java:422)
 at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272)
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 at java.util.concurrent.FutureTask.run(FutureTask.java:266)
 at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
 at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.SparkException: Job aborted.
 at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:231)
 at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:188)
 at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:109)
 at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:107)
 at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:121)
 at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:228)
 at org.apache.spark.sql.Dataset$$Lambda$1650/1168893915.apply(Unknown Source)
 at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
 at org.apache.spark.sql.Dataset$$Lambda$1651/490917717.apply(Unknown Source)
 at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
 at org.apache.spark.sql.execution.SQLExecution$$$Lambda$1659/232159932.apply(Unknown Source)
 at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
 at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
 at org.apache.spark.sql.execution.SQLExecution$$$Lambda$1652/615787142.apply(Unknown Source)
 at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:777)
 at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
 at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
 at org.apache.spark.sql.Dataset.<init>(Dataset.scala:228)
 at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
 at org.apache.spark.sql.Dataset$$$Lambda$1304/1010914377.apply(Unknown Source)
 at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:777)
 at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
 at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
 at org.apache.spark.sql.SparkSession$$Lambda$1236/744351354.apply(Unknown Source)
 at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:777)
 at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
 at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
 at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:325)
 ... 17 more
Caused by: java.io.FileNotFoundException: File hdfs://hacluster/user/sparkhive/warehouse/chetan.db/j1_tbl/_temporary/0 does not exist.
 at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:1109)
 at org.apache.hadoop.hdfs.DistributedFileSystem.access$1000(DistributedFileSystem.java:142)
 at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1169)
 at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1166)
 at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
 at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:1176)
 at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1959)
 at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2001)
 at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.getAllCommittedTaskPaths(FileOutputCommitter.java:333)
 at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJobInternal(FileOutputCommitter.java:403)
 at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJob(FileOutputCommitter.java:376)
 at org.apache.parquet.hadoop.ParquetOutputCommitter.commitJob(ParquetOutputCommitter.java:48)
 at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitJob(HadoopMapReduceCommitProtocol.scala:188)
 at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:220)
 ... 44 more (state=,code=0)
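
The FileNotFoundException at the bottom of the trace is consistent with a staging-directory collision: FileOutputCommitter stages each job's output under <table location>/_temporary/0, so concurrent INSERTs into the same table share one staging directory, and the first job to reach commitJob deletes it out from under the others. A minimal in-process sketch of the same collision, assuming a spark-shell with an active SparkSession named spark and the J1_TBL table created above:

import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration

// Four concurrent write jobs into the same table; the count is arbitrary.
val jobs = (1 to 4).map { _ =>
  Future {
    // Every job stages under <table location>/_temporary/0, so whichever
    // commitJob finishes first removes the others' staging files.
    spark.sql("INSERT INTO J1_TBL VALUES (8, 8, 'eight')")
  }
}
Await.result(Future.sequence(jobs), Duration.Inf)

Serializing the INSERTs, or pointing each session at a different table, avoids the failure, which matches the observation that the exception only appears under concurrency.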


