[ https://issues.apache.org/jira/browse/SPARK-29764?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Felix Kizhakkel Jose updated SPARK-29764:
-----------------------------------------
    Description: 
Hello,
I have been doing a proof of concept for a data lake structure and analytics using Apache Spark.
When I add java.time.LocalDateTime/java.time.LocalDate properties to my data model, serialization to Parquet starts failing.
*My Data Model:*

@Data
public class Employee {
    private UUID id = UUID.randomUUID();
    private String name;
    private int age;
    private LocalDate dob;
    private LocalDateTime startDateTime;
    private String phone;
    private Address address;
}

 

*Serialization Snippet*

public void serialize() {
    List<Employee> inputDataToSerialize = getInputDataToSerialize(); // this creates 100,000 employee objects
    Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);

    Dataset<Employee> employeeDataset = sparkSession.createDataset(inputDataToSerialize, employeeEncoder);

    employeeDataset.write()
            .mode(SaveMode.Append)
            .parquet("/Users/felix/Downloads/spark.parquet");
}
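
For reference, the same write path can be exercised without the Spring Boot CommandLineRunner wrapper. The sketch below is illustrative only: the local SparkSession setup, the class name ReproduceLocalDateWrite, the hand-built single Employee, and the /tmp output path are assumptions, not taken from the original code.

import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class ReproduceLocalDateWrite {
    public static void main(String[] args) {
        // Local session for a standalone run (illustrative settings).
        SparkSession spark = SparkSession.builder()
                .appName("localdate-parquet-repro")
                .master("local[*]")
                .getOrCreate();

        // One Employee with the java.time fields populated (Lombok @Data provides the setters).
        Employee employee = new Employee();
        employee.setName("test");
        employee.setAge(30);
        employee.setDob(LocalDate.of(1989, 1, 1));
        employee.setStartDateTime(LocalDateTime.now());

        List<Employee> input = Collections.singletonList(employee);
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        Dataset<Employee> employeeDataset = spark.createDataset(input, employeeEncoder);

        // Same write call as in the snippet above.
        employeeDataset.write().mode(SaveMode.Append).parquet("/tmp/employee-repro.parquet");

        spark.stop();
    }
}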

*Exception Stack Trace:*

java.lang.IllegalStateException: Failed to execute CommandLineRunner
    at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:803)
    at org.springframework.boot.SpringApplication.callRunners(SpringApplication.java:784)
    at org.springframework.boot.SpringApplication.afterRefresh(SpringApplication.java:771)
    at org.springframework.boot.SpringApplication.run(SpringApplication.java:316)
    at org.springframework.boot.SpringApplication.run(SpringApplication.java:1186)
    at org.springframework.boot.SpringApplication.run(SpringApplication.java:1175)
    at com.felix.Application.main(Application.java:45)
Caused by: org.apache.spark.SparkException: Job aborted.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:198)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:170)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:676)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:78)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:290)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
    at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:566)
    at com.felix.SparkParquetSerializer.serialize(SparkParquetSerializer.java:24)
    at com.felix.Application.run(Application.java:63)
    at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:800)
    ... 6 more
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:257)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:177)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:123)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:411)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NegativeArraySizeException
    at org.apache.spark.unsafe.types.UTF8String.getBytes(UTF8String.java:297)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$8(ParquetWriteSupport.scala:164)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$8$adapted(ParquetWriteSupport.scala:162)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:124)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:435)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:124)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$16(ParquetWriteSupport.scala:196)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeGroup(ParquetWriteSupport.scala:429)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$15(ParquetWriteSupport.scala:196)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$makeWriter$15$adapted(ParquetWriteSupport.scala:194)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$writeFields$1(ParquetWriteSupport.scala:124)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeField(ParquetWriteSupport.scala:435)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.writeFields(ParquetWriteSupport.scala:124)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.$anonfun$write$1(ParquetWriteSupport.scala:114)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.consumeMessage(ParquetWriteSupport.scala:423)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:114)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.write(ParquetWriteSupport.scala:50)
    at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:128)
    at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:182)
    at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:44)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.write(ParquetOutputWriter.scala:40)
    at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.write(FileFormatDataWriter.scala:137)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:245)
    at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:242)
    ... 9 more

Could you please help me identify what I am doing wrong? This is blocking me from proceeding further with Apache Spark + Parquet.
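
One observation that may help narrow this down: the Encoders.bean documentation for the 2.4.x line lists java.sql.Date and java.sql.Timestamp among the supported bean field types, but not the java.time classes. A possible workaround, sketched below and not verified against this exact setup, is a bean variant that exposes the temporal fields as java.sql types; the class name EmployeeRow is hypothetical, and Address is the same nested bean as above.

import java.sql.Date;
import java.sql.Timestamp;
import java.util.UUID;

import lombok.Data;

// Hypothetical variant of the Employee bean: java.time fields replaced with the
// java.sql types that Encoders.bean() maps to Spark's DateType and TimestampType.
@Data
public class EmployeeRow {
    private UUID id = UUID.randomUUID();
    private String name;
    private int age;
    private Date dob;                 // instead of java.time.LocalDate; e.g. Date.valueOf(localDate)
    private Timestamp startDateTime;  // instead of java.time.LocalDateTime; e.g. Timestamp.valueOf(localDateTime)
    private String phone;
    private Address address;
}

The rest of the pipeline (Encoders.bean, createDataset, write().parquet(...)) would stay unchanged; only the field types change, with the LocalDate/LocalDateTime values converted when the beans are built.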



> Error on Serializing POJO with java datetime property to a Parquet file
> -----------------------------------------------------------------------
>
>                 Key: SPARK-29764
>                 URL: https://issues.apache.org/jira/browse/SPARK-29764
>             Project: Spark
>          Issue Type: Bug
>          Components: Java API, Spark Core, SQL
>    Affects Versions: 2.4.4
>            Reporter: Felix Kizhakkel Jose
>            Priority: Blocker
>


