[ https://issues.apache.org/jira/browse/SPARK-25749?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Raj updated SPARK-25749: ------------------------ Attachment: build.sbt > Exception thrown while reading avro file with large schema > ---------------------------------------------------------- > > Key: SPARK-25749 > URL: https://issues.apache.org/jira/browse/SPARK-25749 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.3.0, 2.3.1, 2.3.2 > Reporter: Raj > Priority: Blocker > Attachments: EncoderExample.scala, MainCC.scala, build.sbt > > > Hi, we are migrating our jobs from Spark 2.2.0 to Spark 2.3.1. One of the jobs > reads an Avro source that has a large nested schema. The job fails on Spark > 2.3.1 (I have also tested Spark 2.3.0 & Spark 2.3.2, and the job fails there > as well). I am able to replicate this with some sample data. Please find below > the code, build file & exception log. > *Code (EncoderExample.scala)* > > package com.rj.enc > import com.rj.logger.CustomLogger > import org.apache.log4j.Logger > import com.rj.sc.SparkUtil > import org.apache.spark.sql.catalyst.ScalaReflection > import org.apache.spark.sql.types.StructType > import org.apache.spark.sql.Encoders > object EncoderExample { > > val log: Logger = CustomLogger.getLogger(this.getClass.getName.dropRight(1)) > val user = "xxx" > val sourcePath = s"file:///Users/$user/del/avrodata" > val resultPath = s"file:///Users/$user/del/pqdata" > > def main(args: Array[String]): Unit = { > writeData() // Create sample data > readData() // Read, Process & write back the results (App fails in this > method for spark 2.3.1) > } > > def readData(): Unit = { > log.info("sourcePath -> " + sourcePath) > val ss = SparkUtil.getSparkSession(this.getClass.getName) > val schema = > ScalaReflection.schemaFor[MainCC].dataType.asInstanceOf[StructType] > import com.databricks.spark.avro._ > import ss.implicits._ > val ds = ss.sqlContext.read.schema(schema).option("basePath", sourcePath). 
> avro(this.sourcePath).as[MainCC] > log.info("Schema -> " + ds.schema.treeString) > log.info("Count x -> " + ds.count) > val encr = Encoders.product[ResultCC] > val res = ds.map{ x => > val es: Long = x.header.tamp > ResultCC(es = es) > }(encr) > res.write.parquet(this.resultPath) > } > > def writeData(): Unit = { > val ss = SparkUtil.getSparkSession(this.getClass.getName) > import ss.implicits._ > val ds = ss.sparkContext.parallelize(Seq(MainCC(), > MainCC())).toDF//.as[MainCC] > log.info("source count 5 -> " + ds.count) > import com.databricks.spark.avro._ > ds.write.avro(this.sourcePath) > log.info("Written") > } > > } > final case class ResultCC( > es: Long) > *Case Class (Schema of source avro data)* > package com.rj.enc > > case class Header(tamp: Long = 12, xy: Option[String] = > Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")) > > case class Key(hi: Option[String] = > Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")) > > case class L30 ( > l1: Option[Double] = Some(123d) > ,l2: Option[Double] = Some(123d) > ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class C45 ( > r1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,r2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class B45 ( > e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e2: Option[Int] = Some(123) > ,e3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class D45 (`t1`: Option[String] = > Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")) > > case class M30 ( > b1: Option[B45] = Some(B45()) > ,b2: Option[C45] = Some(C45()) > ,b3: 
Option[D45] = Some(D45()) > ) > > case class Y50 ( > g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class X50 ( > c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class L10 ( > u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u5: Option[Y50] = Some(Y50()) > ,u6: Option[X50] = Some(X50()) > ,u7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class Z10 ( > m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class X10( > i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i2: Option[L30] = Some(L30()) > ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i4: Option[M30] = Some(M30()) > ,i5: Option[Boolean] = Some(false) > ) > > case class R10 ( > t1: Option[Long] = Some(123l) > ,t2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t5: Option[String] = 
Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,t9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,u3: Option[Map[String, Option[String]]] = Some(Map.empty) > ,u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > case class A15( > h1: Option[R10] = Some(R10()) > ,h2: Option[X10] = Some(X10()) > ,h3: Option[L10] = Some(L10()) > ,h4: Option[Z10] = Some(Z10()) > ) > > case class B15( > m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), > n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ){ > def toMap: Map[String,String]={ > Map( > ("m1", this.m1), > ("m2", this.m2), > ("m3", this.m3), > ("m4", this.m4), > ("m5", this.m5), > ("m6", this.m6), > ("m7", this.m7), > ("m8", 
this.m8), > ("m9", this.m9), > ("n1", this.n1), > ("n2", this.n2), > ("n3", this.n2), > ("n4", this.n3) > ).map(tup => { > val (k,v) = tup > (k, v.orNull) > }) > } > } > > case class Value ( > a1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a2: Option[Long] = Some(123l) > ,a3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,a9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,b9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c6: Option[Double] = Some(1.23d) > ,c7: Option[Double] = Some(1.1d) > ,c8: Option[String] = 
Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,c9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,d1: Option[Int] = Some(123) > ,d2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,d3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,d4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,d5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,d6: Option[Long] = Some(123) > ,d7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,d9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e3: Option[Int] = Some(123) > ,e4: Option[Int] = Some(234) > ,e5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,e9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,f9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g1: Option[String] = 
Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,g8: Option[Int] = Some(123) > ,g9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h1: Option[Long] = Some(123l) > ,h2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,h9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,i9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j1: Option[String] = 
Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,j9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,k9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l5: Option[Int] = Some(123) > ,l6: Option[Int] = Some(123) > ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,l9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m1: Option[String] = 
Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m3: Option[Map[String, Option[String]]] = Some(Map.empty) > ,m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,n5: Option[Boolean] = Some(true) > ,n6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ,n7: Option[A15] = Some(A15()) > ,n8: Option[B15] = Some(B15()) > ,n9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") > ) > > final case class MainCC(date: Int = 20181008, > header: Header = Header(), value: Value = Value(), key: Key = Key()) > > +*build.sbt*+ > name := "sparkutil" > version := "5.0" > scalaVersion := "2.11.8" > EclipseKeys.withSource := true > scalacOptions ++= Seq( > "-Ywarn-dead-code", > "-Ywarn-unused" > ) > val sparkVer = "2.3.1" > libraryDependencies ++= Seq( > "org.apache.spark" %% "spark-core" % sparkVer % "provided", > "org.apache.spark" %% "spark-sql" % sparkVer % "provided", > "org.apache.spark" %% "spark-hive" % sparkVer % "provided", > "com.databricks" %% "spark-avro" % "4.0.0", > "log4j" % "log4j" % "1.2.17", > "com.github.scopt" %% "scopt" % "3.6.0", > "com.googlecode.json-simple" % "json-simple" % "1.1.1", > "com.google.cloud" % "google-cloud-bigquery" % "0.17.1-beta", 
> "com.databricks" %% "spark-avro" % "4.0.0", > "org.scalatest" %% "scalatest" % "3.0.5", > "com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.8.3" > ) > assemblyMergeStrategy in assembly := { > case PathList("com", "google", xs @ _*) => MergeStrategy.last > case PathList("org", "apache", "avro", xs @ _*) => MergeStrategy.last > case "project.properties" => MergeStrategy.last > case PathList("META-INF", xs @ _*) => > (xs map {_.toLowerCase}) match { > case ("manifest.mf" :: Nil) | ("index.list" :: Nil) | ("dependencies" > :: Nil) => MergeStrategy.discard > case _ => MergeStrategy.discard > } > case _ => > MergeStrategy.first > } > +*Exception Thrown*+ (PS: I have also increased the heap size in Eclipse, but > that does not solve the issue) > 18/10/16 12:28:40 || ERROR || org.apache.spark.util.Utils || logError() || 91 > || Aborting task > java.lang.OutOfMemoryError: Java heap space > at java.util.Arrays.copyOf(Arrays.java:3332) > at > java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124) > at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448) > at java.lang.StringBuilder.append(StringBuilder.java:136) > at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:200) > at > scala.collection.TraversableOnce$$anonfun$addString$1.apply(TraversableOnce.scala:364) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableOnce$class.addString(TraversableOnce.scala:357) > at scala.collection.AbstractTraversable.addString(Traversable.scala:104) > at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:323) > at scala.collection.AbstractTraversable.mkString(Traversable.scala:104) > at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:325) > at scala.collection.AbstractTraversable.mkString(Traversable.scala:104) > at > 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:137) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:138) > at org.apache.spark.internal.Logging$class.logDebug(Logging.scala:58) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.logDebug(RuleExecutor.scala:40) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:134) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76) > at scala.collection.immutable.List.foreach(List.scala:381) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.immutable.List.map(List.scala:285) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:354) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:32) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1321) > 18/10/16 12:28:40 || ERROR || > org.apache.spark.sql.execution.datasources.FileFormatWriter || logError() || > 70 || Job job_20181016122823_0005 aborted. 
> 18/10/16 12:28:40 || ERROR || org.apache.spark.executor.Executor || > logError() || 91 || Exception in task 0.0 in stage 5.0 (TID 5) > org.apache.spark.SparkException: Task failed while writing rows. > at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285) > at > org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197) > -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org