When trying to save the word2vec model trained over 10G of data leads to below OOM error.
java.lang.OutOfMemoryError: Requested array size exceeds VM limit Spark Version: 1.6 spark.dynamicAllocation.enable false spark.executor.memory 75g spark.driver.memory 150g spark.driver.cores 10 Full Stack Trace: java.lang.OutOfMemoryError: Requested array size exceeds VM limit at java.util.Arrays.copyOf(Arrays.java:3332) at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:137) at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:121) at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:421) at java.lang.StringBuilder.append(StringBuilder.java:136) at java.lang.StringBuilder.append(StringBuilder.java:131) at scala.StringContext.standardInterpolator(StringContext.scala:122) at scala.StringContext.s(StringContext.scala:90) at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:70) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:52) at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation.run(InsertIntoHadoopFsRelation.scala:108) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56) at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:256) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:139) at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:334) at org.apache.spark.ml.feature.Word2VecModel$Word2VecModelWriter.saveImpl(Word2Vec.scala:271) at org.apache.spark.ml.util.MLWriter.save(ReadWrite.scala:91) at org.apache.spark.ml.util.MLWritable$class.save(ReadWrite.scala:131) at org.apache.spark.ml.feature.Word2VecModel.save(Word2Vec.scala:172) -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/OutOfMemoryError-When-saving-Word2Vec-tp27142.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org