Repository: spark Updated Branches: refs/heads/master b1eccfc88 -> d94a44d7c
[SPARK-3269][SQL] Decreases initial buffer size for row set to prevent OOM When a large batch size is specified, `SparkSQLOperationManager` OOMs even if the whole result set is much smaller than the batch size. Author: Cheng Lian <lian.cs....@gmail.com> Closes #2171 from liancheng/jdbc-fetch-size and squashes the following commits: 5e1623b [Cheng Lian] Decreases initial buffer size for row set to prevent OOM Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d94a44d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d94a44d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d94a44d7 Branch: refs/heads/master Commit: d94a44d7caaf3fe7559d9ad7b10872fa16cf81ca Parents: b1eccfc Author: Cheng Lian <lian.cs....@gmail.com> Authored: Fri Aug 29 15:36:04 2014 -0700 Committer: Michael Armbrust <mich...@databricks.com> Committed: Fri Aug 29 15:36:04 2014 -0700 ---------------------------------------------------------------------- .../sql/hive/thriftserver/server/SparkSQLOperationManager.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/d94a44d7/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala ---------------------------------------------------------------------- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index 6eccb1b..f12b5a6 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -66,9 +66,10 @@ class SparkSQLOperationManager(hiveContext: HiveContext) extends OperationManage if (!iter.hasNext) { new RowSet() } else { - val maxRows = maxRowsL.toInt // Do you really want a row batch larger than Int Max? No. + // maxRowsL here typically maps to java.sql.Statement.getFetchSize, which is an int + val maxRows = maxRowsL.toInt var curRow = 0 - var rowSet = new ArrayBuffer[Row](maxRows) + var rowSet = new ArrayBuffer[Row](maxRows.min(1024)) while (curRow < maxRows && iter.hasNext) { val sparkRow = iter.next() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org