spark git commit: [SPARK-21575][SPARKR] Eliminate needless synchronization in java-R serialization
Repository: spark
Updated Branches:
  refs/heads/master 44e501ace -> 106eaa9b9

[SPARK-21575][SPARKR] Eliminate needless synchronization in java-R serialization

## What changes were proposed in this pull request?

Remove surplus synchronized blocks.

## How was this patch tested?

Unit tests run OK.

Author: iurii.ant

Closes #18775 from SereneAnt/eliminate_unnecessary_synchronization_in_java-R_serialization.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/106eaa9b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/106eaa9b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/106eaa9b

Branch: refs/heads/master
Commit: 106eaa9b95192f0cdbb382c11efdcb85032e679b
Parents: 44e501a
Author: iurii.ant
Authored: Mon Jul 31 10:42:09 2017 +0800
Committer: Yanbo Liang
Committed: Mon Jul 31 10:42:09 2017 +0800

----------------------------------------------------------------------
 .../org/apache/spark/api/r/JVMObjectTracker.scala | 16 ++--
 1 file changed, 2 insertions(+), 14 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/106eaa9b/core/src/main/scala/org/apache/spark/api/r/JVMObjectTracker.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/r/JVMObjectTracker.scala b/core/src/main/scala/org/apache/spark/api/r/JVMObjectTracker.scala
index 3432700..fe7438a 100644
--- a/core/src/main/scala/org/apache/spark/api/r/JVMObjectTracker.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/JVMObjectTracker.scala
@@ -37,13 +37,7 @@ private[r] class JVMObjectTracker {
   /**
    * Returns the JVM object associated with the input key or None if not found.
    */
-  final def get(id: JVMObjectId): Option[Object] = this.synchronized {
-    if (objMap.containsKey(id)) {
-      Some(objMap.get(id))
-    } else {
-      None
-    }
-  }
+  final def get(id: JVMObjectId): Option[Object] = Option(objMap.get(id))

   /**
    * Returns the JVM object associated with the input key or throws an exception if not found.
@@ -67,13 +61,7 @@ private[r] class JVMObjectTracker {
   /**
    * Removes and returns a JVM object with the specific ID from the tracker, or None if not found.
    */
-  final def remove(id: JVMObjectId): Option[Object] = this.synchronized {
-    if (objMap.containsKey(id)) {
-      Some(objMap.remove(id))
-    } else {
-      None
-    }
-  }
+  final def remove(id: JVMObjectId): Option[Object] = Option(objMap.remove(id))

   /**
    * Number of JVM objects being tracked.
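For reference, a minimal Scala sketch of the pattern the patch relies on, assuming the tracker's objMap is a java.util.concurrent.ConcurrentHashMap: its get and remove calls are atomic and return null for missing keys, so wrapping them in Option(...) reproduces the old behavior without a lock and without the check-then-act gap of containsKey followed by get. The class and key type below are illustrative, not the actual JVMObjectTracker.

import java.util.concurrent.ConcurrentHashMap

// Illustrative stand-in for the tracker; not the real JVMObjectTracker.
class TrackerSketch {
  private val objMap = new ConcurrentHashMap[String, Object]()

  def put(id: String, obj: Object): Unit = objMap.put(id, obj)

  // One atomic map call replaces synchronized { if (containsKey) Some(get) else None }.
  def get(id: String): Option[Object] = Option(objMap.get(id))

  // remove returns the previous value or null, so the same wrapping works.
  def remove(id: String): Option[Object] = Option(objMap.remove(id))
}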
spark git commit: [SPARK-19839][CORE] release longArray in BytesToBytesMap
Repository: spark
Updated Branches:
  refs/heads/master f1a798b57 -> 44e501ace

[SPARK-19839][CORE] release longArray in BytesToBytesMap

## What changes were proposed in this pull request?

When BytesToBytesMap spills, its longArray should be released. Otherwise, it may not be released until the task completes. This array may take a significant amount of memory, which then cannot be used by later operators such as UnsafeShuffleExternalSorter, resulting in more frequent spills in the sorter. This patch releases the array, as the destructive iterator will not use it anymore.

## How was this patch tested?

Manual test in production.

Author: Zhan Zhang

Closes #17180 from zhzhan/memory.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44e501ac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44e501ac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44e501ac

Branch: refs/heads/master
Commit: 44e501ace38ea6e6535a8731e045681175d170ec
Parents: f1a798b
Author: Zhan Zhang
Authored: Sun Jul 30 18:50:19 2017 -0700
Committer: Xiao Li
Committed: Sun Jul 30 18:50:19 2017 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java | 5 +
 .../sql/execution/UnsafeFixedWidthAggregationMapSuite.scala    | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/44e501ac/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 3b6200e..610ace3 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -258,6 +258,11 @@ public final class BytesToBytesMap extends MemoryConsumer {
       this.destructive = destructive;
       if (destructive) {
         destructiveIterator = this;
+        // longArray will not be used anymore if destructive is true, release it now.
+        if (longArray != null) {
+          freeArray(longArray);
+          longArray = null;
+        }
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/44e501ac/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
index 50d8e30..d194f58 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
@@ -127,9 +127,10 @@ class UnsafeFixedWidthAggregationMapSuite
      PAGE_SIZE_BYTES
    )
    val groupKey = InternalRow(UTF8String.fromString("cats"))
+    val row = map.getAggregationBuffer(groupKey)

    // Looking up a key stores a zero-entry in the map (like Python Counters or DefaultDicts)
-    assert(map.getAggregationBuffer(groupKey) != null)
+    assert(row != null)
    val iter = map.iterator()
    assert(iter.next())
    iter.getKey.getString(0) should be ("cats")
@@ -138,7 +139,7 @@ class UnsafeFixedWidthAggregationMapSuite

    // Modifications to rows retrieved from the map should update the values in the map
    iter.getValue.setInt(0, 42)
-    map.getAggregationBuffer(groupKey).getInt(0) should be (42)
+    row.getInt(0) should be (42)

    map.free()
  }
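As a hedged illustration of the idea behind the BytesToBytesMap change, the sketch below frees the hash array as soon as a destructive iterator is handed out, since such an iterator walks the records directly and never consults the array again. Names here (SpillableMapSketch, freeArray) are hypothetical and this is not the real implementation.

// Hypothetical sketch of the eager-release pattern; freeArray stands in for
// handing memory back to Spark's task memory manager.
class SpillableMapSketch {
  private var longArray: Array[Long] = new Array[Long](1 << 16)
  private val records = scala.collection.mutable.ArrayBuffer.empty[Long]

  private def freeArray(a: Array[Long]): Unit = {
    // In the real code this returns the backing memory block to the memory
    // manager; for the sketch, dropping the reference is enough.
  }

  def destructiveIterator(): Iterator[Long] = {
    // The destructive iterator never uses longArray again, so release it now
    // instead of holding the memory until the task finishes.
    if (longArray != null) {
      freeArray(longArray)
      longArray = null
    }
    records.iterator
  }
}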
spark git commit: [MINOR] Minor comment fixes in merge_spark_pr.py script
Repository: spark
Updated Branches:
  refs/heads/master 6830e90de -> f1a798b57

[MINOR] Minor comment fixes in merge_spark_pr.py script

## What changes were proposed in this pull request?

This PR proposes to fix a few typos in `merge_spark_pr.py`:

- `# usage: ./apache-pr-merge.py (see config env vars below)` -> `# usage: ./merge_spark_pr.py (see config env vars below)`
- `... have local a Spark ...` -> `... have a local Spark ...`
- `... to Apache.` -> `... to Apache Spark.`

I skimmed this file and these look like all I could find.

## How was this patch tested?

pep8 check (`./dev/lint-python`).

Author: hyukjinkwon

Closes #18776 from HyukjinKwon/minor-merge-script.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f1a798b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f1a798b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f1a798b5

Branch: refs/heads/master
Commit: f1a798b5763abb5fca3aed592c3114dab5aefda2
Parents: 6830e90
Author: hyukjinkwon
Authored: Mon Jul 31 10:07:33 2017 +0900
Committer: hyukjinkwon
Committed: Mon Jul 31 10:07:33 2017 +0900

----------------------------------------------------------------------
 dev/merge_spark_pr.py | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/f1a798b5/dev/merge_spark_pr.py
----------------------------------------------------------------------
diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 4bacb38..28971b8 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -17,10 +17,11 @@
 # limitations under the License.
 #

-# Utility for creating well-formed pull request merges and pushing them to Apache.
-# usage: ./apache-pr-merge.py (see config env vars below)
+# Utility for creating well-formed pull request merges and pushing them to Apache
+# Spark.
+# usage: ./merge_spark_pr.py (see config env vars below)
 #
-# This utility assumes you already have local a Spark git folder and that you
+# This utility assumes you already have a local Spark git folder and that you
 # have added remotes corresponding to both (i) the github apache Spark
 # mirror and (ii) the apache git repo.
spark git commit: [MINOR][DOC] Replace numTasks with numPartitions in programming guide
Repository: spark
Updated Branches:
  refs/heads/master d79816ddb -> 6830e90de

[MINOR][DOC] Replace numTasks with numPartitions in programming guide

In the programming guide, `numTasks` is used in several places as an argument of transformations. However, in the code, `numPartitions` is used. In this fix, I replace `numTasks` with `numPartitions` in the programming guide for consistency.

Author: Cheng Wang

Closes #18774 from polarke/replace-numtasks-with-numpartitions-in-doc.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6830e90d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6830e90d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6830e90d

Branch: refs/heads/master
Commit: 6830e90de5c98a5c42e75144f76313e4d51d381a
Parents: d79816d
Author: Cheng Wang
Authored: Sun Jul 30 18:45:45 2017 +0100
Committer: Sean Owen
Committed: Sun Jul 30 18:45:45 2017 +0100

----------------------------------------------------------------------
 docs/rdd-programming-guide.md | 16
 1 file changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/6830e90d/docs/rdd-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md
index c0215c8..2602598 100644
--- a/docs/rdd-programming-guide.md
+++ b/docs/rdd-programming-guide.md
@@ -978,40 +978,40 @@ for details.
   Return a new RDD that contains the intersection of elements in the source dataset and the argument.

-  distinct([numTasks]))
+  distinct([numPartitions]))
   Return a new dataset that contains the distinct elements of the source dataset.

-  groupByKey([numTasks])
+  groupByKey([numPartitions])
   When called on a dataset of (K, V) pairs, returns a dataset of (K, Iterable<V>) pairs.
   Note: If you are grouping in order to perform an aggregation (such as a sum or average) over each key, using reduceByKey or aggregateByKey will yield much better performance.
   Note: By default, the level of parallelism in the output depends on the number of partitions of the parent RDD.
-  You can pass an optional numTasks argument to set a different number of tasks.
+  You can pass an optional numPartitions argument to set a different number of tasks.

-  reduceByKey(func, [numTasks])
+  reduceByKey(func, [numPartitions])
   When called on a dataset of (K, V) pairs, returns a dataset of (K, V) pairs where the values for each key are aggregated using the given reduce function func, which must be of type (V,V) => V. Like in groupByKey, the number of reduce tasks is configurable through an optional second argument.

-  aggregateByKey(zeroValue)(seqOp, combOp, [numTasks])
+  aggregateByKey(zeroValue)(seqOp, combOp, [numPartitions])
   When called on a dataset of (K, V) pairs, returns a dataset of (K, U) pairs where the values for each key are aggregated using the given combine functions and a neutral "zero" value. Allows an aggregated value type that is different than the input value type, while avoiding unnecessary allocations. Like in groupByKey, the number of reduce tasks is configurable through an optional second argument.

-  sortByKey([ascending], [numTasks])
+  sortByKey([ascending], [numPartitions])
   When called on a dataset of (K, V) pairs where K implements Ordered, returns a dataset of (K, V) pairs sorted by keys in ascending or descending order, as specified in the boolean ascending argument.

-  join(otherDataset, [numTasks])
+  join(otherDataset, [numPartitions])
   When called on datasets of type (K, V) and (K, W), returns a dataset of (K, (V, W)) pairs with all pairs of elements for each key. Outer joins are supported through leftOuterJoin, rightOuterJoin, and fullOuterJoin.

-  cogroup(otherDataset, [numTasks])
+  cogroup(otherDataset, [numPartitions])
   When called on datasets of type (K, V) and (K, W), returns a dataset of (K, (Iterable<V>, Iterable<W>)) tuples. This operation is also called groupWith.
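For context, a short example of how the numPartitions argument described in these guide entries is passed in the Scala RDD API. These are standard PairRDDFunctions / OrderedRDDFunctions calls; `sc` is assumed to be an existing SparkContext.

// Word-count style pair RDD; the trailing argument is the number of output
// partitions, i.e. the numPartitions parameter the guide now refers to.
val pairs = sc.parallelize(Seq(("a", 1), ("b", 1), ("a", 1)))
val counts = pairs.reduceByKey(_ + _, 8)                             // 8 reduce partitions
val sorted = counts.sortByKey(ascending = false, numPartitions = 4)  // 4 output partitions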
spark git commit: [SPARK-21297][WEB-UI] Add count in 'JDBC/ODBC Server' page.
Repository: spark
Updated Branches:
  refs/heads/master 51f99fb25 -> d79816ddb

[SPARK-21297][WEB-UI] Add count in 'JDBC/ODBC Server' page.

## What changes were proposed in this pull request?

Add a count to the 'Session Statistics' and 'SQL Statistics' headers on the 'JDBC/ODBC Server' page, so the totals are clear at a glance.

fix before:
![1](https://user-images.githubusercontent.com/26266482/27819373-7fbe4002-60cc-11e7-9e7f-e9cc6f9ef746.png)

fix after:
![1](https://user-images.githubusercontent.com/26266482/28700157-876cb7d6-7380-11e7-869c-0a4f18d65357.png)

## How was this patch tested?

Manual tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: guoxiaolong

Closes #18525 from guoxiaolongzte/SPARK-21297.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d79816dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d79816dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d79816dd

Branch: refs/heads/master
Commit: d79816ddb941782e4126e894e28e8e624802429d
Parents: 51f99fb
Author: guoxiaolong
Authored: Sun Jul 30 18:44:31 2017 +0100
Committer: Sean Owen
Committed: Sun Jul 30 18:44:31 2017 +0100

----------------------------------------------------------------------
 .../apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/d79816dd/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala
----------------------------------------------------------------------
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala
index 17589cf..f517bff 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala
@@ -103,7 +103,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage(""
     }

     val content =
-      SQL Statistics ++
+      SQL Statistics ({numStatement}) ++
       {table.getOrElse("No statistics have been generated yet.")}

@@ -164,7 +164,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage(""
     }

     val content =
-      Session Statistics ++
+      Session Statistics ({numBatches}) ++
       {table.getOrElse("No statistics have been generated yet.")}
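A minimal, hypothetical sketch of the idea in this change: precompute the number of entries and interpolate it into the section title. The real page builds Scala XML nodes; plain strings and made-up collection names are used here to keep the sketch dependency-free.

// Hypothetical stand-ins for the statistics the page renders.
val sqlStatements = Seq("SELECT 1", "SELECT 2", "SELECT 3")
val sessions = Seq("session-1", "session-2")

// Interpolating the counts into the titles makes the totals visible at a glance.
val sqlHeader = s"SQL Statistics (${sqlStatements.size})"
val sessionHeader = s"Session Statistics (${sessions.size})"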
spark git commit: [SQL] Fix typo in DataframeWriter doc
Repository: spark
Updated Branches:
  refs/heads/master 6550086bb -> 51f99fb25

[SQL] Fix typo in DataframeWriter doc

## What changes were proposed in this pull request?

The formatting of `none` should be consistent with the other compression codecs (`snappy`, `lz4`), i.e. rendered as `none`.

## How was this patch tested?

This is a typo.

Author: GuoChenzhao

Closes #18758 from gczsjdy/typo.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51f99fb2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51f99fb2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51f99fb2

Branch: refs/heads/master
Commit: 51f99fb25b0d524164e7bf15e63d99abb6c22431
Parents: 6550086
Author: GuoChenzhao
Authored: Sun Jul 30 22:18:38 2017 +0900
Committer: hyukjinkwon
Committed: Sun Jul 30 22:18:38 2017 +0900

----------------------------------------------------------------------
 sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/51f99fb2/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 255c406..0fcda46 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -499,7 +499,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    *
    * `compression` (default is the value specified in `spark.sql.parquet.compression.codec`):
    * compression codec to use when saving to file. This can be one of the known case-insensitive
-   * shorten names(none, `snappy`, `gzip`, and `lzo`). This will override
+   * shorten names(`none`, `snappy`, `gzip`, and `lzo`). This will override
    * `spark.sql.parquet.compression.codec`.
    *
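For context, a brief illustration of where the documented option appears in user code. This uses the standard DataFrameWriter API; `df` is assumed to be an existing DataFrame and the output path is a placeholder.

// Write Parquet with an explicit codec; "none" disables compression, and the
// option overrides spark.sql.parquet.compression.codec for this write.
df.write
  .option("compression", "none")   // other accepted values include "snappy", "gzip", "lzo"
  .parquet("/tmp/example_output")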