spark git commit: [SPARK-21052][SQL][FOLLOW-UP] Add hash map metrics to join

lixiao Fri, 30 Jun 2017 15:11:41 -0700

Repository: spark
Updated Branches:
  refs/heads/master eed9c4ef8 -> fd1325522



[SPARK-21052][SQL][FOLLOW-UP] Add hash map metrics to join

## What changes were proposed in this pull request?

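Remove the `numHashCollisions` and `timeSpentResizingNs` metrics, together with 
their getters, from `BytesToBytesMap`, and change the Scala declarations and 
call sites of `getAverageProbesPerLookup()` to the parameterless 
`getAverageProbesPerLookup`, as suggested.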

## How was this patch tested?

Existing tests.

Author: Liang-Chi Hsieh <vii...@gmail.com>

Closes #18480 from viirya/SPARK-21052-followup.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd132552
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd132552
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd132552

Branch: refs/heads/master
Commit: fd1325522549937232f37215db53d6478f48644c
Parents: eed9c4e
Author: Liang-Chi Hsieh <vii...@gmail.com>
Authored: Fri Jun 30 15:11:27 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Fri Jun 30 15:11:27 2017 -0700

----------------------------------------------------------------------
 .../spark/unsafe/map/BytesToBytesMap.java       | 33 --------------------
 .../spark/sql/execution/joins/HashJoin.scala    |  2 +-
 .../sql/execution/joins/HashedRelation.scala    |  8 ++---
 3 files changed, 5 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fd132552/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java 
b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 4bef21b..3b6200e 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -160,14 +160,10 @@ public final class BytesToBytesMap extends MemoryConsumer 
{
 
   private final boolean enablePerfMetrics;
 
-  private long timeSpentResizingNs = 0;
-
   private long numProbes = 0;
 
   private long numKeyLookups = 0;
 
-  private long numHashCollisions = 0;
-
   private long peakMemoryUsedBytes = 0L;
 
   private final int initialCapacity;
@@ -489,10 +485,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
             );
             if (areEqual) {
               return;
-            } else {
-              if (enablePerfMetrics) {
-                numHashCollisions++;
-              }
             }
           }
         }
@@ -860,16 +852,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
   }
 
   /**
-   * Returns the total amount of time spent resizing this map (in nanoseconds).
-   */
-  public long getTimeSpentResizingNs() {
-    if (!enablePerfMetrics) {
-      throw new IllegalStateException();
-    }
-    return timeSpentResizingNs;
-  }
-
-  /**
    * Returns the average number of probes per key lookup.
    */
   public double getAverageProbesPerLookup() {
@@ -879,13 +861,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
     return (1.0 * numProbes) / numKeyLookups;
   }
 
-  public long getNumHashCollisions() {
-    if (!enablePerfMetrics) {
-      throw new IllegalStateException();
-    }
-    return numHashCollisions;
-  }
-
   @VisibleForTesting
   public int getNumDataPages() {
     return dataPages.size();
@@ -923,10 +898,6 @@ public final class BytesToBytesMap extends MemoryConsumer {
   void growAndRehash() {
     assert(longArray != null);
 
-    long resizeStartTime = -1;
-    if (enablePerfMetrics) {
-      resizeStartTime = System.nanoTime();
-    }
     // Store references to the old data structures to be used when we re-hash
     final LongArray oldLongArray = longArray;
     final int oldCapacity = (int) oldLongArray.size() / 2;
@@ -951,9 +922,5 @@ public final class BytesToBytesMap extends MemoryConsumer {
       longArray.set(newPos * 2 + 1, hashcode);
     }
     freeArray(oldLongArray);
-
-    if (enablePerfMetrics) {
-      timeSpentResizingNs += System.nanoTime() - resizeStartTime;
-    }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/fd132552/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
index b09edf3..0396168 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
@@ -215,7 +215,7 @@ trait HashJoin {
 
     // At the end of the task, we update the avg hash probe.
     TaskContext.get().addTaskCompletionListener(_ =>
-      avgHashProbe.set(hashed.getAverageProbesPerLookup()))
+      avgHashProbe.set(hashed.getAverageProbesPerLookup))
 
     val resultProj = createResultProjection
     joinedIter.map { r =>

http://git-wip-us.apache.org/repos/asf/spark/blob/fd132552/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
index 3c70285..2038cb9 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
@@ -83,7 +83,7 @@ private[execution] sealed trait HashedRelation extends 
KnownSizeEstimation {
   /**
    * Returns the average number of probes per key lookup.
    */
-  def getAverageProbesPerLookup(): Double
+  def getAverageProbesPerLookup: Double
 }
 
 private[execution] object HashedRelation {
@@ -280,7 +280,7 @@ private[joins] class UnsafeHashedRelation(
     read(in.readInt, in.readLong, in.readBytes)
   }
 
-  override def getAverageProbesPerLookup(): Double = 
binaryMap.getAverageProbesPerLookup()
+  override def getAverageProbesPerLookup: Double = 
binaryMap.getAverageProbesPerLookup
 }
 
 private[joins] object UnsafeHashedRelation {
@@ -776,7 +776,7 @@ private[execution] final class LongToUnsafeRowMap(val mm: 
TaskMemoryManager, cap
   /**
    * Returns the average number of probes per key lookup.
    */
-  def getAverageProbesPerLookup(): Double = numProbes.toDouble / numKeyLookups
+  def getAverageProbesPerLookup: Double = numProbes.toDouble / numKeyLookups
 }
 
 private[joins] class LongHashedRelation(
@@ -829,7 +829,7 @@ private[joins] class LongHashedRelation(
     map = in.readObject().asInstanceOf[LongToUnsafeRowMap]
   }
 
-  override def getAverageProbesPerLookup(): Double = 
map.getAverageProbesPerLookup()
+  override def getAverageProbesPerLookup: Double = 
map.getAverageProbesPerLookup
 }
 
 /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-21052][SQL][FOLLOW-UP] Add hash map metrics to join

Reply via email to