Repository: spark
Updated Branches:
  refs/heads/master 681387b2d -> 091f81e1f


[SPARK-15762][SQL] Cache Metadata & StructType hashCodes; use singleton 
Metadata.empty

We should cache `Metadata.hashCode` and use a singleton for `Metadata.empty` 
because calculating metadata hashCodes appears to be a bottleneck for certain 
workloads.

We should also cache `StructType.hashCode`.

In an optimizer stress-test benchmark run by ericl, these `hashCode` calls 
accounted for roughly 40% of the total CPU time, and this bottleneck was 
completely eliminated by the caching added by this patch.

Author: Josh Rosen <joshro...@databricks.com>

Closes #13504 from JoshRosen/metadata-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/091f81e1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/091f81e1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/091f81e1

Branch: refs/heads/master
Commit: 091f81e1f7ef1581376c71e3872ce06f4c1713bd
Parents: 681387b
Author: Josh Rosen <joshro...@databricks.com>
Authored: Sat Jun 4 14:14:50 2016 -0700
Committer: Josh Rosen <joshro...@databricks.com>
Committed: Sat Jun 4 14:14:50 2016 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/types/Metadata.scala  | 7 +++++--
 .../main/scala/org/apache/spark/sql/types/StructType.scala    | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/091f81e1/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
index 1fb2e24..657bd86 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
@@ -104,7 +104,8 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any])
     }
   }
 
-  override def hashCode: Int = Metadata.hash(this)
+  private lazy val _hashCode: Int = Metadata.hash(this)
+  override def hashCode: Int = _hashCode
 
   private def get[T](key: String): T = {
     map(key).asInstanceOf[T]
@@ -115,8 +116,10 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any])
 
 object Metadata {
 
+  private[this] val _empty = new Metadata(Map.empty)
+
   /** Returns an empty Metadata. */
-  def empty: Metadata = new Metadata(Map.empty)
+  def empty: Metadata = _empty
 
   /** Creates a Metadata instance from JSON. */
   def fromJson(json: String): Metadata = {

http://git-wip-us.apache.org/repos/asf/spark/blob/091f81e1/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index fd2b524..9a92373 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -112,7 +112,8 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
     }
   }
 
-  override def hashCode(): Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]])
+  private lazy val _hashCode: Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]])
+  override def hashCode(): Int = _hashCode
 
   /**
    * Creates a new [[StructType]] by adding a new field.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to