This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f732647  [SPARK-27054][BUILD][SQL] Remove the Calcite dependency
f732647 is described below

commit f732647ae4f486531440dd1239719085c367181e
Author: Yuming Wang <yumw...@ebay.com>
AuthorDate: Sat Mar 9 16:34:24 2019 -0800

    [SPARK-27054][BUILD][SQL] Remove the Calcite dependency
    
    ## What changes were proposed in this pull request?
    
    Calcite is only used for [runSqlHive](https://github.com/apache/spark/blob/02bbe977abaf7006b845a7e99d612b0235aa0025/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala#L699-L705) when `hive.cbo.enable=true` ([SemanticAnalyzer](https://github.com/apache/hive/blob/release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java#L278-L280)).
    So we can disable `hive.cbo.enable` and remove the Calcite dependency.
    
    ## How was this patch tested?
    
    Existing tests
    
    Closes #23970 from wangyum/SPARK-27054.
    
    Lead-authored-by: Yuming Wang <yumw...@ebay.com>
    Co-authored-by: Yuming Wang <wgy...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 LICENSE-binary                                     |  3 -
 NOTICE-binary                                      |  9 ---
 dev/deps/spark-deps-hadoop-2.7                     |  4 --
 dev/deps/spark-deps-hadoop-3.1                     |  4 --
 pom.xml                                            | 72 ++--------------------
 sql/hive/pom.xml                                   |  8 ---
 .../spark/sql/hive/client/HiveClientImpl.scala     |  2 +
 .../org/apache/spark/sql/hive/client/package.scala | 41 ++++++++----
 8 files changed, 37 insertions(+), 106 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index 541cc4c..0c157cf 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -260,9 +260,6 @@ net.sf.supercsv:super-csv
 org.apache.arrow:arrow-format
 org.apache.arrow:arrow-memory
 org.apache.arrow:arrow-vector
-org.apache.calcite:calcite-avatica
-org.apache.calcite:calcite-core
-org.apache.calcite:calcite-linq4j
 org.apache.commons:commons-crypto
 org.apache.commons:commons-lang3
 org.apache.hadoop:hadoop-annotations
diff --git a/NOTICE-binary b/NOTICE-binary
index b707c43..df41618 100644
--- a/NOTICE-binary
+++ b/NOTICE-binary
@@ -792,15 +792,6 @@ Copyright 2005-2006 The Apache Software Foundation
 Apache Jakarta HttpClient
 Copyright 1999-2007 The Apache Software Foundation
 
-Calcite Avatica
-Copyright 2012-2015 The Apache Software Foundation
-
-Calcite Core
-Copyright 2012-2015 The Apache Software Foundation
-
-Calcite Linq4j
-Copyright 2012-2015 The Apache Software Foundation
-
 Apache HttpClient
 Copyright 1999-2017 The Apache Software Foundation
 
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index d53039f..53267ea 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -24,9 +24,6 @@ avro-mapred-1.8.2-hadoop2.jar
 bonecp-0.8.0.RELEASE.jar
 breeze-macros_2.12-0.13.2.jar
 breeze_2.12-0.13.2.jar
-calcite-avatica-1.2.0-incubating.jar
-calcite-core-1.2.0-incubating.jar
-calcite-linq4j-1.2.0-incubating.jar
 chill-java-0.9.3.jar
 chill_2.12-0.9.3.jar
 commons-beanutils-1.7.0.jar
@@ -57,7 +54,6 @@ datanucleus-api-jdo-3.2.6.jar
 datanucleus-core-3.2.10.jar
 datanucleus-rdbms-3.2.9.jar
 derby-10.12.1.1.jar
-eigenbase-properties-1.1.5.jar
 flatbuffers-java-1.9.0.jar
 generex-1.0.1.jar
 gson-2.2.4.jar
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index d1a6b27..367bd45 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -22,9 +22,6 @@ avro-mapred-1.8.2-hadoop2.jar
 bonecp-0.8.0.RELEASE.jar
 breeze-macros_2.12-0.13.2.jar
 breeze_2.12-0.13.2.jar
-calcite-avatica-1.2.0-incubating.jar
-calcite-core-1.2.0-incubating.jar
-calcite-linq4j-1.2.0-incubating.jar
 chill-java-0.9.3.jar
 chill_2.12-0.9.3.jar
 commons-beanutils-1.9.3.jar
@@ -56,7 +53,6 @@ datanucleus-rdbms-3.2.9.jar
 derby-10.12.1.1.jar
 dnsjava-2.1.7.jar
 ehcache-3.3.1.jar
-eigenbase-properties-1.1.5.jar
 flatbuffers-java-1.9.0.jar
 generex-1.0.1.jar
 geronimo-jcache_1.0_spec-1.0-alpha-1.jar
diff --git a/pom.xml b/pom.xml
index 0e1913a..1608309 100644
--- a/pom.xml
+++ b/pom.xml
@@ -168,7 +168,6 @@
     <fasterxml.jackson.version>2.9.8</fasterxml.jackson.version>
     <snappy.version>1.1.7.1</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
-    <calcite.version>1.2.0-incubating</calcite.version>
     <commons-codec.version>1.10</commons-codec.version>
     <commons-io.version>2.4</commons-io.version>
     <!-- org.apache.commons/commons-lang/-->
@@ -1467,12 +1466,16 @@
             <groupId>org.apache.avro</groupId>
             <artifactId>avro-mapred</artifactId>
           </exclusion>
-          <!--  this is needed and must be explicitly included later-->
+          <!--  Do not need Calcite because we disabled hive.cbo.enable -->
           <exclusion>
             <groupId>org.apache.calcite</groupId>
             <artifactId>calcite-core</artifactId>
           </exclusion>
           <exclusion>
+            <groupId>org.apache.calcite</groupId>
+            <artifactId>calcite-avatica</artifactId>
+          </exclusion>
+          <exclusion>
             <groupId>org.apache.curator</groupId>
             <artifactId>apache-curator</artifactId>
           </exclusion>
@@ -1842,71 +1845,6 @@
         <scope>compile</scope>
       </dependency>
       <dependency>
-        <groupId>org.apache.calcite</groupId>
-        <artifactId>calcite-core</artifactId>
-        <version>${calcite.version}</version>
-        <exclusions>
-          <exclusion>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-annotations</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-core</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-databind</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.google.guava</groupId>
-            <artifactId>guava</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.google.code.findbugs</groupId>
-            <artifactId>jsr305</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>janino</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>commons-compiler</artifactId>
-          </exclusion>
-          <!-- hsqldb interferes with the use of derby as the default db
-            in hive's use of datanucleus.
-          -->
-          <exclusion>
-            <groupId>org.hsqldb</groupId>
-            <artifactId>hsqldb</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>org.pentaho</groupId>
-            <artifactId>pentaho-aggdesigner-algorithm</artifactId>
-          </exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.calcite</groupId>
-        <artifactId>calcite-avatica</artifactId>
-        <version>${calcite.version}</version>
-        <exclusions>
-          <exclusion>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-annotations</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-core</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-databind</artifactId>
-          </exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
         <groupId>org.codehaus.janino</groupId>
         <artifactId>janino</artifactId>
         <version>${janino.version}</version>
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index dc8b733..55afbe7 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -130,14 +130,6 @@
       <artifactId>commons-httpclient</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.apache.calcite</groupId>
-      <artifactId>calcite-avatica</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.calcite</groupId>
-      <artifactId>calcite-core</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpclient</artifactId>
     </dependency>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 74237a9..8132dee 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -178,6 +178,8 @@ private[hive] class HiveClientImpl(
          """.stripMargin)
       hiveConf.set(k, v)
     }
+    // Disable CBO because we removed the Calcite dependency.
+    hiveConf.setBoolean("hive.cbo.enable", false)
     val state = new SessionState(hiveConf)
     if (clientLoader.cachedHive != null) {
       Hive.set(clientLoader.cachedHive.asInstanceOf[Hive])
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
index b6a4949..70d042e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
@@ -29,19 +29,20 @@ package object client {
     case object v12 extends HiveVersion("0.12.0")
     case object v13 extends HiveVersion("0.13.1")
 
-    // Hive 0.14 depends on calcite 0.9.2-incubating-SNAPSHOT which does not exist in
-    // maven central anymore, so override those with a version that exists.
+    // Do not need Calcite because we disabled hive.cbo.enable.
     //
-    // The other excluded dependencies are also nowhere to be found, so exclude them explicitly. If
+    // The other excluded dependencies are nowhere to be found, so exclude them explicitly. If
     // they're needed by the metastore client, users will have to dig them out of somewhere and use
     // configuration to point Spark at the correct jars.
     case object v14 extends HiveVersion("0.14.0",
-      extraDeps = Seq("org.apache.calcite:calcite-core:1.3.0-incubating",
-        "org.apache.calcite:calcite-avatica:1.3.0-incubating"),
-      exclusions = Seq("org.pentaho:pentaho-aggdesigner-algorithm"))
+      exclusions = Seq("org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-avatica",
+        "org.pentaho:pentaho-aggdesigner-algorithm"))
 
     case object v1_0 extends HiveVersion("1.0.0",
       exclusions = Seq("eigenbase:eigenbase-properties",
+        "org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-avatica",
         "org.pentaho:pentaho-aggdesigner-algorithm",
         "net.hydromatic:linq4j",
         "net.hydromatic:quidem"))
@@ -51,6 +52,8 @@ package object client {
     // and fails.
     case object v1_1 extends HiveVersion("1.1.0",
       exclusions = Seq("eigenbase:eigenbase-properties",
+        "org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-avatica",
         "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm",
         "net.hydromatic:linq4j",
@@ -58,32 +61,48 @@ package object client {
 
     case object v1_2 extends HiveVersion("1.2.2",
       exclusions = Seq("eigenbase:eigenbase-properties",
+        "org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-avatica",
         "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm",
         "net.hydromatic:linq4j",
         "net.hydromatic:quidem"))
 
     case object v2_0 extends HiveVersion("2.0.1",
-      exclusions = Seq("org.apache.curator:*",
+      exclusions = Seq("org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-avatica",
+        "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm"))
 
     case object v2_1 extends HiveVersion("2.1.1",
-      exclusions = Seq("org.apache.curator:*",
+      exclusions = Seq("org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-avatica",
+        "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm"))
 
     case object v2_2 extends HiveVersion("2.2.0",
-      exclusions = Seq("org.apache.curator:*",
+      exclusions = Seq("org.apache.calcite:calcite-core",
+        "org.apache.calcite:calcite-druid",
+        "org.apache.calcite.avatica:avatica",
+        "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm"))
 
+    // Since HIVE-14496, Hive materialized view need calcite-core.
+    // For spark, only VersionsSuite currently creates a hive materialized view for testing.
     case object v2_3 extends HiveVersion("2.3.4",
-      exclusions = Seq("org.apache.curator:*",
+      exclusions = Seq("org.apache.calcite:calcite-druid",
+        "org.apache.calcite.avatica:avatica",
+        "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm"))
 
     // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings
+    // Since HIVE-14496, Hive.java uses calcite-core
     case object v3_1 extends HiveVersion("3.1.1",
       extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0",
         "org.apache.derby:derby:10.14.1.0"),
-      exclusions = Seq("org.apache.curator:*",
+      exclusions = Seq("org.apache.calcite:calcite-druid",
+        "org.apache.calcite.avatica:avatica",
+        "org.apache.curator:*",
         "org.pentaho:pentaho-aggdesigner-algorithm"))
 
     val allSupportedHiveVersions = Set(v12, v13, v14, v1_0, v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_1)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to