This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f732647 [SPARK-27054][BUILD][SQL] Remove the Calcite dependency f732647 is described below commit f732647ae4f486531440dd1239719085c367181e Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Sat Mar 9 16:34:24 2019 -0800 [SPARK-27054][BUILD][SQL] Remove the Calcite dependency ## What changes were proposed in this pull request? Calcite is only used for [runSqlHive](https://github.com/apache/spark/blob/02bbe977abaf7006b845a7e99d612b0235aa0025/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala#L699-L705) when `hive.cbo.enable=true` ([SemanticAnalyzer](https://github.com/apache/hive/blob/release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java#L278-L280)). So we can disable `hive.cbo.enable` and remove the Calcite dependency. ## How was this patch tested? Existing tests. Closes #23970 from wangyum/SPARK-27054. Lead-authored-by: Yuming Wang <yumw...@ebay.com> Co-authored-by: Yuming Wang <wgy...@gmail.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- LICENSE-binary | 3 - NOTICE-binary | 9 --- dev/deps/spark-deps-hadoop-2.7 | 4 -- dev/deps/spark-deps-hadoop-3.1 | 4 -- pom.xml | 72 ++-------------------- sql/hive/pom.xml | 8 --- .../spark/sql/hive/client/HiveClientImpl.scala | 2 + .../org/apache/spark/sql/hive/client/package.scala | 41 ++++++++---- 8 files changed, 37 insertions(+), 106 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 541cc4c..0c157cf 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -260,9 +260,6 @@ net.sf.supercsv:super-csv org.apache.arrow:arrow-format org.apache.arrow:arrow-memory org.apache.arrow:arrow-vector -org.apache.calcite:calcite-avatica -org.apache.calcite:calcite-core -org.apache.calcite:calcite-linq4j org.apache.commons:commons-crypto org.apache.commons:commons-lang3 org.apache.hadoop:hadoop-annotations diff --git a/NOTICE-binary b/NOTICE-binary index b707c43..df41618 100644 --- a/NOTICE-binary +++ 
b/NOTICE-binary @@ -792,15 +792,6 @@ Copyright 2005-2006 The Apache Software Foundation Apache Jakarta HttpClient Copyright 1999-2007 The Apache Software Foundation -Calcite Avatica -Copyright 2012-2015 The Apache Software Foundation - -Calcite Core -Copyright 2012-2015 The Apache Software Foundation - -Calcite Linq4j -Copyright 2012-2015 The Apache Software Foundation - Apache HttpClient Copyright 1999-2017 The Apache Software Foundation diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index d53039f..53267ea 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -24,9 +24,6 @@ avro-mapred-1.8.2-hadoop2.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.12-0.13.2.jar breeze_2.12-0.13.2.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar chill-java-0.9.3.jar chill_2.12-0.9.3.jar commons-beanutils-1.7.0.jar @@ -57,7 +54,6 @@ datanucleus-api-jdo-3.2.6.jar datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar -eigenbase-properties-1.1.5.jar flatbuffers-java-1.9.0.jar generex-1.0.1.jar gson-2.2.4.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index d1a6b27..367bd45 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -22,9 +22,6 @@ avro-mapred-1.8.2-hadoop2.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.12-0.13.2.jar breeze_2.12-0.13.2.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar chill-java-0.9.3.jar chill_2.12-0.9.3.jar commons-beanutils-1.9.3.jar @@ -56,7 +53,6 @@ datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar dnsjava-2.1.7.jar ehcache-3.3.1.jar -eigenbase-properties-1.1.5.jar flatbuffers-java-1.9.0.jar generex-1.0.1.jar geronimo-jcache_1.0_spec-1.0-alpha-1.jar diff --git a/pom.xml b/pom.xml index 0e1913a..1608309 100644 --- a/pom.xml +++ b/pom.xml @@ -168,7 +168,6 @@ 
<fasterxml.jackson.version>2.9.8</fasterxml.jackson.version> <snappy.version>1.1.7.1</snappy.version> <netlib.java.version>1.1.2</netlib.java.version> - <calcite.version>1.2.0-incubating</calcite.version> <commons-codec.version>1.10</commons-codec.version> <commons-io.version>2.4</commons-io.version> <!-- org.apache.commons/commons-lang/--> @@ -1467,12 +1466,16 @@ <groupId>org.apache.avro</groupId> <artifactId>avro-mapred</artifactId> </exclusion> - <!-- this is needed and must be explicitly included later--> + <!-- Do not need Calcite because we disabled hive.cbo.enable --> <exclusion> <groupId>org.apache.calcite</groupId> <artifactId>calcite-core</artifactId> </exclusion> <exclusion> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-avatica</artifactId> + </exclusion> + <exclusion> <groupId>org.apache.curator</groupId> <artifactId>apache-curator</artifactId> </exclusion> @@ -1842,71 +1845,6 @@ <scope>compile</scope> </dependency> <dependency> - <groupId>org.apache.calcite</groupId> - <artifactId>calcite-core</artifactId> - <version>${calcite.version}</version> - <exclusions> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-annotations</artifactId> - </exclusion> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-core</artifactId> - </exclusion> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - </exclusion> - <exclusion> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - </exclusion> - <exclusion> - <groupId>com.google.code.findbugs</groupId> - <artifactId>jsr305</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.janino</groupId> - <artifactId>janino</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.janino</groupId> - <artifactId>commons-compiler</artifactId> - </exclusion> - <!-- hsqldb interferes with the use of derby as the default db - in hive's use of datanucleus. 
- --> - <exclusion> - <groupId>org.hsqldb</groupId> - <artifactId>hsqldb</artifactId> - </exclusion> - <exclusion> - <groupId>org.pentaho</groupId> - <artifactId>pentaho-aggdesigner-algorithm</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.calcite</groupId> - <artifactId>calcite-avatica</artifactId> - <version>${calcite.version}</version> - <exclusions> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-annotations</artifactId> - </exclusion> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-core</artifactId> - </exclusion> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> <groupId>org.codehaus.janino</groupId> <artifactId>janino</artifactId> <version>${janino.version}</version> diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index dc8b733..55afbe7 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -130,14 +130,6 @@ <artifactId>commons-httpclient</artifactId> </dependency> <dependency> - <groupId>org.apache.calcite</groupId> - <artifactId>calcite-avatica</artifactId> - </dependency> - <dependency> - <groupId>org.apache.calcite</groupId> - <artifactId>calcite-core</artifactId> - </dependency> - <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> </dependency> diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 74237a9..8132dee 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -178,6 +178,8 @@ private[hive] class HiveClientImpl( """.stripMargin) hiveConf.set(k, v) } + // Disable CBO because we removed the Calcite dependency. 
+ hiveConf.setBoolean("hive.cbo.enable", false) val state = new SessionState(hiveConf) if (clientLoader.cachedHive != null) { Hive.set(clientLoader.cachedHive.asInstanceOf[Hive]) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index b6a4949..70d042e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -29,19 +29,20 @@ package object client { case object v12 extends HiveVersion("0.12.0") case object v13 extends HiveVersion("0.13.1") - // Hive 0.14 depends on calcite 0.9.2-incubating-SNAPSHOT which does not exist in - // maven central anymore, so override those with a version that exists. + // Do not need Calcite because we disabled hive.cbo.enable. // - // The other excluded dependencies are also nowhere to be found, so exclude them explicitly. If + // The other excluded dependencies are nowhere to be found, so exclude them explicitly. If // they're needed by the metastore client, users will have to dig them out of somewhere and use // configuration to point Spark at the correct jars. case object v14 extends HiveVersion("0.14.0", - extraDeps = Seq("org.apache.calcite:calcite-core:1.3.0-incubating", - "org.apache.calcite:calcite-avatica:1.3.0-incubating"), - exclusions = Seq("org.pentaho:pentaho-aggdesigner-algorithm")) + exclusions = Seq("org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-avatica", + "org.pentaho:pentaho-aggdesigner-algorithm")) case object v1_0 extends HiveVersion("1.0.0", exclusions = Seq("eigenbase:eigenbase-properties", + "org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-avatica", "org.pentaho:pentaho-aggdesigner-algorithm", "net.hydromatic:linq4j", "net.hydromatic:quidem")) @@ -51,6 +52,8 @@ package object client { // and fails. 
case object v1_1 extends HiveVersion("1.1.0", exclusions = Seq("eigenbase:eigenbase-properties", + "org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-avatica", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm", "net.hydromatic:linq4j", @@ -58,32 +61,48 @@ package object client { case object v1_2 extends HiveVersion("1.2.2", exclusions = Seq("eigenbase:eigenbase-properties", + "org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-avatica", "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm", "net.hydromatic:linq4j", "net.hydromatic:quidem")) case object v2_0 extends HiveVersion("2.0.1", - exclusions = Seq("org.apache.curator:*", + exclusions = Seq("org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-avatica", + "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) case object v2_1 extends HiveVersion("2.1.1", - exclusions = Seq("org.apache.curator:*", + exclusions = Seq("org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-avatica", + "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) case object v2_2 extends HiveVersion("2.2.0", - exclusions = Seq("org.apache.curator:*", + exclusions = Seq("org.apache.calcite:calcite-core", + "org.apache.calcite:calcite-druid", + "org.apache.calcite.avatica:avatica", + "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) + // Since HIVE-14496, Hive materialized view need calcite-core. + // For spark, only VersionsSuite currently creates a hive materialized view for testing. 
case object v2_3 extends HiveVersion("2.3.4", - exclusions = Seq("org.apache.curator:*", + exclusions = Seq("org.apache.calcite:calcite-druid", + "org.apache.calcite.avatica:avatica", + "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings + // Since HIVE-14496, Hive.java uses calcite-core case object v3_1 extends HiveVersion("3.1.1", extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", "org.apache.derby:derby:10.14.1.0"), - exclusions = Seq("org.apache.curator:*", + exclusions = Seq("org.apache.calcite:calcite-druid", + "org.apache.calcite.avatica:avatica", + "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) val allSupportedHiveVersions = Set(v12, v13, v14, v1_0, v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_1) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org