This is an automated email from the ASF dual-hosted git repository. pdallig pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new 3ebb815737 [ZEPPELIN-5946][FOLLOWUP] Use Spark 3.4.1 in default (#4652) 3ebb815737 is described below commit 3ebb81573786a928c79212c6502532dfc85ab0ba Author: Cheng Pan <cheng...@apache.org> AuthorDate: Fri Oct 6 14:04:00 2023 +0800 [ZEPPELIN-5946][FOLLOWUP] Use Spark 3.4.1 in default (#4652) * [ZEPPELIN-5946][FOLLOWUP] Use Spark 3.4.1 and Scala 2.12 in default * Update docs/setup/basics/how_to_build.md Co-authored-by: Matthias Koch <23187557+matthias-k...@users.noreply.github.com> * reflect * default 2.11 --------- Co-authored-by: Matthias Koch <23187557+matthias-k...@users.noreply.github.com> --- dev/change_scala_version.sh | 8 +++--- docs/setup/basics/how_to_build.md | 33 +++++++++++----------- k8s/zeppelin-server.yaml | 2 +- pom.xml | 2 +- rlang/pom.xml | 10 +++++-- spark/README.md | 4 --- spark/interpreter/pom.xml | 10 +++---- spark/pom.xml | 4 +-- spark/scala-2.12/pom.xml | 4 +-- spark/scala-2.13/pom.xml | 4 +-- .../zeppelin/spark/SparkScala213Interpreter.scala | 14 ++++++++- spark/spark3-shims/pom.xml | 2 +- zeppelin-integration/pom.xml | 12 -------- zeppelin-web/pom.xml | 12 -------- 14 files changed, 55 insertions(+), 66 deletions(-) diff --git a/dev/change_scala_version.sh b/dev/change_scala_version.sh index 5713945077..581f463f01 100755 --- a/dev/change_scala_version.sh +++ b/dev/change_scala_version.sh @@ -19,7 +19,7 @@ set -e -VALID_VERSIONS=( 2.10 2.11 ) +VALID_VERSIONS=( 2.11 2.12 ) usage() { echo "Usage: $(basename $0) [-h|--help] <version> @@ -45,11 +45,11 @@ check_scala_version() { check_scala_version "${TO_VERSION}" if [ "${TO_VERSION}" = "2.11" ]; then - FROM_VERSION="2.10" - SCALA_LIB_VERSION="2.11.7" + FROM_VERSION="2.12" + SCALA_LIB_VERSION="2.11.12" else FROM_VERSION="2.11" - SCALA_LIB_VERSION="2.10.5" + SCALA_LIB_VERSION="2.12.17" fi sed_i() { diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md index d31dcb8f5a..32f9918c1a 100644 --- a/docs/setup/basics/how_to_build.md +++ b/docs/setup/basics/how_to_build.md @@ -84,7 +84,7 @@ You can directly start Zeppelin by running the following command after successfu To be noticed, this scala profile affect the modules (e.g. cassandra) that use scala except Spark interpreter (Spark interpreter use other profiles to control its scala version, see the doc below). -Set scala version (default 2.10). Available profiles are +Set scala version (default 2.11). Available profiles are ``` -Pscala-2.11 @@ -93,9 +93,9 @@ Set scala version (default 2.10). Available profiles are #### Spark Interpreter -To be noticed, the spark profiles here only affect the uni test (no need to specify `SPARK_HOME`) of spark interpreter. +To be noticed, the spark profiles here only affect the unit test (no need to specify `SPARK_HOME`) of spark interpreter. Zeppelin doesn't require you to build with different spark to make different versions of spark work in Zeppelin. -You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 1.6 to 3.0. +You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 3.2 to 3.4. To build with a specific Spark version or scala versions, define one or more of the following profiles and options: @@ -106,10 +106,9 @@ Set spark major version Available profiles are ``` +-Pspark-3.4 +-Pspark-3.3 -Pspark-3.2 --Pspark-3.1 --Pspark-3.0 --Pspark-2.4 ``` minor version can be adjusted by `-Dspark.version=x.x.x` @@ -117,13 +116,13 @@ minor version can be adjusted by `-Dspark.version=x.x.x` ##### `-Pspark-scala-[version] (optional)` To be noticed, these profiles also only affect the unit test (no need to specify `SPARK_HOME`) of Spark interpreter. -Actually Zeppelin supports all the versions of scala (2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`. +Actually Zeppelin supports all the versions of scala (2.12, 2.13) in Spark interpreter as long as you specify `SPARK_HOME`. Available profiles are ``` --Pspark-scala-2.11 -Pspark-scala-2.12 +-Pspark-scala-2.13 ``` #### Build hadoop with Zeppelin (`-Phadoop[version]`) @@ -131,7 +130,7 @@ Available profiles are To be noticed, hadoop profiles only affect Zeppelin server, it doesn't affect any interpreter. Zeppelin server use hadoop in some cases, such as using hdfs as notebook storage. You can check this [page](./hadoop_integration.html) for more details about how to configure hadoop in Zeppelin. -Set hadoop major version (default hadoop2). +Set hadoop major version (default hadoop3). Available profiles are ``` @@ -156,11 +155,11 @@ Build examples under zeppelin-examples directory Here are some examples with several options: ```bash -# build with spark-3.0, spark-scala-2.12 -./mvnw clean package -Pspark-3.0 -Pspark-scala-2.12 -DskipTests +# build with spark-3.3, spark-scala-2.12 +./mvnw clean package -Pspark-3.3 -Pspark-scala-2.12 -DskipTests -# build with spark-2.4, spark-scala-2.11 -./mvnw clean package -Pspark-2.4 -Pspark-scala-2.11 -DskipTests +# build with spark-3.4, spark-scala-2.13 +./mvnw clean package -Pspark-3.4 -Pspark-scala-2.13 -DskipTests ``` @@ -186,7 +185,7 @@ spark.bin.download.url # default http://d3kbcqa49mib13.cloudfront.net/${spark.ar Py4J package ```bash -python.py4j.version # default 0.9.2 +python.py4j.version # default 0.10.9.7 pypi.repo.url # default https://pypi.python.org/packages python.py4j.repo.folder # default /64/5c/01e13b68e8caafece40d549f232c9b5677ad1016071a48d04cc3895acaa3 ``` @@ -200,7 +199,7 @@ Frontend Maven Plugin configurations ``` plugin.frontend.nodeDownloadRoot # default https://nodejs.org/dist/ -plugin.frontend.npmDownloadRoot # default http://registry.npmjs.org/npm/-/ +plugin.frontend.npmDownloadRoot # default https://registry.npmjs.org/npm/-/ plugin.frontend.yarnDownloadRoot # default https://github.com/yarnpkg/yarn/releases/download/ ``` @@ -301,10 +300,10 @@ To package the final distribution including the compressed archive, run: To build a distribution with specific profiles, run: ```sh -./mvnw clean package -Pbuild-distr -Pspark-2.4 +./mvnw clean package -Pbuild-distr -Pspark-3.4 ``` -The profiles `-Pspark-2.4` can be adjusted if you wish to build to a specific spark versions. +The profiles `-Pspark-3.4` can be adjusted if you wish to build to a specific spark versions. The archive is generated under _`zeppelin-distribution/target`_ directory diff --git a/k8s/zeppelin-server.yaml b/k8s/zeppelin-server.yaml index eba48e38d0..bca207c4f6 100644 --- a/k8s/zeppelin-server.yaml +++ b/k8s/zeppelin-server.yaml @@ -28,7 +28,7 @@ data: # Default value is 'local.zeppelin-project.org' while it points 127.0.0.1 and `kubectl port-forward zeppelin-server` will give localhost to connects. # If you have your ingress controller configured to connect to `zeppelin-server` service and have a domain name for it (with wildcard subdomain point the same address), you can replace serviceDomain field with your own domain. SERVICE_DOMAIN: local.zeppelin-project.org:8080 - ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: spark:2.4.5 + ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: spark:3.4.1 ZEPPELIN_K8S_CONTAINER_IMAGE: zeppelin-interpreter:0.11.0-SNAPSHOT ZEPPELIN_HOME: /opt/zeppelin ZEPPELIN_SERVER_RPC_PORTRANGE: 12320:12320 diff --git a/pom.xml b/pom.xml index 84a190c372..d64b831651 100644 --- a/pom.xml +++ b/pom.xml @@ -105,7 +105,7 @@ <scala.version>${scala.2.11.version}</scala.version> <scala.binary.version>2.11</scala.binary.version> <scala.2.11.version>2.11.12</scala.2.11.version> - <scala.2.12.version>2.12.16</scala.2.12.version> + <scala.2.12.version>2.12.17</scala.2.12.version> <scalatest.version>3.0.7</scalatest.version> <scalacheck.version>1.12.5</scalacheck.version> diff --git a/rlang/pom.xml b/rlang/pom.xml index 006c7bd33d..a291c63b51 100644 --- a/rlang/pom.xml +++ b/rlang/pom.xml @@ -34,7 +34,7 @@ <properties> <interpreter.name>r</interpreter.name> - <spark.version>2.4.5</spark.version> + <spark.version>3.4.1</spark.version> <spark.archive>spark-${spark.version}</spark.archive> <spark.bin.download.url> @@ -98,8 +98,14 @@ <dependency> <groupId>org.apache.spark</groupId> - <artifactId>spark-core_2.11</artifactId> + <artifactId>spark-core_2.12</artifactId> <version>${spark.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client-api</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> diff --git a/spark/README.md b/spark/README.md index a9b039ec5e..76220accd8 100644 --- a/spark/README.md +++ b/spark/README.md @@ -12,16 +12,12 @@ Spark interpreter is the first and most important interpreter of Zeppelin. It su Due to incompatibility between Spark versions, there are several spark-shims modules for each supported Spark version. * spark-scala-parent - Parent module for each Scala module -* scala-2.11 - - Scala module for Scala 2.11 * scala-2.12 - Scala module for Scala 2.12 * scala-2.13 - Scala module for Scala 2.13 * spark-shims - Parent module for each Spark module -* spark2-shims - - Shims module for Spark2 * spark3-shims - Shims module for Spark3 diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml index ee8d6a5afa..f3686a6238 100644 --- a/spark/interpreter/pom.xml +++ b/spark/interpreter/pom.xml @@ -48,7 +48,7 @@ <spark.version>3.4.1</spark.version> <protobuf.version>3.21.12</protobuf.version> <py4j.version>0.10.9.7</py4j.version> - <spark.scala.version>2.12.7</spark.scala.version> + <spark.scala.version>2.12.17</spark.scala.version> <spark.scala.binary.version>2.12</spark.scala.binary.version> <spark.archive>spark-${spark.version}</spark.archive> @@ -136,7 +136,7 @@ <exclusions> <exclusion> <groupId>org.apache.spark</groupId> - <artifactId>spark-core_2.11</artifactId> + <artifactId>spark-core_2.12</artifactId> </exclusion> </exclusions> </dependency> @@ -494,7 +494,7 @@ <profile> <id>spark-scala-2.13</id> <properties> - <spark.scala.version>2.13.4</spark.scala.version> + <spark.scala.version>2.13.8</spark.scala.version> <spark.scala.binary.version>2.13</spark.scala.binary.version> </properties> </profile> @@ -505,7 +505,7 @@ <activeByDefault>true</activeByDefault> </activation> <properties> - <spark.scala.version>2.12.7</spark.scala.version> + <spark.scala.version>2.12.17</spark.scala.version> <spark.scala.binary.version>2.12</spark.scala.binary.version> </properties> </profile> @@ -532,7 +532,7 @@ <datanucleus.core.version>4.1.17</datanucleus.core.version> <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version> <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version> - <spark.version>3.3.2</spark.version> + <spark.version>3.3.3</spark.version> <protobuf.version>2.5.0</protobuf.version> <py4j.version>0.10.9.5</py4j.version> </properties> diff --git a/spark/pom.xml b/spark/pom.xml index 7985b41261..b0931f5d61 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -40,8 +40,8 @@ <!-- spark versions --> <spark.version>3.4.1</spark.version> <protobuf.version>2.5.0</protobuf.version> - <py4j.version>0.10.9</py4j.version> - <spark.scala.version>2.12.18</spark.scala.version> + <py4j.version>0.10.9.7</py4j.version> + <spark.scala.version>2.12.17</spark.scala.version> <spark.scala.binary.version>2.12</spark.scala.binary.version> <scala.compile.version>${spark.scala.version}</scala.compile.version> diff --git a/spark/scala-2.12/pom.xml b/spark/scala-2.12/pom.xml index a76ee4d8b0..4f856450bd 100644 --- a/spark/scala-2.12/pom.xml +++ b/spark/scala-2.12/pom.xml @@ -31,8 +31,8 @@ <name>Zeppelin: Spark Interpreter Scala_2.12</name> <properties> - <spark.version>2.4.5</spark.version> - <spark.scala.version>2.12.15</spark.scala.version> + <spark.version>3.4.1</spark.version> + <spark.scala.version>2.12.17</spark.scala.version> <spark.scala.binary.version>2.12</spark.scala.binary.version> <spark.scala.compile.version>${spark.scala.version}</spark.scala.compile.version> </properties> diff --git a/spark/scala-2.13/pom.xml b/spark/scala-2.13/pom.xml index d2f337ed1b..bd98978c27 100644 --- a/spark/scala-2.13/pom.xml +++ b/spark/scala-2.13/pom.xml @@ -31,8 +31,8 @@ <name>Zeppelin: Spark Interpreter Scala_2.13</name> <properties> - <spark.version>3.3.0</spark.version> - <spark.scala.version>2.13.4</spark.scala.version> + <spark.version>3.4.1</spark.version> + <spark.scala.version>2.13.8</spark.scala.version> <spark.scala.binary.version>2.13</spark.scala.binary.version> <spark.scala.compile.version>${spark.scala.version}</spark.scala.compile.version> </properties> diff --git a/spark/scala-2.13/src/main/scala/org/apache/zeppelin/spark/SparkScala213Interpreter.scala b/spark/scala-2.13/src/main/scala/org/apache/zeppelin/spark/SparkScala213Interpreter.scala index 9b966e46ba..659e5788f5 100644 --- a/spark/scala-2.13/src/main/scala/org/apache/zeppelin/spark/SparkScala213Interpreter.scala +++ b/spark/scala-2.13/src/main/scala/org/apache/zeppelin/spark/SparkScala213Interpreter.scala @@ -126,9 +126,21 @@ class SparkScala213Interpreter(conf: SparkConf, override def completion(buf: String, cursor: Int, context: InterpreterContext): java.util.List[InterpreterCompletion] = { + // ZEPPELIN-5946: Spark 3.2 uses Scala 2.13.5, Spark 3.3 to 3.5 use Scala 2.13.8. + // In Scala 2.13.7(scala/scala#9656), field "defString" was renamed to "name". + val completionCandidateClass = classOf[CompletionCandidate] + val nameMethod = try { + completionCandidateClass.getMethod("name") + } catch { + case _: NoSuchMethodException => + completionCandidateClass.getMethod("defString") + } scalaCompletion.complete(buf.substring(0, cursor), cursor) .candidates - .map(e => new InterpreterCompletion(e.defString, e.defString, null)) + .map { e: CompletionCandidate => + val name = nameMethod.invoke(e).asInstanceOf[String] + new InterpreterCompletion(name, name, null) + } .asJava } diff --git a/spark/spark3-shims/pom.xml b/spark/spark3-shims/pom.xml index 58137444aa..e5ba0aebc7 100644 --- a/spark/spark3-shims/pom.xml +++ b/spark/spark3-shims/pom.xml @@ -32,7 +32,7 @@ <properties> <scala.binary.version>2.12</scala.binary.version> - <spark.version>3.3.3</spark.version> + <spark.version>3.4.1</spark.version> </properties> <dependencies> diff --git a/zeppelin-integration/pom.xml b/zeppelin-integration/pom.xml index 8e5a17a442..ad412a965a 100644 --- a/zeppelin-integration/pom.xml +++ b/zeppelin-integration/pom.xml @@ -288,18 +288,6 @@ </dependency> </dependencies> </profile> - - <profile> - <id>spark-scala-2.11</id> - <dependencies> - <dependency> - <groupId>org.apache.zeppelin</groupId> - <artifactId>spark-scala-2.11</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> - </dependencies> - </profile> </profiles> </project> diff --git a/zeppelin-web/pom.xml b/zeppelin-web/pom.xml index a87893741d..d37a9ae447 100644 --- a/zeppelin-web/pom.xml +++ b/zeppelin-web/pom.xml @@ -302,18 +302,6 @@ </dependency> </dependencies> </profile> - - <profile> - <id>spark-scala-2.11</id> - <dependencies> - <dependency> - <groupId>org.apache.zeppelin</groupId> - <artifactId>spark-scala-2.11</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> - </dependencies> - </profile> </profiles> </project>