This is an automated email from the ASF dual-hosted git repository.
lresende pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-toree.git
The following commit(s) were added to refs/heads/master by this push:
new 06294028 [TOREE-537] Upgrade Spark 3.3.2 (#201)
06294028 is described below
commit 062940280865c954c64a54cadd20b03f14c89caa
Author: Cheng Pan <[email protected]>
AuthorDate: Thu Aug 10 04:22:30 2023 +0800
[TOREE-537] Upgrade Spark 3.3.2 (#201)
---
Dockerfile | 13 +++++++------
Dockerfile.toree-dev | 11 ++++++-----
Makefile | 2 +-
build.sbt | 8 ++++----
etc/kernel.json | 2 +-
project/Dependencies.scala | 8 ++++----
6 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 0cf32a45..8154754e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,8 @@ RUN curl -sL https://deb.nodesource.com/setup_0.12 | bash - && \
npm install -g bower
# for Apache Spark demos
-ENV APACHE_SPARK_VERSION 3.0.3
+ENV APACHE_SPARK_VERSION 3.3.2
+ENV APACHE_SPARK_CUSTOM_NAME=hadoop3
RUN apt-get -y update && \
apt-get -y install software-properties-common
@@ -46,11 +47,11 @@ RUN echo "===> install Java" && \
update-java-alternatives -s java-8-oracle
RUN cd /tmp && \
- wget -q http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz && \
- tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz -C /usr/local && \
- rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz
+ wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
+ tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
+ rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz
-RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7 spark
+RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} spark
# R support
RUN apt-get update && \
@@ -61,7 +62,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
ENV SPARK_HOME /usr/local/spark
-ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
+ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip
ENV PYSPARK_PYTHON /home/main/anaconda2/envs/python3/bin/python
ENV R_LIBS_USER $SPARK_HOME/R/lib
diff --git a/Dockerfile.toree-dev b/Dockerfile.toree-dev
index 4dad9171..fa3fc975 100644
--- a/Dockerfile.toree-dev
+++ b/Dockerfile.toree-dev
@@ -23,7 +23,8 @@ FROM jupyter/all-spark-notebook
USER root
# Spark dependencies
-ENV APACHE_SPARK_VERSION 3.0.3
+ENV APACHE_SPARK_VERSION 3.3.2
+ENV APACHE_SPARK_CUSTOM_NAME=hadoop3
RUN apt-get -y update && \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java && \
@@ -35,14 +36,14 @@ RUN apt-get -y update && \
# Installing Spark3
RUN cd /tmp && \
- wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz && \
- tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz -C /usr/local && \
- rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz
+ wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
+ tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
+ rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz
# Overwrite symlink
RUN cd /usr/local && \
rm spark && \
- ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7 spark
+ ln -s spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} spark
# Remove other scala kernels
RUN cd /opt/conda/share/jupyter/kernels/ && \
diff --git a/Makefile b/Makefile
index ad67676b..2a5699c7 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ IS_SNAPSHOT?=true
SNAPSHOT:=-SNAPSHOT
endif
-APACHE_SPARK_VERSION?=3.0.3
+APACHE_SPARK_VERSION?=3.3.2
SCALA_VERSION?=2.12
IMAGE?=jupyter/all-spark-notebook:latest
EXAMPLE_IMAGE?=apache/toree-examples
diff --git a/build.sbt b/build.sbt
index d41889c2..4c32a31e 100644
--- a/build.sbt
+++ b/build.sbt
@@ -22,11 +22,11 @@ ThisBuild / version := Properties.envOrElse("VERSION", "0.0.0-dev") +
(if ((ThisBuild / isSnapshot ).value) "-SNAPSHOT" else "")
ThisBuild / isSnapshot := Properties.envOrElse("IS_SNAPSHOT","true").toBoolean
ThisBuild / organization := "org.apache.toree.kernel"
-ThisBuild / crossScalaVersions := Seq("2.12.15") // https://github.com/scala/bug/issues/12475, for Spark 3.2.0
+ThisBuild / crossScalaVersions := Seq("2.12.15")
ThisBuild / scalaVersion := (ThisBuild / crossScalaVersions ).value.head
ThisBuild / Dependencies.sparkVersion := {
val envVar = "APACHE_SPARK_VERSION"
- val defaultVersion = "3.0.0"
+ val defaultVersion = "3.3.2"
Properties.envOrNone(envVar) match {
case None =>
@@ -58,8 +58,8 @@ ThisBuild / javacOptions ++= Seq(
"-Xlint:-options",
"-Xlint:-processing",
"-Werror", // Treat warnings as errors
- "-source", "1.6",
- "-target", "1.6"
+ "-source", "1.8",
+ "-target", "1.8"
)
// Options provided to forked JVMs through sbt, based on our .jvmopts file
ThisBuild / javaOptions ++= Seq(
diff --git a/etc/kernel.json b/etc/kernel.json
index a428a57e..868215ac 100644
--- a/etc/kernel.json
+++ b/etc/kernel.json
@@ -4,7 +4,7 @@
},
"display_name": "Apache Toree (development)",
"env": {
- "PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9-src.zip",
+ "PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9.5-src.zip",
"SPARK_HOME": "/usr/local/spark",
"CAPTURE_STANDARD_ERR": "true",
"MAX_INTERPRETER_THREADS": "16",
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 9c90ee6e..eb763caf 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -36,10 +36,10 @@ object Dependencies {
val coursier = "io.get-coursier" %% "coursier" % coursierVersion // Apache v2
val coursierCache = "io.get-coursier" %% "coursier-cache" % coursierVersion // Apache v2
- val ivy = "org.apache.ivy" % "ivy" % "2.4.0" // Apache v2
+ val ivy = "org.apache.ivy" % "ivy" % "2.5.1" // Apache v2
- // use the same jackson version in test than the one provided at runtime by Spark 3.0.0
- val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % "2.10.0" // Apache v2
+ // use the same jackson version in test than the one provided at runtime by Spark 3.3.2
+ val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.4.2" // Apache v2
val jeroMq = "org.zeromq" % "jeromq" % "0.5.3" // MPL v2
@@ -55,7 +55,7 @@ object Dependencies {
val scalaTest = "org.scalatest" %% "scalatest" % "3.0.8" // Apache v2
- val slf4jApi = "org.slf4j" % "slf4j-api" % "1.7.30" // MIT
+ val slf4jApi = "org.slf4j" % "slf4j-api" % "1.7.32" // MIT
val sparkVersion = settingKey[String]("Version of Apache Spark to use in Toree") // defined in root build
val sparkCore = Def.setting{ "org.apache.spark" %% "spark-core" % sparkVersion.value } // Apache v2