This is an automated email from the ASF dual-hosted git repository.
ggal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git
The following commit(s) were added to refs/heads/master by this push:
new 710c0784 [LIVY-1010] Add support for Spark 3.5.6
710c0784 is described below
commit 710c0784dae73aa7c5a6ce0bac264bcfdefd7681
Author: György Gál <[email protected]>
AuthorDate: Tue Aug 12 19:38:36 2025 +0200
[LIVY-1010] Add support for Spark 3.5.6
## What changes were proposed in this pull request?
Keep Spark 3 support up to date with the latest Spark 3 release (3.5.6) and
upgrade dependencies.
## How was this patch tested?
Unit and integration tests.
---
.github/workflows/integration-tests.yaml | 3 +-
integration-test/pom.xml | 5 +++
pom.xml | 49 +++++++++++++++++-----
python-api/setup.py | 4 +-
.../apache/livy/repl/PythonInterpreterSpec.scala | 5 +++
.../org/apache/livy/repl/PythonSessionSpec.scala | 8 +++-
.../org/apache/livy/repl/SQLInterpreterSpec.scala | 2 +-
server/pom.xml | 5 +++
.../org/apache/livy/utils/LivySparkUtils.scala | 12 +++++-
.../org/apache/livy/utils/SparkKubernetesApp.scala | 2 +-
.../livy/thriftserver/ThriftServerSuites.scala | 9 ++--
thriftserver/session/pom.xml | 6 ---
12 files changed, 85 insertions(+), 25 deletions(-)
diff --git a/.github/workflows/integration-tests.yaml
b/.github/workflows/integration-tests.yaml
index 3ba4981f..1dafec6e 100644
--- a/.github/workflows/integration-tests.yaml
+++ b/.github/workflows/integration-tests.yaml
@@ -48,7 +48,8 @@ jobs:
-
name: Set Python 3 as default for Spark 3 builds
if: ${{ contains(matrix.maven_profile, 'spark3') }}
- run: pyenv global 3
+ # This can be removed once support for Python 2 and Spark 2 is removed and the default python executable is python3
+ run: pyenv global 3 && echo "PYSPARK_PYTHON=$(which python3)" >> "$GITHUB_ENV"
-
name: Build with Maven
run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
diff --git a/integration-test/pom.xml b/integration-test/pom.xml
index 209f161d..97413e0b 100644
--- a/integration-test/pom.xml
+++ b/integration-test/pom.xml
@@ -111,6 +111,11 @@
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </dependency>
+
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
diff --git a/pom.xml b/pom.xml
index 8c5e1050..a88e449f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -81,19 +81,21 @@
<asynchttpclient.version>2.10.1</asynchttpclient.version>
<hadoop.scope>compile</hadoop.scope>
<slf4j.version>1.7.36</slf4j.version>
- <reload4j.version>1.2.25</reload4j.version>
+ <reload4j.version>1.2.26</reload4j.version>
<spark.scala-2.11.version>2.4.5</spark.scala-2.11.version>
<spark.scala-2.12.version>2.4.5</spark.scala-2.12.version>
<spark.version>${spark.scala-2.11.version}</spark.version>
<kubernetes.client.version>5.6.0</kubernetes.client.version>
<hive.version>3.0.0</hive.version>
- <commons-codec.version>1.9</commons-codec.version>
- <httpclient.version>4.5.13</httpclient.version>
- <httpcore.version>4.4.4</httpcore.version>
+ <commons-codec.version>1.15</commons-codec.version>
+ <commons-lang3.version>3.17.0</commons-lang3.version>
+ <httpclient.version>4.5.14</httpclient.version>
+ <httpcore.version>4.4.16</httpcore.version>
<jackson.version>2.12.7</jackson.version>
<jackson-databind.version>2.12.7.1</jackson-databind.version>
+ <jacoco.version>0.8.13</jacoco.version>
<javax.servlet-api.version>3.1.0</javax.servlet-api.version>
- <jetty.version>9.4.50.v20221201</jetty.version>
+ <jetty.version>9.4.56.v20240826</jetty.version>
<junit.version>4.13.1</junit.version>
<libthrift.version>0.9.3</libthrift.version>
<kryo.version>4.0.2</kryo.version>
@@ -130,6 +132,8 @@
<!-- Set this to "true" to skip R tests. -->
<skipRTests>false</skipRTests>
+ <!-- Set this to "true" to skip PySpark2 tests. -->
+ <skipPySpark2Tests>false</skipPySpark2Tests>
<!-- Set this to "true" to skip PySpark3 tests. -->
<skipPySpark3Tests>false</skipPySpark3Tests>
@@ -307,6 +311,12 @@
<version>${commons-codec.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <version>${commons-lang3.version}</version>
+ </dependency>
+
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
@@ -412,6 +422,18 @@
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
@@ -789,7 +811,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <version>3.2.1</version>
+ <version>3.5.0</version>
</plugin>
<plugin>
@@ -810,6 +832,7 @@
<spark.ui.enabled>false</spark.ui.enabled>
<project.version>${project.version}</project.version>
<skipRTests>${skipRTests}</skipRTests>
+ <skipPySpark2Tests>${skipPySpark2Tests}</skipPySpark2Tests>
<skipPySpark3Tests>${skipPySpark3Tests}</skipPySpark3Tests>
</systemProperties>
<redirectTestOutputToFile>${test.redirectToFile}</redirectTestOutputToFile>
@@ -839,6 +862,7 @@
<spark.ui.enabled>false</spark.ui.enabled>
<project.version>${project.version}</project.version>
<skipRTests>${skipRTests}</skipRTests>
+ <skipPySpark2Tests>${skipPySpark2Tests}</skipPySpark2Tests>
<skipPySpark3Tests>${skipPySpark3Tests}</skipPySpark3Tests>
</systemProperties>
<stdout>D</stdout>
@@ -1081,7 +1105,7 @@
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
- <version>0.8.2</version>
+ <version>${jacoco.version}</version>
<executions>
<execution>
<goals>
@@ -1163,7 +1187,7 @@
<id>scala-2.12</id>
<properties>
<scala.binary.version>2.12</scala.binary.version>
- <scala.version>2.12.15</scala.version>
+ <scala.version>2.12.18</scala.version>
</properties>
</profile>
@@ -1192,15 +1216,20 @@
<profile>
<id>spark3</id>
<properties>
- <spark.version>3.2.3</spark.version>
+ <spark.version>3.5.6</spark.version>
+ <hadoop.major-minor.version>3</hadoop.major-minor.version>
+ <hadoop.version>3.3.4</hadoop.version>
<java.version>1.8</java.version>
<py4j.version>0.10.9.7</py4j.version>
<json4s.version>3.7.0-M11</json4s.version>
- <netty.version>4.1.92.Final</netty.version>
+ <netty.version>4.1.96.Final</netty.version>
+ <jackson.version>2.15.2</jackson.version>
+ <jackson-databind.version>2.15.2</jackson-databind.version>
<spark.bin.name>spark-${spark.version}-bin-hadoop${hadoop.major-minor.version}</spark.bin.name>
<spark.bin.download.url>
https://archive.apache.org/dist/spark/spark-${spark.version}/${spark.bin.name}.tgz
</spark.bin.download.url>
+ <skipPySpark2Tests>true</skipPySpark2Tests>
</properties>
</profile>
diff --git a/python-api/setup.py b/python-api/setup.py
index f7602e9f..f284edb7 100644
--- a/python-api/setup.py
+++ b/python-api/setup.py
@@ -31,7 +31,6 @@ requirements = [
'cloudpickle>=0.2.1',
'configparser>=3.5.0',
'future>=0.15.2',
- 'futures>=3.0.5',
'mock~=3.0.5',
'requests>=2.10.0',
'responses>=0.5.1',
@@ -54,6 +53,9 @@ setup(
keywords='livy pyspark development',
classifiers=CLASSIFIERS,
install_requires=requirements,
+ extras_require={
+ ':python_version == "2.7"': ['futures']
+ },
setup_requires=['pytest-runner', 'flake8'],
tests_require=['pytest']
)
diff --git
a/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
b/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
index e2d63e8c..bce8fd46 100644
--- a/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
+++ b/repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala
@@ -282,6 +282,11 @@ class Python2InterpreterSpec extends
PythonBaseInterpreterSpec {
implicit val formats = DefaultFormats
+ override protected def withFixture(test: NoArgTest): Outcome = {
+ assume(!sys.props.getOrElse("skipPySpark2Tests", "false").toBoolean,
"Skipping PySpark2 tests.")
+ test()
+ }
+
override def createInterpreter(): Interpreter = {
val sparkConf = new SparkConf()
PythonInterpreter(sparkConf, new SparkEntries(sparkConf))
diff --git a/repl/src/test/scala/org/apache/livy/repl/PythonSessionSpec.scala
b/repl/src/test/scala/org/apache/livy/repl/PythonSessionSpec.scala
index b54be11a..486dffef 100644
--- a/repl/src/test/scala/org/apache/livy/repl/PythonSessionSpec.scala
+++ b/repl/src/test/scala/org/apache/livy/repl/PythonSessionSpec.scala
@@ -170,7 +170,13 @@ abstract class PythonSessionSpec extends
BaseSessionSpec(PySpark) {
}
}
-class Python2SessionSpec extends PythonSessionSpec
+class Python2SessionSpec extends PythonSessionSpec {
+
+ override protected def withFixture(test: NoArgTest): Outcome = {
+ assume(!sys.props.getOrElse("skipPySpark2Tests", "false").toBoolean,
"Skipping PySpark2 tests.")
+ test()
+ }
+}
class Python3SessionSpec extends PythonSessionSpec with BeforeAndAfterAll {
diff --git a/repl/src/test/scala/org/apache/livy/repl/SQLInterpreterSpec.scala
b/repl/src/test/scala/org/apache/livy/repl/SQLInterpreterSpec.scala
index 3d9d4aca..5e839d4c 100644
--- a/repl/src/test/scala/org/apache/livy/repl/SQLInterpreterSpec.scala
+++ b/repl/src/test/scala/org/apache/livy/repl/SQLInterpreterSpec.scala
@@ -193,7 +193,7 @@ class SQLInterpreterSpec extends BaseInterpreterSpec {
assert(resp.isInstanceOf[Interpreter.ExecuteError])
val error = resp.asInstanceOf[Interpreter.ExecuteError]
error.ename should be ("Error")
- assert(error.evalue.contains("not found"))
+ assert(error.evalue.contains("not found") || error.evalue.contains("cannot be found"))
}
it should "fail if submitting multiple queries" in withInterpreter {
interpreter =>
diff --git a/server/pom.xml b/server/pom.xml
index f9c296e5..7e5bc0dc 100644
--- a/server/pom.xml
+++ b/server/pom.xml
@@ -94,6 +94,11 @@
<artifactId>javax.servlet-api</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ </dependency>
+
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
diff --git a/server/src/main/scala/org/apache/livy/utils/LivySparkUtils.scala
b/server/src/main/scala/org/apache/livy/utils/LivySparkUtils.scala
index c94b199b..c1b4320e 100644
--- a/server/src/main/scala/org/apache/livy/utils/LivySparkUtils.scala
+++ b/server/src/main/scala/org/apache/livy/utils/LivySparkUtils.scala
@@ -30,6 +30,16 @@ object LivySparkUtils extends Logging {
// For each Spark version we supported, we need to add this mapping relation
in case Scala
// version cannot be detected from "spark-submit --version".
private val _defaultSparkScalaVersion = SortedMap(
+ // Spark 3.5 + Scala 2.12
+ (3, 5) -> "2.12",
+ // Spark 3.4 + Scala 2.12
+ (3, 4) -> "2.12",
+ // Spark 3.3 + Scala 2.12
+ (3, 3) -> "2.12",
+ // Spark 3.2 + Scala 2.12
+ (3, 2) -> "2.12",
+ // Spark 3.1 + Scala 2.12
+ (3, 1) -> "2.12",
// Spark 3.0 + Scala 2.12
(3, 0) -> "2.12",
// Spark 2.4 + Scala 2.11
@@ -42,7 +52,7 @@ object LivySparkUtils extends Logging {
// Supported Spark version
private val MIN_VERSION = (2, 2)
- private val MAX_VERSION = (3, 1)
+ private val MAX_VERSION = (3, 6)
private val sparkVersionRegex = """version (.*)""".r.unanchored
private val scalaVersionRegex = """Scala version (.*), Java""".r.unanchored
diff --git
a/server/src/main/scala/org/apache/livy/utils/SparkKubernetesApp.scala
b/server/src/main/scala/org/apache/livy/utils/SparkKubernetesApp.scala
index c4574dee..0f466095 100644
--- a/server/src/main/scala/org/apache/livy/utils/SparkKubernetesApp.scala
+++ b/server/src/main/scala/org/apache/livy/utils/SparkKubernetesApp.scala
@@ -31,7 +31,7 @@ import scala.util.control.NonFatal
import io.fabric8.kubernetes.api.model._
import io.fabric8.kubernetes.api.model.networking.v1.{Ingress, IngressBuilder}
import io.fabric8.kubernetes.client.{Config, ConfigBuilder, _}
-import org.apache.commons.lang.StringUtils
+import org.apache.commons.lang3.StringUtils
import org.apache.livy.{LivyConf, Logging}
diff --git
a/thriftserver/server/src/test/scala/org/apache/livy/thriftserver/ThriftServerSuites.scala
b/thriftserver/server/src/test/scala/org/apache/livy/thriftserver/ThriftServerSuites.scala
index c9f91e1d..c0f4a36d 100644
---
a/thriftserver/server/src/test/scala/org/apache/livy/thriftserver/ThriftServerSuites.scala
+++
b/thriftserver/server/src/test/scala/org/apache/livy/thriftserver/ThriftServerSuites.scala
@@ -516,7 +516,9 @@ class BinaryThriftServerSuite extends ThriftServerBaseTest
with CommonThriftTest
statement.close()
}
}
- assert(caught.getMessage.contains("Database 'invalid_database' not found"))
+ val message = caught.getMessage
+ assert(message.contains("Database 'invalid_database' not found") ||
+ message.contains("The schema `invalid_database` cannot be found"))
}
}
@@ -530,8 +532,9 @@ class BinaryThriftServerSuite extends ThriftServerBaseTest
with CommonThriftTest
statement.close()
}
}
- assert(caught.getMessage.replaceAll("`", "")
- .contains("Table or view not found: global_temp.invalid_table"))
+ val message = caught.getMessage.replaceAll("`", "")
+ assert(message .contains("Table or view not found: global_temp.invalid_table") ||
+ message.contains("The table or view global_temp.invalid_table cannot be found"))
}
}
diff --git a/thriftserver/session/pom.xml b/thriftserver/session/pom.xml
index bd66ebf2..d8d606a7 100644
--- a/thriftserver/session/pom.xml
+++ b/thriftserver/session/pom.xml
@@ -55,12 +55,6 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<scope>provided</scope>
- <exclusions>
- <exclusion>
- <groupId>*</groupId>
- <artifactId>*</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>