This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new e49aed8cf4f  [SPARK-42599][CONNECT][INFRA] Introduce `dev/connect-jvm-client-mima-check` instead of `CompatibilitySuite`
e49aed8cf4f is described below

commit e49aed8cf4fafcbe8f200add919db8087f12cc34
Author: yangjie01 <yangji...@baidu.com>
AuthorDate: Thu Mar 2 14:36:17 2023 -0400

[SPARK-42599][CONNECT][INFRA] Introduce `dev/connect-jvm-client-mima-check` instead of `CompatibilitySuite`

### What changes were proposed in this pull request?
The main changes of this PR are as follows:

- Refactor `CompatibilitySuite` into a new tool, `CheckConnectJvmClientCompatibility`.
- Add a new shell script, `dev/connect-jvm-client-mima-check`, which uses `CheckConnectJvmClientCompatibility` to check the MiMa compatibility of the `connect-jvm-client` module.
- Add `dev/connect-jvm-client-mima-check` to the GitHub Actions workflow.

### Why are the changes needed?
To fix the test errors reported on the `[VOTE] Release Apache Spark 3.4.0 (RC1)` mailing list.

Testing `CompatibilitySuite` with Maven requires some pre-work:

```
build/mvn clean install -DskipTests -pl sql/core -am
build/mvn clean install -DskipTests -pl connector/connect/client/jvm -am
```

So if we run `build/mvn package test` to test the whole project as before, `CompatibilitySuite` fails as follows:

```
CompatibilitySuite:
- compatibility MiMa tests *** FAILED ***
  java.lang.AssertionError: assertion failed: Failed to find the jar inside folder: /home/bjorn/spark-3.4.0/connector/connect/client/jvm/target
  at scala.Predef$.assert(Predef.scala:223)
  at org.apache.spark.sql.connect.client.util.IntegrationTestUtils$.findJar(IntegrationTestUtils.scala:67)
  at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar$lzycompute(CompatibilitySuite.scala:57)
  at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar(CompatibilitySuite.scala:53)
  at org.apache.spark.sql.connect.client.CompatibilitySuite.$anonfun$new$1(CompatibilitySuite.scala:69)
  at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
  at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
  at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
  at org.scalatest.Transformer.apply(Transformer.scala:22)
  at org.scalatest.Transformer.apply(Transformer.scala:20)
  ...
- compatibility API tests: Dataset *** FAILED ***
  java.lang.AssertionError: assertion failed: Failed to find the jar inside folder: /home/bjorn/spark-3.4.0/connector/connect/client/jvm/target
  at scala.Predef$.assert(Predef.scala:223)
  at org.apache.spark.sql.connect.client.util.IntegrationTestUtils$.findJar(IntegrationTestUtils.scala:67)
  at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar$lzycompute(CompatibilitySuite.scala:57)
  at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar(CompatibilitySuite.scala:53)
  at org.apache.spark.sql.connect.client.CompatibilitySuite.$anonfun$new$7(CompatibilitySuite.scala:110)
  at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
  at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
  at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
  at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
  at org.scalatest.Transformer.apply(Transformer.scala:22)
```

So we need to fix this problem for developers.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Pass GitHub Actions.

Closes #40213 from LuciferYang/connect-client-compatibility-backup.
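For readers unfamiliar with MiMa's programmatic API, the heart of the new `CheckConnectJvmClientCompatibility` tool is a call into `MiMaLib`, as the patch below shows. Here is a minimal, self-contained sketch of that flow; the jar paths are illustrative placeholders (the real tool locates the jars with `findJar` and writes its findings to `.connect-mima-check-result` rather than stdout):

```
import java.io.File

import com.typesafe.tools.mima.lib.MiMaLib

object MimaCheckSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative paths; the real tool resolves these with findJar(...).
    val sqlJar = new File("sql/core/target/spark-sql.jar")
    val clientJar = new File("connector/connect/client/jvm/target/spark-connect-client-jvm.jar")

    // MiMa takes the "old" jar (spark-sql) and the "new" jar (the client)
    // and reports every binary incompatibility between the two.
    val mima = new MiMaLib(Seq(clientJar, sqlJar))
    val problems = mima.collectProblems(sqlJar, clientJar, List.empty)
    problems.foreach(p => println(p.description("client")))
  }
}
```

The patch then narrows this raw problem list with include/exclude rules so that only the client classes that are meant to mirror the sql module are checked.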
Authored-by: yangjie01 <yangji...@baidu.com>
Signed-off-by: Herman van Hovell <her...@databricks.com>
(cherry picked from commit 12beadbc0525b473575f83d01c962f7238f9069b)
Signed-off-by: Herman van Hovell <her...@databricks.com>
---
 .github/workflows/build_and_test.yml               |   3 +
 ...la => CheckConnectJvmClientCompatibility.scala} | 136 +++++++++++++--------
 dev/connect-jvm-client-mima-check                  |  72 +++++++++++
 3 files changed, 163 insertions(+), 48 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index dfebec72813..31ae5cfee9b 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -565,6 +565,9 @@ jobs:
       run: ./dev/lint-scala
     - name: Java linter
       run: ./dev/lint-java
+    - name: Spark connect jvm client mima check
+      if: inputs.branch != 'branch-3.2' && inputs.branch != 'branch-3.3'
+      run: ./dev/connect-jvm-client-mima-check
     - name: Install Python linter dependencies
       run: |
         # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
similarity index 70%
rename from connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala
rename to connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
index a4cb2a13e5b..4f4ca9ad990 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CompatibilitySuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
@@ -16,49 +16,83 @@
  */
 package org.apache.spark.sql.connect.client
 
-import java.io.File
+import java.io.{File, Writer}
 import java.net.URLClassLoader
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
 import java.util.regex.Pattern
 
-import com.typesafe.tools.mima.core._
+import scala.reflect.runtime.universe.runtimeMirror
+
+import com.typesafe.tools.mima.core.{Problem, ProblemFilter, ProblemFilters}
 import com.typesafe.tools.mima.lib.MiMaLib
 
-import org.apache.spark.sql.connect.client.util.ConnectFunSuite
 import org.apache.spark.sql.connect.client.util.IntegrationTestUtils._
+import org.apache.spark.util.ChildFirstURLClassLoader
 
 /**
- * This test checks the binary compatibility of the connect client API against the spark SQL API
- * using MiMa. We did not write this check using a SBT build rule as the rule cannot provide the
- * same level of freedom as a test. With a test we can:
+ * A tool for checking the binary compatibility of the connect client API against the Spark SQL
+ * API using MiMa. We did not write this check using an SBT build rule as the rule cannot provide
+ * the same level of freedom as a test. With a test we can:
  *   1. Specify any two jars to run the compatibility check.
  *   1. Easily make the test automatically pick up all new methods added while the client is being
  *      built.
  *
- * The test requires the following artifacts built before running:
- * {{{
- *     spark-sql
- *     spark-connect-client-jvm
- * }}}
- * To build the above artifact, use e.g. `build/sbt package` or `build/mvn clean install
- * -DskipTests`.
- *
- * When debugging this test, if any changes to the client API, the client jar need to be built
- * before running the test. An example workflow with SBT for this test:
- *   1. Compatibility test has reported an unexpected client API change.
- *   1. Fix the wrong client API.
- *   1. Build the client jar: `build/sbt package`
- *   1. Run the test again: `build/sbt "testOnly
- *      org.apache.spark.sql.connect.client.CompatibilitySuite"`
+ * We can run this check by executing `dev/connect-jvm-client-mima-check`.
  */
-class CompatibilitySuite extends ConnectFunSuite {
+// scalastyle:off println
+object CheckConnectJvmClientCompatibility {
 
-  private lazy val clientJar: File =
-    findJar(
-      "connector/connect/client/jvm",
-      "spark-connect-client-jvm-assembly",
-      "spark-connect-client-jvm")
+  private lazy val sparkHome: String = {
+    if (!sys.env.contains("SPARK_HOME")) {
+      throw new IllegalArgumentException("SPARK_HOME is not set.")
+    }
+    sys.env("SPARK_HOME")
+  }
 
-  private lazy val sqlJar: File = findJar("sql/core", "spark-sql", "spark-sql")
+  def main(args: Array[String]): Unit = {
+    var resultWriter: Writer = null
+    try {
+      resultWriter = Files.newBufferedWriter(
+        Paths.get(s"$sparkHome/.connect-mima-check-result"),
+        StandardCharsets.UTF_8)
+      val clientJar: File =
+        findJar(
+          "connector/connect/client/jvm",
+          "spark-connect-client-jvm-assembly",
+          "spark-connect-client-jvm")
+      val sqlJar: File = findJar("sql/core", "spark-sql", "spark-sql")
+      val problems = checkMiMaCompatibility(clientJar, sqlJar)
+      if (problems.nonEmpty) {
+        resultWriter.write(s"ERROR: Comparing client jar: $clientJar and sql jar: $sqlJar \n")
+        resultWriter.write(s"problems: \n")
+        resultWriter.write(s"${problems.map(p => p.description("client")).mkString("\n")}")
+        resultWriter.write("\n")
+        resultWriter.write(
+          "Exceptions to binary compatibility can be added in " +
+            "'CheckConnectJvmClientCompatibility#checkMiMaCompatibility'\n")
+      }
+      val incompatibleApis = checkDatasetApiCompatibility(clientJar, sqlJar)
+      if (incompatibleApis.nonEmpty) {
+        resultWriter.write(
+          "ERROR: Dataset APIs that exist only in the connect client " +
+            "module and do not belong to the sql module: \n")
+        resultWriter.write(incompatibleApis.mkString("\n"))
+        resultWriter.write("\n")
+        resultWriter.write(
+          "Exceptions can be added to exceptionMethods in " +
+            "'CheckConnectJvmClientCompatibility#checkDatasetApiCompatibility'\n")
+      }
+    } catch {
+      case e: Throwable =>
+        println(e.getMessage)
+        resultWriter.write(s"ERROR: ${e.getMessage}")
+    } finally {
+      if (resultWriter != null) {
+        resultWriter.close()
+      }
+    }
+  }
 
   /**
    * MiMa takes an old jar (sql jar) and a new jar (client jar) as inputs and then reports all
@@ -67,7 +101,7 @@ class CompatibilitySuite extends ConnectFunSuite {
    * need to be checked. Then exclude rules are applied to filter out all unsupported methods in
    * the client classes.
*/ - test("compatibility MiMa tests") { + private def checkMiMaCompatibility(clientJar: File, sqlJar: File): List[Problem] = { val mima = new MiMaLib(Seq(clientJar, sqlJar)) val allProblems = mima.collectProblems(sqlJar, clientJar, List.empty) val includedRules = Seq( @@ -184,30 +218,36 @@ class CompatibilitySuite extends ConnectFunSuite { .filter { p => excludeRules.forall(rule => rule(p)) } - - if (problems.nonEmpty) { - fail( - s"\nComparing client jar: $clientJar\nand sql jar: $sqlJar\n" + - problems.map(p => p.description("client")).mkString("\n")) - } + problems } - test("compatibility API tests: Dataset") { - val clientClassLoader: URLClassLoader = new URLClassLoader(Seq(clientJar.toURI.toURL).toArray) - val sqlClassLoader: URLClassLoader = new URLClassLoader(Seq(sqlJar.toURI.toURL).toArray) + private def checkDatasetApiCompatibility(clientJar: File, sqlJar: File): Seq[String] = { - val clientClass = clientClassLoader.loadClass("org.apache.spark.sql.Dataset") - val sqlClass = sqlClassLoader.loadClass("org.apache.spark.sql.Dataset") + def methods(jar: File, className: String): Seq[String] = { + val classLoader: URLClassLoader = + new ChildFirstURLClassLoader(Seq(jar.toURI.toURL).toArray, this.getClass.getClassLoader) + val mirror = runtimeMirror(classLoader) + // scalastyle:off classforname + val classSymbol = + mirror.classSymbol(Class.forName(className, false, classLoader)) + // scalastyle:on classforname + classSymbol.typeSignature.members + .filter(_.isMethod) + .map(_.asMethod) + .filter(m => m.isPublic) + .map(_.fullName) + .toSeq + } - val newMethods = clientClass.getMethods - val oldMethods = sqlClass.getMethods + val className = "org.apache.spark.sql.Dataset" + val clientMethods = methods(clientJar, className) + val sqlMethods = methods(sqlJar, className) + // Exclude some public methods that must be added through `exceptionMethods` + val exceptionMethods = + Seq("org.apache.spark.sql.Dataset.collectResult", "org.apache.spark.sql.Dataset.plan") - // For now we simply check the new methods is a subset of the old methods. - newMethods - .map(m => m.toString) - .foreach(method => { - assert(oldMethods.map(m => m.toString).contains(method)) - }) + // Find new public functions that are not in sql module `Dataset`. + clientMethods.diff(sqlMethods).diff(exceptionMethods) } private case class IncludeByName(name: String) extends ProblemFilter { diff --git a/dev/connect-jvm-client-mima-check b/dev/connect-jvm-client-mima-check new file mode 100755 index 00000000000..2dbbdaf8764 --- /dev/null +++ b/dev/connect-jvm-client-mima-check @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/dev/connect-jvm-client-mima-check b/dev/connect-jvm-client-mima-check
new file mode 100755
index 00000000000..2dbbdaf8764
--- /dev/null
+++ b/dev/connect-jvm-client-mima-check
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -o pipefail
+set -e
+
+# Go to the Spark project root directory
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+cd "$FWDIR"
+export SPARK_HOME=$FWDIR
+echo $SPARK_HOME
+
+if [[ -x "$JAVA_HOME/bin/java" ]]; then
+  JAVA_CMD="$JAVA_HOME/bin/java"
+else
+  JAVA_CMD=java
+fi
+
+rm -f .connect-mima-check-result
+
+echo "Build sql module, connect-client-jvm module and connect-client-jvm test jar..."
+build/sbt "sql/package;connect-client-jvm/assembly;connect-client-jvm/Test/package"
+
+CONNECT_TEST_CLASSPATH="$(build/sbt -DcopyDependencies=false "export connect-client-jvm/Test/fullClasspath" | grep jar | tail -n1)"
+CONNECT_CLASSPATH="$(build/sbt -DcopyDependencies=false "export connect-client-jvm/fullClasspath" | grep jar | tail -n1)"
+SQL_CLASSPATH="$(build/sbt -DcopyDependencies=false "export sql/fullClasspath" | grep jar | tail -n1)"
+
+echo "Do connect-client-jvm module mima check ..."
+
+$JAVA_CMD \
+  -Xmx2g \
+  -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.util.jar=ALL-UNNAMED \
+  -cp "$CONNECT_CLASSPATH:$CONNECT_TEST_CLASSPATH:$SQL_CLASSPATH" \
+  org.apache.spark.sql.connect.client.CheckConnectJvmClientCompatibility
+
+echo "finish connect-client-jvm module mima check ..."
+
+RESULT_SIZE=$(wc -l .connect-mima-check-result | awk '{print $1}')
+
+# The file has no content if the check passed.
+if [[ $RESULT_SIZE -eq "0" ]]; then
+  ERRORS=""
+else
+  ERRORS=$(grep ERROR .connect-mima-check-result | tail -n1)
+fi
+
+if test ! -z "$ERRORS"; then
+  cat .connect-mima-check-result
+  echo -e "connect-client-jvm module mima check failed."
+  rm .connect-mima-check-result
+  exit 1
+else
+  rm .connect-mima-check-result
+  echo -e "sql and connect-client-jvm module mima check passed."
+fi

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org