This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 844821c82da5 [SPARK-47578][R] Migrate RPackageUtils with variables to structured logging framework 844821c82da5 is described below commit 844821c82da56fccb643bde9757799d1cfd3529a Author: Daniel Tenedorio <daniel.tenedo...@databricks.com> AuthorDate: Fri May 31 09:50:09 2024 -0700 [SPARK-47578][R] Migrate RPackageUtils with variables to structured logging framework ### What changes were proposed in this pull request? Migrate logging with variables of the Spark `RPackageUtils` module to structured logging framework. This transforms the log* entries of APIs like this: ``` def logWarning(msg: => String): Unit ``` to ``` def logWarning(entry: LogEntry): Unit ``` ### Why are the changes needed? To enhance Apache Spark's logging system by implementing structured logging. ### Does this PR introduce _any_ user-facing change? Yes, Spark core logs will contain additional MDC ### How was this patch tested? Compiler and scala style checks, as well as code review. ### Was this patch authored or co-authored using generative AI tooling? Brief but appropriate use of GitHub copilot Closes #46815 from dtenedor/log-migration-r-package-utils. Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../scala/org/apache/spark/internal/LogKey.scala | 1 + .../org/apache/spark/deploy/RPackageUtils.scala | 39 ++++++++++++---------- .../apache/spark/deploy/RPackageUtilsSuite.scala | 2 +- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala index cf3156905966..b8b63382fe4c 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala @@ -311,6 +311,7 @@ object LogKeys { case object ISOLATION_LEVEL extends LogKey case object ISSUE_DATE extends LogKey case object IS_NETWORK_REQUEST_DONE extends LogKey + case object JAR_ENTRY extends LogKey case object JAR_MESSAGE extends LogKey case object JAR_URL extends LogKey case object JAVA_VERSION extends LogKey diff --git a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala index 1a1a680c7faf..5d996381a485 100644 --- a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala @@ -27,13 +27,15 @@ import scala.jdk.CollectionConverters._ import com.google.common.io.{ByteStreams, Files} import org.apache.spark.api.r.RUtils -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{LogEntry, Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.{RedirectThread, Utils} private[deploy] object RPackageUtils extends Logging { /** The key in the MANIFEST.mf that we look for, in case a jar contains R code. */ private final val hasRPackage = "Spark-HasRPackage" + private final val hasRPackageMDC = MDC(CONFIG, hasRPackage) /** Base of the shell command used in order to install R packages. */ private final val baseInstallCmd = Seq("R", "CMD", "INSTALL", "-l") @@ -42,11 +44,11 @@ private[deploy] object RPackageUtils extends Logging { private final val RJarEntries = "R/pkg" /** Documentation on how the R source file layout should be in the jar. */ - private[deploy] final val RJarDoc = - s"""In order for Spark to build R packages that are parts of Spark Packages, there are a few + private[deploy] final val RJarDoc: MessageWithContext = + log"""In order for Spark to build R packages that are parts of Spark Packages, there are a few |requirements. The R source code must be shipped in a jar, with additional Java/Scala |classes. The jar must be in the following format: - | 1- The Manifest (META-INF/MANIFEST.mf) must contain the key-value: $hasRPackage: true + | 1- The Manifest (META-INF/MANIFEST.mf) must contain the key-value: $hasRPackageMDC: true | 2- The standard R package layout must be preserved under R/pkg/ inside the jar. More | information on the standard R package layout can be found in: | http://cran.r-project.org/doc/contrib/Leisch-CreatingPackages.pdf @@ -61,18 +63,17 @@ private[deploy] object RPackageUtils extends Logging { |R/pkg/R/myRcode.R |org/ |org/apache/ - |... - """.stripMargin.trim + |...""".stripMargin /** Internal method for logging. We log to a printStream in tests, for debugging purposes. */ private def print( - msg: String, + msg: LogEntry, printStream: PrintStream, level: Level = Level.FINE, e: Throwable = null): Unit = { if (printStream != null) { // scalastyle:off println - printStream.println(msg) + printStream.println(msg.message) // scalastyle:on println if (e != null) { e.printStackTrace(printStream) @@ -112,7 +113,7 @@ private[deploy] object RPackageUtils extends Logging { val pathToPkg = Seq(dir, "R", "pkg").mkString(File.separator) val installCmd = baseInstallCmd ++ Seq(libDir, pathToPkg) if (verbose) { - print(s"Building R package with the command: $installCmd", printStream) + print(log"Building R package with the command: ${MDC(COMMAND, installCmd)}", printStream) } try { val builder = new ProcessBuilder(installCmd.asJava) @@ -131,7 +132,7 @@ private[deploy] object RPackageUtils extends Logging { process.waitFor() == 0 } catch { case e: Throwable => - print("Failed to build R package.", printStream, Level.SEVERE, e) + print(log"Failed to build R package.", printStream, Level.SEVERE, e) false } } @@ -150,7 +151,7 @@ private[deploy] object RPackageUtils extends Logging { if (entry.isDirectory) { val dir = new File(tempDir, entryPath) if (verbose) { - print(s"Creating directory: $dir", printStream) + print(log"Creating directory: ${MDC(PATH, dir)}", printStream) } dir.mkdirs } else { @@ -159,7 +160,7 @@ private[deploy] object RPackageUtils extends Logging { Files.createParentDirs(outPath) val outStream = new FileOutputStream(outPath) if (verbose) { - print(s"Extracting $entry to $outPath", printStream) + print(log"Extracting ${MDC(JAR_ENTRY, entry)} to ${MDC(PATH, outPath)}", printStream) } Utils.copyStream(inStream, outStream, closeStreams = true) } @@ -181,32 +182,34 @@ private[deploy] object RPackageUtils extends Logging { val jar = new JarFile(file) Utils.tryWithSafeFinally { if (checkManifestForR(jar)) { - print(s"$file contains R source code. Now installing package.", printStream, Level.INFO) + print(log"${MDC(PATH, file)} contains R source code. Now installing package.", + printStream, Level.INFO) val rSource = extractRFolder(jar, printStream, verbose) if (RUtils.rPackages.isEmpty) { RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath) } try { if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) { - print(s"ERROR: Failed to build R package in $file.", printStream) + print(log"ERROR: Failed to build R package in ${MDC(PATH, file)}.", printStream) print(RJarDoc, printStream) } } finally { // clean up if (!rSource.delete()) { - logWarning(s"Error deleting ${rSource.getPath()}") + logWarning(log"Error deleting ${MDC(PATH, rSource.getPath())}") } } } else { if (verbose) { - print(s"$file doesn't contain R source code, skipping...", printStream) + print(log"${MDC(PATH, file)} doesn't contain R source code, skipping...", printStream) } } } { jar.close() } } else { - print(s"WARN: $file resolved as dependency, but not found.", printStream, Level.WARNING) + print(log"WARN: ${MDC(PATH, file)} resolved as dependency, but not found.", + printStream, Level.WARNING) } } } @@ -234,7 +237,7 @@ private[deploy] object RPackageUtils extends Logging { // create a zip file from scratch, do not append to existing file. val zipFile = new File(dir, name) if (!zipFile.delete()) { - logWarning(s"Error deleting ${zipFile.getPath()}") + logWarning(log"Error deleting ${MDC(PATH, zipFile.getPath())}") } val zipOutputStream = new ZipOutputStream(new FileOutputStream(zipFile, false)) try { diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala index a8ede31f1d30..77f5268f79ca 100644 --- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala @@ -127,7 +127,7 @@ class RPackageUtilsSuite RPackageUtils.checkAndBuildRPackage(jar.getAbsolutePath, new BufferPrintStream, verbose = true) val output = lineBuffer.mkString("\n") - assert(output.contains(RPackageUtils.RJarDoc)) + assert(output.contains(RPackageUtils.RJarDoc.message)) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org