This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 844821c82da5 [SPARK-47578][R] Migrate RPackageUtils with variables to structured logging framework
844821c82da5 is described below

commit 844821c82da56fccb643bde9757799d1cfd3529a
Author: Daniel Tenedorio <daniel.tenedo...@databricks.com>
AuthorDate: Fri May 31 09:50:09 2024 -0700

    [SPARK-47578][R] Migrate RPackageUtils with variables to structured logging framework
    
    ### What changes were proposed in this pull request?
    
    Migrate logging with variables in the Spark `RPackageUtils` module to the
    structured logging framework. This migrates call sites of log* APIs from
    signatures like:
    
    ```
    def logWarning(msg: => String): Unit
    ```
    to
    ```
    def logWarning(entry: LogEntry): Unit
    ```
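
    A minimal sketch (not part of this patch) of what this looks like at a
    call site, using the `log` interpolator and `MDC` from
    `org.apache.spark.internal` together with a key from `LogKeys`:

    ```
    import java.io.File

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys.PATH

    object Example extends Logging {
      def cleanup(file: File): Unit = {
        // Before: plain string interpolation, no structured context
        //   logWarning(s"Error deleting ${file.getPath()}")
        // After: the log interpolator records PATH as an MDC key-value pair
        if (!file.delete()) {
          logWarning(log"Error deleting ${MDC(PATH, file.getPath())}")
        }
      }
    }
    ```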
    
    ### Why are the changes needed?
    
    To enhance Apache Spark's logging system by implementing structured logging.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, Spark core logs will contain additional MDC entries.
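
    For illustration only (the exact field names depend on the configured
    JSON log layout), a warning logged through the new API could render
    roughly as:

    ```
    {"ts":"...","level":"WARN","msg":"Error deleting /tmp/pkg","context":{"path":"/tmp/pkg"},"logger":"RPackageUtils"}
    ```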
    
    ### How was this patch tested?
    
    Compiler and Scala style checks, as well as code review.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Brief but appropriate use of GitHub Copilot.
    
    Closes #46815 from dtenedor/log-migration-r-package-utils.
    
    Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com>
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 .../scala/org/apache/spark/internal/LogKey.scala   |  1 +
 .../org/apache/spark/deploy/RPackageUtils.scala    | 39 ++++++++++++----------
 .../apache/spark/deploy/RPackageUtilsSuite.scala   |  2 +-
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
index cf3156905966..b8b63382fe4c 100644
--- a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
+++ b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
@@ -311,6 +311,7 @@ object LogKeys {
   case object ISOLATION_LEVEL extends LogKey
   case object ISSUE_DATE extends LogKey
   case object IS_NETWORK_REQUEST_DONE extends LogKey
+  case object JAR_ENTRY extends LogKey
   case object JAR_MESSAGE extends LogKey
   case object JAR_URL extends LogKey
   case object JAVA_VERSION extends LogKey
diff --git a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala
index 1a1a680c7faf..5d996381a485 100644
--- a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala
@@ -27,13 +27,15 @@ import scala.jdk.CollectionConverters._
 import com.google.common.io.{ByteStreams, Files}
 
 import org.apache.spark.api.r.RUtils
-import org.apache.spark.internal.Logging
+import org.apache.spark.internal.{LogEntry, Logging, MDC, MessageWithContext}
+import org.apache.spark.internal.LogKeys._
 import org.apache.spark.util.{RedirectThread, Utils}
 
 private[deploy] object RPackageUtils extends Logging {
 
   /** The key in the MANIFEST.mf that we look for, in case a jar contains R code. */
   private final val hasRPackage = "Spark-HasRPackage"
+  private final val hasRPackageMDC = MDC(CONFIG, hasRPackage)
 
   /** Base of the shell command used in order to install R packages. */
   private final val baseInstallCmd = Seq("R", "CMD", "INSTALL", "-l")
@@ -42,11 +44,11 @@ private[deploy] object RPackageUtils extends Logging {
   private final val RJarEntries = "R/pkg"
 
   /** Documentation on how the R source file layout should be in the jar. */
-  private[deploy] final val RJarDoc =
-    s"""In order for Spark to build R packages that are parts of Spark 
Packages, there are a few
+  private[deploy] final val RJarDoc: MessageWithContext =
+    log"""In order for Spark to build R packages that are parts of Spark 
Packages, there are a few
       |requirements. The R source code must be shipped in a jar, with additional Java/Scala
       |classes. The jar must be in the following format:
-      |  1- The Manifest (META-INF/MANIFEST.mf) must contain the key-value: $hasRPackage: true
+      |  1- The Manifest (META-INF/MANIFEST.mf) must contain the key-value: $hasRPackageMDC: true
       |  2- The standard R package layout must be preserved under R/pkg/ inside the jar. More
       |  information on the standard R package layout can be found in:
       |  http://cran.r-project.org/doc/contrib/Leisch-CreatingPackages.pdf
@@ -61,18 +63,17 @@ private[deploy] object RPackageUtils extends Logging {
       |R/pkg/R/myRcode.R
       |org/
       |org/apache/
-      |...
-    """.stripMargin.trim
+      |...""".stripMargin
 
   /** Internal method for logging. We log to a printStream in tests, for debugging purposes. */
   private def print(
-      msg: String,
+      msg: LogEntry,
       printStream: PrintStream,
       level: Level = Level.FINE,
       e: Throwable = null): Unit = {
     if (printStream != null) {
       // scalastyle:off println
-      printStream.println(msg)
+      printStream.println(msg.message)
       // scalastyle:on println
       if (e != null) {
         e.printStackTrace(printStream)
@@ -112,7 +113,7 @@ private[deploy] object RPackageUtils extends Logging {
     val pathToPkg = Seq(dir, "R", "pkg").mkString(File.separator)
     val installCmd = baseInstallCmd ++ Seq(libDir, pathToPkg)
     if (verbose) {
-      print(s"Building R package with the command: $installCmd", printStream)
+      print(log"Building R package with the command: ${MDC(COMMAND, 
installCmd)}", printStream)
     }
     try {
       val builder = new ProcessBuilder(installCmd.asJava)
@@ -131,7 +132,7 @@ private[deploy] object RPackageUtils extends Logging {
       process.waitFor() == 0
     } catch {
       case e: Throwable =>
-        print("Failed to build R package.", printStream, Level.SEVERE, e)
+        print(log"Failed to build R package.", printStream, Level.SEVERE, e)
         false
     }
   }
@@ -150,7 +151,7 @@ private[deploy] object RPackageUtils extends Logging {
         if (entry.isDirectory) {
           val dir = new File(tempDir, entryPath)
           if (verbose) {
-            print(s"Creating directory: $dir", printStream)
+            print(log"Creating directory: ${MDC(PATH, dir)}", printStream)
           }
           dir.mkdirs
         } else {
@@ -159,7 +160,7 @@ private[deploy] object RPackageUtils extends Logging {
           Files.createParentDirs(outPath)
           val outStream = new FileOutputStream(outPath)
           if (verbose) {
-            print(s"Extracting $entry to $outPath", printStream)
+            print(log"Extracting ${MDC(JAR_ENTRY, entry)} to ${MDC(PATH, 
outPath)}", printStream)
           }
           Utils.copyStream(inStream, outStream, closeStreams = true)
         }
@@ -181,32 +182,34 @@ private[deploy] object RPackageUtils extends Logging {
         val jar = new JarFile(file)
         Utils.tryWithSafeFinally {
           if (checkManifestForR(jar)) {
-            print(s"$file contains R source code. Now installing package.", 
printStream, Level.INFO)
+            print(log"${MDC(PATH, file)} contains R source code. Now 
installing package.",
+              printStream, Level.INFO)
             val rSource = extractRFolder(jar, printStream, verbose)
             if (RUtils.rPackages.isEmpty) {
               RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath)
             }
             try {
               if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) {
-                print(s"ERROR: Failed to build R package in $file.", 
printStream)
+                print(log"ERROR: Failed to build R package in ${MDC(PATH, 
file)}.", printStream)
                 print(RJarDoc, printStream)
               }
             } finally {
               // clean up
               if (!rSource.delete()) {
-                logWarning(s"Error deleting ${rSource.getPath()}")
+                logWarning(log"Error deleting ${MDC(PATH, rSource.getPath())}")
               }
             }
           } else {
             if (verbose) {
-              print(s"$file doesn't contain R source code, skipping...", 
printStream)
+              print(log"${MDC(PATH, file)} doesn't contain R source code, 
skipping...", printStream)
             }
           }
         } {
           jar.close()
         }
       } else {
-        print(s"WARN: $file resolved as dependency, but not found.", 
printStream, Level.WARNING)
+        print(log"WARN: ${MDC(PATH, file)} resolved as dependency, but not 
found.",
+          printStream, Level.WARNING)
       }
     }
   }
@@ -234,7 +237,7 @@ private[deploy] object RPackageUtils extends Logging {
     // create a zip file from scratch, do not append to existing file.
     val zipFile = new File(dir, name)
     if (!zipFile.delete()) {
-      logWarning(s"Error deleting ${zipFile.getPath()}")
+      logWarning(log"Error deleting ${MDC(PATH, zipFile.getPath())}")
     }
     val zipOutputStream = new ZipOutputStream(new FileOutputStream(zipFile, false))
     try {
diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
index a8ede31f1d30..77f5268f79ca 100644
--- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
@@ -127,7 +127,7 @@ class RPackageUtilsSuite
       RPackageUtils.checkAndBuildRPackage(jar.getAbsolutePath, new BufferPrintStream,
         verbose = true)
       val output = lineBuffer.mkString("\n")
-      assert(output.contains(RPackageUtils.RJarDoc))
+      assert(output.contains(RPackageUtils.RJarDoc.message))
     }
   }
 

