This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new fb96b1a8d648 [SPARK-47723][CORE][TESTS] Introduce a tool that can automatically sort the enumeration fields in `LogEntry` alphabetically
fb96b1a8d648 is described below

commit fb96b1a8d6480612ca61ec39f62c8db0b341327b
Author: panbingkun <panbing...@baidu.com>
AuthorDate: Thu Apr 4 17:04:53 2024 -0700

    [SPARK-47723][CORE][TESTS] Introduce a tool that can automatically sort the enumeration fields in `LogEntry` alphabetically
    
    ### What changes were proposed in this pull request?
    This PR introduces a tool that automatically sorts the enumeration fields in `LogEntry` alphabetically.
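    
    For illustration, a minimal standalone sketch of the check this tool enforces (the `Color` enumeration and its values are hypothetical, not part of Spark):
    ```
    // Hypothetical enumeration used only for this sketch.
    object Color extends Enumeration {
      val BLUE, GREEN, RED = Value // declared in alphabetical order
    }

    object SortCheck {
      def main(args: Array[String]): Unit = {
        // Enumeration.values iterates in declaration order, so comparing it with a
        // name-sorted copy detects fields that are declared out of order.
        val keys = Color.values.toSeq
        assert(keys == keys.sortBy(_.toString), "enumeration fields must be sorted alphabetically")
      }
    }
    ```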
    
    ### Why are the changes needed?
    It lets developers keep the enumeration values in `LogEntry` in alphabetical order, as required by the structured logging development documentation.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    - Manual test (a sketch of the golden-file toggle follows this list):
      ```
      SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "common-utils/testOnly *LogKeySuite -- -t \"LogKey enumeration fields are correctly sorted\""
      ```
    - Pass GA.
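    
    As a rough sketch of the golden-file toggle used by the manual test above (standalone code, not the actual suite; `GoldenFileToggle` is a name made up for this sketch):
    ```
    // Sketch only: the env var switches between regenerating the golden file
    // and verifying against its current contents, mirroring what the new test does.
    object GoldenFileToggle {
      private val regenerate: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"

      def check(verify: => Unit)(rewrite: => Unit): Unit =
        if (regenerate) rewrite else verify
    }
    ```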
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #45867 from panbingkun/SPARK-47723.
    
    Lead-authored-by: panbingkun <panbing...@baidu.com>
    Co-authored-by: panbingkun <pbk1...@gmail.com>
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 .../scala/org/apache/spark/util/LogKeySuite.scala  | 71 ++++++++++++++++++++--
 1 file changed, 67 insertions(+), 4 deletions(-)

diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
index 1f3c2d77d35f..24a24538ad72 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala
@@ -17,17 +17,80 @@
 
 package org.apache.spark.util
 
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path}
+import java.util.{ArrayList => JList}
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.commons.io.FileUtils
 import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
 
 import org.apache.spark.internal.{Logging, LogKey}
+import org.apache.spark.internal.LogKey.LogKey
 
+// scalastyle:off line.size.limit
+/**
+ * To re-generate the LogKey class file, run:
+ * {{{
+ *   SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "common-utils/testOnly org.apache.spark.util.LogKeySuite"
+ * }}}
+ */
+// scalastyle:on line.size.limit
 class LogKeySuite
     extends AnyFunSuite // scalastyle:ignore funsuite
     with Logging {
 
-  test("LogKey enumeration fields must be sorted alphabetically") {
-    val keys = LogKey.values.toSeq
-    assert(keys === keys.sortBy(_.toString),
-      "LogKey enumeration fields must be sorted alphabetically")
+  /**
+   * Get a Path relative to the root project. It is assumed that a spark home is set.
+   */
+  protected final def getWorkspaceFilePath(first: String, more: String*): Path = {
+    if (!(sys.props.contains("spark.test.home") || sys.env.contains("SPARK_HOME"))) {
+      fail("spark.test.home or SPARK_HOME is not set.")
+    }
+    val sparkHome = sys.props.getOrElse("spark.test.home", sys.env("SPARK_HOME"))
+    java.nio.file.Paths.get(sparkHome, first +: more: _*)
+  }
+
+  private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"
+
+  private val logKeyFilePath = getWorkspaceFilePath("common", "utils", "src", "main", "scala",
+    "org", "apache", "spark", "internal", "LogKey.scala")
+
+  // regenerate the file `LogKey.scala` with its enumeration fields sorted alphabetically
+  private def regenerateLogKeyFile(
+      originalKeys: Seq[LogKey], sortedKeys: Seq[LogKey]): Unit = {
+    if (originalKeys != sortedKeys) {
+      val logKeyFile = logKeyFilePath.toFile
+      logInfo(s"Regenerating LogKey file $logKeyFile")
+      val originalContents = FileUtils.readLines(logKeyFile, StandardCharsets.UTF_8)
+      val sortedContents = new JList[String]()
+      var firstMatch = false
+      originalContents.asScala.foreach { line =>
+        if (line.trim.startsWith("val ") && line.trim.endsWith(" = Value")) {
+          if (!firstMatch) {
+            sortedKeys.foreach { logKey =>
+              sortedContents.add(s"  val ${logKey.toString} = Value")
+            }
+            firstMatch = true
+          }
+        } else {
+          sortedContents.add(line)
+        }
+      }
+      Files.delete(logKeyFile.toPath)
+      FileUtils.writeLines(logKeyFile, StandardCharsets.UTF_8.name(), sortedContents)
+    }
+  }
+
+  test("LogKey enumeration fields are correctly sorted") {
+    val originalKeys = LogKey.values.toSeq
+    val sortedKeys = originalKeys.sortBy(_.toString)
+    if (regenerateGoldenFiles) {
+      regenerateLogKeyFile(originalKeys, sortedKeys)
+    } else {
+      assert(originalKeys === sortedKeys,
+        "LogKey enumeration fields must be sorted alphabetically")
+    }
   }
 }
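
For reference, an illustration of what the regenerated enumeration block in `LogKey.scala` looks like: the test rewrites the single run of `val ... = Value` lines in sorted order and leaves every other line of the file untouched. The key names below are placeholders, and the surrounding `object LogKey` shape is inferred from the imports in the diff.
```
// Illustration only; these key names are hypothetical, not the real list.
object LogKey extends Enumeration {
  type LogKey = Value

  val APP_ID = Value
  val EXECUTOR_ID = Value
  val TASK_ID = Value
}
```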

