This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a4a274c4e4f [SPARK-43263][BUILD] Upgrade `FasterXML jackson` to 2.15.0
a4a274c4e4f is described below

commit a4a274c4e4f709765e7a8c687347816d8951a681
Author: bjornjorgensen <bjornjorgen...@gmail.com>
AuthorDate: Fri Apr 28 08:29:59 2023 -0500

    [SPARK-43263][BUILD] Upgrade `FasterXML jackson` to 2.15.0
    
    ### What changes were proposed in this pull request?
    Upgrade FasterXML Jackson from 2.14.2 to 2.15.0.
    
    ### Why are the changes needed?
    Upgrades Snakeyaml to 2.0, which resolves CVE-2022-1471
    (https://nvd.nist.gov/vuln/detail/CVE-2022-1471).
    
    ### Does this PR introduce _any_ user-facing change?
    This PR introduces user-facing changes by adding Jackson streaming read
    constraints to the JSONOptions class. The constraints limit the size of
    input constructs, improving security and efficiency when processing
    untrusted JSON input.

    Users working with JSON data that exceeds the following defaults may need
    to raise the corresponding limits:

    - Maximum number value length: 1,000 characters (`DEFAULT_MAX_NUM_LEN`)
    - Maximum string value length: 5,000,000 characters (`DEFAULT_MAX_STRING_LEN`)
    - Maximum nesting depth: 1,000 levels (`DEFAULT_MAX_DEPTH`)

    Additionally, the maximum magnitude of scale for BigDecimal-to-BigInteger
    conversion is fixed at 100,000 digits (`MAX_BIGINT_SCALE_MAGNITUDE`) and
    cannot be changed.
    
    Users can customize these constraints by passing the corresponding options
    (`maxNestingDepth`, `maxNumLen`, `maxStringLen`) in the reader's parameters,
    as sketched below. If an option is not explicitly specified, the default
    listed above is applied.
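    A minimal usage sketch (not part of this patch): raising the limits when
    reading JSON through the DataFrame reader, assuming the options propagate
    to `JSONOptions` like other JSON reader options; the path and the values
    shown are illustrative.

    ```scala
    // Hypothetical example: override the new stream read constraint options.
    // Any option that is omitted falls back to the Jackson 2.15 default above.
    val df = spark.read
      .option("maxNestingDepth", "2000")   // default: 1000 levels
      .option("maxNumLen", "2000")         // default: 1000 characters
      .option("maxStringLen", "10000000")  // default: 5,000,000 characters
      .json("/path/to/large.json")         // illustrative path
    ```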
    
    ### How was this patch tested?
    Passes existing GitHub Actions (GA) tests.
    
    Closes #40933 from bjornjorgensen/test_jacon.
    
    Authored-by: bjornjorgensen <bjornjorgen...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 dev/deps/spark-deps-hadoop-3-hive-2.3              | 16 +++++++-------
 pom.xml                                            |  4 ++--
 .../spark/sql/catalyst/json/JSONOptions.scala      | 25 +++++++++++++++++++++-
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index a6c41cdd726..bd689f9e913 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -97,13 +97,13 @@ httpcore/4.4.16//httpcore-4.4.16.jar
 ini4j/0.5.4//ini4j-0.5.4.jar
 istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
 ivy/2.5.1//ivy-2.5.1.jar
-jackson-annotations/2.14.2//jackson-annotations-2.14.2.jar
-jackson-core/2.14.2//jackson-core-2.14.2.jar
-jackson-databind/2.14.2//jackson-databind-2.14.2.jar
-jackson-dataformat-cbor/2.14.2//jackson-dataformat-cbor-2.14.2.jar
-jackson-dataformat-yaml/2.14.2//jackson-dataformat-yaml-2.14.2.jar
-jackson-datatype-jsr310/2.14.2//jackson-datatype-jsr310-2.14.2.jar
-jackson-module-scala_2.12/2.14.2//jackson-module-scala_2.12-2.14.2.jar
+jackson-annotations/2.15.0//jackson-annotations-2.15.0.jar
+jackson-core/2.15.0//jackson-core-2.15.0.jar
+jackson-databind/2.15.0//jackson-databind-2.15.0.jar
+jackson-dataformat-cbor/2.15.0//jackson-dataformat-cbor-2.15.0.jar
+jackson-dataformat-yaml/2.15.0//jackson-dataformat-yaml-2.15.0.jar
+jackson-datatype-jsr310/2.15.0//jackson-datatype-jsr310-2.15.0.jar
+jackson-module-scala_2.12/2.15.0//jackson-module-scala_2.12-2.15.0.jar
 jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar
 jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar
 jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar
@@ -233,7 +233,7 @@ scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar
 shims/0.9.39//shims-0.9.39.jar
 slf4j-api/2.0.7//slf4j-api-2.0.7.jar
 snakeyaml-engine/2.6//snakeyaml-engine-2.6.jar
-snakeyaml/1.33//snakeyaml-1.33.jar
+snakeyaml/2.0//snakeyaml-2.0.jar
 snappy-java/1.1.9.1//snappy-java-1.1.9.1.jar
 spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar
 spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar
diff --git a/pom.xml b/pom.xml
index c74da8e3ace..df7fef1cf79 100644
--- a/pom.xml
+++ b/pom.xml
@@ -184,8 +184,8 @@
     <scalafmt.validateOnly>true</scalafmt.validateOnly>
     <scalafmt.changedOnly>true</scalafmt.changedOnly>
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.14.2</fasterxml.jackson.version>
-    <fasterxml.jackson.databind.version>2.14.2</fasterxml.jackson.databind.version>
+    <fasterxml.jackson.version>2.15.0</fasterxml.jackson.version>
+    <fasterxml.jackson.databind.version>2.15.0</fasterxml.jackson.databind.version>
     <snappy.version>1.1.9.1</snappy.version>
     <netlib.ludovic.dev.version>3.0.3</netlib.ludovic.dev.version>
     <commons-codec.version>1.15</commons-codec.version>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index bf5b83e9df0..c06f411c505 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -21,7 +21,7 @@ import java.nio.charset.{Charset, StandardCharsets}
 import java.time.ZoneId
 import java.util.Locale
 
-import com.fasterxml.jackson.core.{JsonFactory, JsonFactoryBuilder}
+import com.fasterxml.jackson.core.{JsonFactory, JsonFactoryBuilder, StreamReadConstraints}
 import com.fasterxml.jackson.core.json.JsonReadFeature
 
 import org.apache.spark.internal.Logging
@@ -43,6 +43,21 @@ private[sql] class JSONOptions(
 
   import JSONOptions._
 
+  private val maxNestingDepth: Int = parameters
+    .get("maxNestingDepth")
+    .map(_.toInt)
+    .getOrElse(StreamReadConstraints.DEFAULT_MAX_DEPTH)
+
+  private val maxNumLen: Int = parameters
+    .get("maxNumLen")
+    .map(_.toInt)
+    .getOrElse(StreamReadConstraints.DEFAULT_MAX_NUM_LEN)
+
+  private val maxStringLen: Int = parameters
+    .get("maxStringLen")
+    .map(_.toInt)
+    .getOrElse(StreamReadConstraints.DEFAULT_MAX_STRING_LEN)
+
   def this(
     parameters: Map[String, String],
     defaultTimeZoneId: String,
@@ -176,6 +191,13 @@ private[sql] class JSONOptions(
 
   /** Build a Jackson [[JsonFactory]] using JSON options. */
   def buildJsonFactory(): JsonFactory = {
+    val streamReadConstraints = StreamReadConstraints
+      .builder()
+      .maxNestingDepth(maxNestingDepth)
+      .maxNumberLength(maxNumLen)
+      .maxStringLength(maxStringLen)
+      .build()
+
     new JsonFactoryBuilder()
       .configure(JsonReadFeature.ALLOW_JAVA_COMMENTS, allowComments)
       .configure(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES, allowUnquotedFieldNames)
@@ -186,6 +208,7 @@ private[sql] class JSONOptions(
         JsonReadFeature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
         allowBackslashEscapingAnyCharacter)
       .configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS, allowUnquotedControlChars)
+      .streamReadConstraints(streamReadConstraints)
       .build()
   }
 }
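For context, a minimal standalone sketch (not part of this commit) of how the Jackson 2.15 constraints configured above behave at runtime: once a limit is exceeded, parsing fails with a `StreamConstraintsException`. The limit value below is illustrative.

```scala
import com.fasterxml.jackson.core.{JsonFactoryBuilder, StreamReadConstraints}
import com.fasterxml.jackson.core.exc.StreamConstraintsException

// Factory that allows at most 10 levels of nesting (illustrative value).
val factory = new JsonFactoryBuilder()
  .streamReadConstraints(
    StreamReadConstraints.builder().maxNestingDepth(10).build())
  .build()

// Eleven nested arrays exceed the limit, so token iteration throws.
val parser = factory.createParser("[" * 11 + "]" * 11)
try {
  while (parser.nextToken() != null) {}
} catch {
  case e: StreamConstraintsException => println(s"Rejected: ${e.getMessage}")
} finally {
  parser.close()
}
```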


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
