This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new a4a274c4e4f  [SPARK-43263][BUILD] Upgrade `FasterXML jackson` to 2.15.0

a4a274c4e4f is described below

commit a4a274c4e4f709765e7a8c687347816d8951a681
Author: bjornjorgensen <bjornjorgen...@gmail.com>
AuthorDate: Fri Apr 28 08:29:59 2023 -0500

[SPARK-43263][BUILD] Upgrade `FasterXML jackson` to 2.15.0

### What changes were proposed in this pull request?

Upgrade FasterXML Jackson from 2.14.2 to 2.15.0.

### Why are the changes needed?

The Jackson 2.15.0 upgrade also brings in Snakeyaml 2.0 (up from 1.33), which resolves [CVE-2022-1471](https://nvd.nist.gov/vuln/detail/CVE-2022-1471).

### Does this PR introduce _any_ user-facing change?

Yes. Jackson 2.15.0 introduces streaming read constraints, which this PR wires into the `JSONOptions` class. The constraints limit the size of input constructs, improving security and efficiency when processing input data. Users working with JSON data larger than the following defaults may need to raise the corresponding limits:

- Maximum number value length: 1,000 characters (`DEFAULT_MAX_NUM_LEN`)
- Maximum string value length: 5,000,000 characters (`DEFAULT_MAX_STRING_LEN`)
- Maximum nesting depth: 1,000 levels (`DEFAULT_MAX_DEPTH`)

Additionally, the maximum magnitude of scale for BigDecimal-to-BigInteger conversion is fixed at 100,000 digits (`MAX_BIGINT_SCALE_MAGNITUDE`) and cannot be changed. Users can customize the three configurable constraints by passing the corresponding options to the JSON reader; if an option is not explicitly specified, its default applies, as shown in the sketch below.
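As an illustration, here is a minimal usage sketch (not taken from the patch): it assumes an existing `SparkSession` named `spark`, and the input path and limit values are placeholders. The option names are exactly the keys `JSONOptions` reads in the diff below, and the values are strings parsed with `toInt`:

```scala
// Hypothetical example: raise the read-constraint limits for unusually
// large JSON input; unset options fall back to Jackson's defaults.
val df = spark.read
  .option("maxNumLen", "10000")        // default 1,000 (DEFAULT_MAX_NUM_LEN)
  .option("maxStringLen", "10000000")  // default 5,000,000 (DEFAULT_MAX_STRING_LEN)
  .option("maxNestingDepth", "2000")   // default 1,000 (DEFAULT_MAX_DEPTH)
  .json("/path/to/large.json")         // placeholder path
```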
### How was this patch tested?

Pass GA.

Closes #40933 from bjornjorgensen/test_jacon.

Authored-by: bjornjorgensen <bjornjorgen...@gmail.com>
Signed-off-by: Sean Owen <sro...@gmail.com>
---
 dev/deps/spark-deps-hadoop-3-hive-2.3         | 16 +++++++-------
 pom.xml                                       |  4 ++--
 .../spark/sql/catalyst/json/JSONOptions.scala | 25 +++++++++++++++++++++-
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index a6c41cdd726..bd689f9e913 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -97,13 +97,13 @@ httpcore/4.4.16//httpcore-4.4.16.jar
 ini4j/0.5.4//ini4j-0.5.4.jar
 istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
 ivy/2.5.1//ivy-2.5.1.jar
-jackson-annotations/2.14.2//jackson-annotations-2.14.2.jar
-jackson-core/2.14.2//jackson-core-2.14.2.jar
-jackson-databind/2.14.2//jackson-databind-2.14.2.jar
-jackson-dataformat-cbor/2.14.2//jackson-dataformat-cbor-2.14.2.jar
-jackson-dataformat-yaml/2.14.2//jackson-dataformat-yaml-2.14.2.jar
-jackson-datatype-jsr310/2.14.2//jackson-datatype-jsr310-2.14.2.jar
-jackson-module-scala_2.12/2.14.2//jackson-module-scala_2.12-2.14.2.jar
+jackson-annotations/2.15.0//jackson-annotations-2.15.0.jar
+jackson-core/2.15.0//jackson-core-2.15.0.jar
+jackson-databind/2.15.0//jackson-databind-2.15.0.jar
+jackson-dataformat-cbor/2.15.0//jackson-dataformat-cbor-2.15.0.jar
+jackson-dataformat-yaml/2.15.0//jackson-dataformat-yaml-2.15.0.jar
+jackson-datatype-jsr310/2.15.0//jackson-datatype-jsr310-2.15.0.jar
+jackson-module-scala_2.12/2.15.0//jackson-module-scala_2.12-2.15.0.jar
 jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar
 jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar
 jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar
@@ -233,7 +233,7 @@ scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar
 shims/0.9.39//shims-0.9.39.jar
 slf4j-api/2.0.7//slf4j-api-2.0.7.jar
 snakeyaml-engine/2.6//snakeyaml-engine-2.6.jar
-snakeyaml/1.33//snakeyaml-1.33.jar
+snakeyaml/2.0//snakeyaml-2.0.jar
 snappy-java/1.1.9.1//snappy-java-1.1.9.1.jar
 spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar
 spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar
diff --git a/pom.xml b/pom.xml
index c74da8e3ace..df7fef1cf79 100644
--- a/pom.xml
+++ b/pom.xml
@@ -184,8 +184,8 @@
     <scalafmt.validateOnly>true</scalafmt.validateOnly>
     <scalafmt.changedOnly>true</scalafmt.changedOnly>
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.14.2</fasterxml.jackson.version>
-    <fasterxml.jackson.databind.version>2.14.2</fasterxml.jackson.databind.version>
+    <fasterxml.jackson.version>2.15.0</fasterxml.jackson.version>
+    <fasterxml.jackson.databind.version>2.15.0</fasterxml.jackson.databind.version>
     <snappy.version>1.1.9.1</snappy.version>
     <netlib.ludovic.dev.version>3.0.3</netlib.ludovic.dev.version>
     <commons-codec.version>1.15</commons-codec.version>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index bf5b83e9df0..c06f411c505 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -21,7 +21,7 @@ import java.nio.charset.{Charset, StandardCharsets}
 import java.time.ZoneId
 import java.util.Locale
 
-import com.fasterxml.jackson.core.{JsonFactory, JsonFactoryBuilder}
+import com.fasterxml.jackson.core.{JsonFactory, JsonFactoryBuilder, StreamReadConstraints}
 import com.fasterxml.jackson.core.json.JsonReadFeature
 
 import org.apache.spark.internal.Logging
@@ -43,6 +43,21 @@ private[sql] class JSONOptions(
 
   import JSONOptions._
 
+  private val maxNestingDepth: Int = parameters
+    .get("maxNestingDepth")
+    .map(_.toInt)
+    .getOrElse(StreamReadConstraints.DEFAULT_MAX_DEPTH)
+
+  private val maxNumLen: Int = parameters
+    .get("maxNumLen")
+    .map(_.toInt)
+    .getOrElse(StreamReadConstraints.DEFAULT_MAX_NUM_LEN)
+
+  private val maxStringLen: Int = parameters
+    .get("maxStringLen")
+    .map(_.toInt)
+    .getOrElse(StreamReadConstraints.DEFAULT_MAX_STRING_LEN)
+
   def this(
       parameters: Map[String, String],
       defaultTimeZoneId: String,
@@ -176,6 +191,13 @@ private[sql] class JSONOptions(
 
   /** Build a Jackson [[JsonFactory]] using JSON options. */
   def buildJsonFactory(): JsonFactory = {
+    val streamReadConstraints = StreamReadConstraints
+      .builder()
+      .maxNestingDepth(maxNestingDepth)
+      .maxNumberLength(maxNumLen)
+      .maxStringLength(maxStringLen)
+      .build()
+
     new JsonFactoryBuilder()
       .configure(JsonReadFeature.ALLOW_JAVA_COMMENTS, allowComments)
       .configure(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES, allowUnquotedFieldNames)
@@ -186,6 +208,7 @@
         JsonReadFeature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
         allowBackslashEscapingAnyCharacter)
       .configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS, allowUnquotedControlChars)
+      .streamReadConstraints(streamReadConstraints)
       .build()
   }
 }
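For context on the Jackson 2.15 API that `buildJsonFactory()` now uses, here is a standalone sketch (not part of the patch; the depth limit of 5 and the sample document are illustrative) showing a factory that enforces a nesting-depth constraint:

```scala
import com.fasterxml.jackson.core.{JsonFactory, JsonFactoryBuilder, StreamReadConstraints}

// Build a factory whose parsers allow at most 5 levels of nesting.
val factory: JsonFactory = new JsonFactoryBuilder()
  .streamReadConstraints(StreamReadConstraints.builder().maxNestingDepth(5).build())
  .build()

val tooDeep = "[" * 10 + "]" * 10 // an array nested 10 levels deep
val parser = factory.createParser(tooDeep)
try {
  // Advancing past the configured depth throws a
  // com.fasterxml.jackson.core.exc.StreamConstraintsException.
  while (parser.nextToken() != null) {}
} finally {
  parser.close()
}
```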