This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 4cdb6d487ed [SPARK-44266][SQL] Move Util.truncatedString to sql/api 4cdb6d487ed is described below commit 4cdb6d487ed18891bb7f63f9fb20f33cbbcc26c2 Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Mon Jul 3 20:04:17 2023 -0700 [SPARK-44266][SQL] Move Util.truncatedString to sql/api ### What changes were proposed in this pull request? Move Util.truncatedString to sql/api. ### Why are the changes needed? Make StructType depend less on Catalyst, moving towards a simpler DataType interface. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test Closes #41811 from amaliujia/move_out_truncatedString. Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/util/StringUtils.scala | 39 ++++++++++++++++++++++ .../apache/spark/sql/catalyst/util/package.scala | 20 ++--------- .../org/apache/spark/sql/types/StructType.scala | 5 ++- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 10ac988da2e..384453e3b53 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -16,8 +16,11 @@ */ package org.apache.spark.sql.catalyst.util +import java.util.concurrent.atomic.AtomicBoolean + import scala.collection.mutable.ArrayBuffer +import org.apache.spark.internal.Logging import org.apache.spark.unsafe.array.ByteArrayUtils /** @@ -63,3 +66,39 @@ class StringConcat(val maxLength: Int = ByteArrayUtils.MAX_ROUNDED_ARRAY_LENGTH) result.toString } } + +object SparkStringUtils extends Logging { + /** Whether we have warned about plan string truncation yet. 
*/ + private val truncationWarningPrinted = new AtomicBoolean(false) + + /** + * Format a sequence with semantics similar to calling .mkString(). Any elements beyond + * maxNumToStringFields will be dropped and replaced by a "... N more fields" placeholder. + * + * @return the trimmed and formatted string. + */ + def truncatedString[T]( + seq: Seq[T], + start: String, + sep: String, + end: String, + maxFields: Int): String = { + if (seq.length > maxFields) { + if (truncationWarningPrinted.compareAndSet(false, true)) { + logWarning( + "Truncated the string representation of a plan since it was too large. This " + + s"behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.") + } + val numFields = math.max(0, maxFields - 1) + seq.take(numFields).mkString( + start, sep, sep + "... " + (seq.length - numFields) + " more fields" + end) + } else { + seq.mkString(start, sep, end) + } + } + + /** Shorthand for calling truncatedString() without start or end strings. */ + def truncatedString[T](seq: Seq[T], sep: String, maxFields: Int): String = { + truncatedString(seq, "", sep, "", maxFields) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index c7c226f01db..0555d8d5fa4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -20,13 +20,11 @@ package org.apache.spark.sql.catalyst import java.io._ import java.nio.charset.Charset import java.nio.charset.StandardCharsets.UTF_8 -import java.util.concurrent.atomic.AtomicBoolean import com.google.common.io.ByteStreams import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{MetadataBuilder, NumericType, StringType, StructType} import 
org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -155,9 +153,6 @@ package object util extends Logging { builder.toString() } - /** Whether we have warned about plan string truncation yet. */ - private val truncationWarningPrinted = new AtomicBoolean(false) - /** * Format a sequence with semantics similar to calling .mkString(). Any elements beyond * maxNumToStringFields will be dropped and replaced by a "... N more fields" placeholder. @@ -170,23 +165,12 @@ package object util extends Logging { sep: String, end: String, maxFields: Int): String = { - if (seq.length > maxFields) { - if (truncationWarningPrinted.compareAndSet(false, true)) { - logWarning( - "Truncated the string representation of a plan since it was too large. This " + - s"behavior can be adjusted by setting '${SQLConf.MAX_TO_STRING_FIELDS.key}'.") - } - val numFields = math.max(0, maxFields - 1) - seq.take(numFields).mkString( - start, sep, sep + "... " + (seq.length - numFields) + " more fields" + end) - } else { - seq.mkString(start, sep, end) - } + SparkStringUtils.truncatedString(seq, start, sep, end, maxFields) } /** Shorthand for calling truncatedString() without start or end strings. 
*/ def truncatedString[T](seq: Seq[T], sep: String, maxFields: Int): String = { - truncatedString(seq, "", sep, "", maxFields) + SparkStringUtils.truncatedString(seq, "", sep, "", maxFields) } val METADATA_COL_ATTR_KEY = "__metadata_col" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index dad8252e5ca..5857aaa9530 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -29,9 +29,8 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser} import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.catalyst.util.{SparkStringUtils, StringConcat} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.catalyst.util.StringConcat -import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.collection.Utils @@ -423,7 +422,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru override def simpleString: String = { val fieldTypes = fields.view.map(field => s"${field.name}:${field.dataType.simpleString}").toSeq - truncatedString( + SparkStringUtils.truncatedString( fieldTypes, "struct<", ",", ">", SQLConf.get.maxToStringFields) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org