This is an automated email from the ASF dual-hosted git repository.
mingliang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ebcba49cd9 [CORE] Add nativeFilters info for simpleString of scan (#8169)
ebcba49cd9 is described below
commit ebcba49cd9bd36c858459870d6b54556f2936c49
Author: Mingliang Zhu <[email protected]>
AuthorDate: Mon Dec 9 13:51:08 2024 +0800
[CORE] Add nativeFilters info for simpleString of scan (#8169)
---
.../gluten/execution/BatchScanExecTransformer.scala | 10 ++++++++++
.../gluten/execution/FileSourceScanExecTransformer.scala | 15 +++++++++++++++
.../execution/datasources/v2/AbstractBatchScanExec.scala | 9 +--------
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BatchScanExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BatchScanExecTransformer.scala
index 4f603e1024..d229117aa4 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BatchScanExecTransformer.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BatchScanExecTransformer.scala
@@ -27,6 +27,7 @@ import org.apache.gluten.utils.FileIndexUtil
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.util.truncatedString
import org.apache.spark.sql.connector.catalog.Table
import org.apache.spark.sql.connector.read.{InputPartition, Scan}
import org.apache.spark.sql.execution.datasources.v2.{BatchScanExecShim, FileScan}
@@ -169,4 +170,13 @@ abstract class BatchScanExecTransformerBase(
case "ClickHouseScan" => ReadFileFormat.MergeTreeReadFormat
case _ => ReadFileFormat.UnknownFormat
}
+
+ override def simpleString(maxFields: Int): String = {
+ val truncatedOutputString = truncatedString(output, "[", ", ", "]", maxFields)
+ val runtimeFiltersString = s"RuntimeFilters: ${runtimeFilters.mkString("[", ",", "]")}"
+ val nativeFiltersString = s"NativeFilters: ${filterExprs().mkString("[", ",", "]")}"
+ val result = s"$nodeName$truncatedOutputString ${scan.description()}" +
+ s" $runtimeFiltersString $nativeFiltersString"
+ redact(result)
+ }
}
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/FileSourceScanExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/FileSourceScanExecTransformer.scala
index d2f8237b69..7f3c6d4f9f 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/FileSourceScanExecTransformer.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/FileSourceScanExecTransformer.scala
@@ -26,6 +26,7 @@ import org.apache.gluten.utils.FileIndexUtil
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, PlanExpression}
import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.util.truncatedString
import org.apache.spark.sql.connector.read.InputPartition
import org.apache.spark.sql.execution.FileSourceScanExecShim
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
@@ -33,6 +34,8 @@ import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.collection.BitSet
+import org.apache.commons.lang3.StringUtils
+
case class FileSourceScanExecTransformer(
@transient override val relation: HadoopFsRelation,
override val output: Seq[Attribute],
@@ -190,6 +193,18 @@ abstract class FileSourceScanExecTransformerBase(
case "CSVFileFormat" => ReadFileFormat.TextReadFormat
case _ => ReadFileFormat.UnknownFormat
}
+
+ override def simpleString(maxFields: Int): String = {
+ val metadataEntries = metadata.toSeq.sorted.map {
+ case (key, value) =>
key + ": " + StringUtils.abbreviate(redact(value), maxMetadataValueLength)
+ }
+ val metadataStr = truncatedString(metadataEntries, " ", ", ", "", maxFields)
+ val nativeFiltersString = s"NativeFilters: ${filterExprs().mkString("[", ",", "]")}"
+ redact(
+ s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]", maxFields)}$metadataStr" +
+ s" $nativeFiltersString")
+ }
}
object FileSourceScanExecTransformerBase {
diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AbstractBatchScanExec.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AbstractBatchScanExec.scala
index e3e9659ed3..3313c3c768 100644
--- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AbstractBatchScanExec.scala
+++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AbstractBatchScanExec.scala
@@ -21,7 +21,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.{KeyGroupedPartitioning, Partitioning, SinglePartition}
-import org.apache.spark.sql.catalyst.util.{truncatedString, InternalRowComparableWrapper}
+import org.apache.spark.sql.catalyst.util.InternalRowComparableWrapper
import org.apache.spark.sql.connector.catalog.Table
import org.apache.spark.sql.connector.read._
import org.apache.spark.sql.internal.SQLConf
@@ -252,13 +252,6 @@ abstract class AbstractBatchScanExec(
rdd
}
- override def simpleString(maxFields: Int): String = {
- val truncatedOutputString = truncatedString(output, "[", ", ", "]", maxFields)
- val runtimeFiltersString = s"RuntimeFilters: ${runtimeFilters.mkString("[", ",", "]")}"
- val result = s"$nodeName$truncatedOutputString ${scan.description()} $runtimeFiltersString"
- redact(result)
- }
-
override def nodeName: String = {
s"BatchScanTransformer ${table.name()}".trim
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]