This is an automated email from the ASF dual-hosted git repository.
beliefer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new fb7704021d [GLUTEN-11155][CORE] Code cleanup for BasicScanExecTransformer (#11156)
fb7704021d is described below
commit fb7704021d555805c2fd6c540c9c30ae10ffeca6
Author: Jiaan Geng <[email protected]>
AuthorDate: Tue Nov 25 11:48:33 2025 +0800
[GLUTEN-11155][CORE] Code cleanup for BasicScanExecTransformer (#11156)
---
.../gluten/execution/CHRangeExecTransformer.scala | 5 +--
.../utils/MergeTreePartsPartitionsUtil.scala | 2 +-
.../datasources/v1/CHMergeTreeWriterInjects.scala | 5 +--
.../apache/gluten/substrait/rel/RelBuilder.java | 2 +-
.../execution/BasicScanExecTransformer.scala | 45 ++++++++--------------
5 files changed, 21 insertions(+), 38 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHRangeExecTransformer.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHRangeExecTransformer.scala
index 317d9d12c7..bd8ef3f6f4 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHRangeExecTransformer.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHRangeExecTransformer.scala
@@ -91,16 +91,15 @@ case class CHRangeExecTransformer(
output.map(attr => new
ColumnTypeNode(NamedStruct.ColumnType.NORMAL_COL)))
.asJava
- val optimizationContent = s"isRange=1\n"
val optimization =
BackendsApiManager.getTransformerApiInstance.packPBMessage(
- StringValue.newBuilder.setValue(optimizationContent).build)
+ StringValue.newBuilder.setValue("isRange=1\n").build)
val extensionNode = ExtensionBuilder.makeAdvancedExtension(optimization,
null)
val readNode = RelBuilder.makeReadRel(
typeNodes,
nameList,
- columnTypeNodes,
null,
+ columnTypeNodes,
extensionNode,
context,
context.nextOperatorId(this.nodeName))
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/clickhouse/utils/MergeTreePartsPartitionsUtil.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/clickhouse/utils/MergeTreePartsPartitionsUtil.scala
index 96caf5b790..8a077422cd 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/clickhouse/utils/MergeTreePartsPartitionsUtil.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/clickhouse/utils/MergeTreePartsPartitionsUtil.scala
@@ -634,8 +634,8 @@ object MergeTreePartsPartitionsUtil extends Logging {
val readNode = RelBuilder.makeReadRel(
typeNodes,
nameList,
- columnTypeNodes,
transformer.map(_.doTransform(substraitContext)).orNull,
+ columnTypeNodes,
extensionNode,
substraitContext,
substraitContext.nextOperatorId("readRel")
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
index cf5a0cadba..7b1dcd2b65 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
@@ -192,15 +192,14 @@ object CHMergeTreeWriterInjects {
clickhouseTableConfigs.asJava
)
- val optimizationContent = "isMergeTree=1\n"
- val optimization =
Any.pack(StringValue.newBuilder.setValue(optimizationContent).build)
+ val optimization =
Any.pack(StringValue.newBuilder.setValue("isMergeTree=1\n").build)
val extensionNode = ExtensionBuilder.makeAdvancedExtension(optimization,
null)
val relNode = RelBuilder.makeReadRel(
typeNodes,
nameList,
- columnTypeNodes,
null,
+ columnTypeNodes,
extensionNode,
substraitContext,
substraitContext.nextOperatorId("readRel"))
diff --git
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/RelBuilder.java
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/RelBuilder.java
index c8a028d0be..20ca9d36f1 100644
---
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/RelBuilder.java
+++
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/RelBuilder.java
@@ -148,8 +148,8 @@ public class RelBuilder {
public static RelNode makeReadRel(
List<TypeNode> types,
List<String> names,
- List<ColumnTypeNode> columnTypeNodes,
ExpressionNode filter,
+ List<ColumnTypeNode> columnTypeNodes,
AdvancedExtensionNode extensionNode,
SubstraitContext context,
Long operatorId) {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
index cb6f54dea7..ee6770bb17 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
@@ -27,7 +27,6 @@ import
org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
import org.apache.spark.Partition
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types.{StringType, StructField, StructType}
import com.google.protobuf.StringValue
import io.substrait.proto.NamedStruct
@@ -104,56 +103,42 @@ trait BasicScanExecTransformer extends
LeafTransformSupport with BaseDataSource
doNativeValidation(substraitContext, relNode)
}
- def appendStringFields(
- schema: StructType,
- existingFields: Array[StructField]): Array[StructField] = {
- val stringFields =
schema.fields.filter(_.dataType.isInstanceOf[StringType])
- if (stringFields.nonEmpty) {
- (existingFields ++ stringFields).distinct
+ private def makeColumnTypeNode(attr: Attribute): ColumnTypeNode = {
+ if (getPartitionSchema.exists(_.name.equals(attr.name))) {
+ new ColumnTypeNode(NamedStruct.ColumnType.PARTITION_COL)
+ } else if
(BackendsApiManager.getSparkPlanExecApiInstance.isRowIndexMetadataColumn(attr.name))
{
+ new ColumnTypeNode(NamedStruct.ColumnType.ROWINDEX_COL)
+ } else if (attr.isMetadataCol) {
+ new ColumnTypeNode(NamedStruct.ColumnType.METADATA_COL)
} else {
- existingFields
+ new ColumnTypeNode(NamedStruct.ColumnType.NORMAL_COL)
}
}
override protected def doTransform(context: SubstraitContext):
TransformContext = {
val typeNodes = ConverterUtils.collectAttributeTypeNodes(output)
val nameList = ConverterUtils.collectAttributeNamesWithoutExprId(output)
- val columnTypeNodes = output.map {
- attr =>
- if (getPartitionSchema.exists(_.name.equals(attr.name))) {
- new ColumnTypeNode(NamedStruct.ColumnType.PARTITION_COL)
- } else if (
-
BackendsApiManager.getSparkPlanExecApiInstance.isRowIndexMetadataColumn(attr.name)
- ) {
- new ColumnTypeNode(NamedStruct.ColumnType.ROWINDEX_COL)
- } else if (attr.isMetadataCol) {
- new ColumnTypeNode(NamedStruct.ColumnType.METADATA_COL)
- } else {
- new ColumnTypeNode(NamedStruct.ColumnType.NORMAL_COL)
- }
- }.asJava
+ val columnTypeNodes = output.map(makeColumnTypeNode).asJava
// Will put all filter expressions into an AND expression
- val transformer = filterExprs()
+ val exprNode = filterExprs()
.map(ExpressionConverter.replaceAttributeReference)
.reduceLeftOption(And)
.map(ExpressionConverter.replaceWithExpressionTransformer(_, output))
- val filterNodes = transformer.map(_.doTransform(context))
- val exprNode = filterNodes.orNull
+ .map(_.doTransform(context))
+ .orNull
// used by CH backend
- val optimizationContent =
- s"isMergeTree=${if (this.fileFormat ==
ReadFileFormat.MergeTreeReadFormat) "1" else "0"}\n"
-
+ val mergeTreeFlag = if (this.fileFormat ==
ReadFileFormat.MergeTreeReadFormat) "1" else "0"
val optimization =
BackendsApiManager.getTransformerApiInstance.packPBMessage(
- StringValue.newBuilder.setValue(optimizationContent).build)
+ StringValue.newBuilder.setValue(s"isMergeTree=$mergeTreeFlag\n").build)
val extensionNode = ExtensionBuilder.makeAdvancedExtension(optimization,
null)
val readNode = RelBuilder.makeReadRel(
typeNodes,
nameList,
- columnTypeNodes,
exprNode,
+ columnTypeNodes,
extensionNode,
context,
context.nextOperatorId(this.nodeName))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]