This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 62b3c9100 fix: split expr.proto file (new) (#2267)
62b3c9100 is described below
commit 62b3c9100aa7d288a59181e093d05fe51b55629a
Author: K.I. (Dennis) Jung <[email protected]>
AuthorDate: Sun Aug 31 07:19:32 2025 +0900
fix: split expr.proto file (new) (#2267)
* split expr.proto file
---
native/proto/src/proto/expr.proto | 78 +---------------------
.../proto/src/proto/{types.proto => literal.proto} | 38 ++++++-----
native/proto/src/proto/operator.proto | 1 +
native/proto/src/proto/types.proto | 58 +++++++++++++++-
.../apache/comet/parquet/SourceFilterSerde.scala | 9 ++-
.../org/apache/comet/serde/QueryPlanSerde.scala | 9 +--
.../main/scala/org/apache/comet/serde/hash.scala | 4 +-
7 files changed, 94 insertions(+), 103 deletions(-)
diff --git a/native/proto/src/proto/expr.proto b/native/proto/src/proto/expr.proto
index 1152d7a1b..04d9376ac 100644
--- a/native/proto/src/proto/expr.proto
+++ b/native/proto/src/proto/expr.proto
@@ -21,6 +21,7 @@ syntax = "proto3";
package spark.spark_expression;
+import "literal.proto";
import "types.proto";
option java_package = "org.apache.comet.serde";
@@ -203,27 +204,6 @@ message BloomFilterAgg {
DataType datatype = 4;
}
-message Literal {
- oneof value {
- bool bool_val = 1;
- // Protobuf doesn't provide int8 and int16, we put them into int32 and convert
- // to int8 and int16 when deserializing.
- int32 byte_val = 2;
- int32 short_val = 3;
- int32 int_val = 4;
- int64 long_val = 5;
- float float_val = 6;
- double double_val = 7;
- string string_val = 8;
- bytes bytes_val = 9;
- bytes decimal_val = 10;
- ListLiteral list_val = 11;
- }
-
- DataType datatype = 12;
- bool is_null = 13;
-}
-
enum EvalMode {
LEGACY = 0;
TRY = 1;
@@ -426,59 +406,3 @@ message ArrayJoin {
message Rand {
int64 seed = 1;
}
-
-message DataType {
- enum DataTypeId {
- BOOL = 0;
- INT8 = 1;
- INT16 = 2;
- INT32 = 3;
- INT64 = 4;
- FLOAT = 5;
- DOUBLE = 6;
- STRING = 7;
- BYTES = 8;
- TIMESTAMP = 9;
- DECIMAL = 10;
- TIMESTAMP_NTZ = 11;
- DATE = 12;
- NULL = 13;
- LIST = 14;
- MAP = 15;
- STRUCT = 16;
- }
- DataTypeId type_id = 1;
-
- message DataTypeInfo {
- oneof datatype_struct {
- DecimalInfo decimal = 2;
- ListInfo list = 3;
- MapInfo map = 4;
- StructInfo struct = 5;
- }
- }
-
- message DecimalInfo {
- int32 precision = 1;
- int32 scale = 2;
- }
-
- message ListInfo {
- DataType element_type = 1;
- bool contains_null = 2;
- }
-
- message MapInfo {
- DataType key_type = 1;
- DataType value_type = 2;
- bool value_contains_null = 3;
- }
-
- message StructInfo {
- repeated string field_names = 1;
- repeated DataType field_datatypes = 2;
- repeated bool field_nullable = 3;
- }
-
- DataTypeInfo type_info = 2;
-}
\ No newline at end of file
diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/literal.proto
similarity index 63%
copy from native/proto/src/proto/types.proto
copy to native/proto/src/proto/literal.proto
index cc163522b..b086b1bd1 100644
--- a/native/proto/src/proto/types.proto
+++ b/native/proto/src/proto/literal.proto
@@ -21,21 +21,27 @@ syntax = "proto3";
package spark.spark_expression;
+import "types.proto";
+
option java_package = "org.apache.comet.serde";
-message ListLiteral {
- // Only one of these fields should be populated based on the array type
- repeated bool boolean_values = 1;
- repeated int32 byte_values = 2;
- repeated int32 short_values = 3;
- repeated int32 int_values = 4;
- repeated int64 long_values = 5;
- repeated float float_values = 6;
- repeated double double_values = 7;
- repeated string string_values = 8;
- repeated bytes bytes_values = 9;
- repeated bytes decimal_values = 10;
- repeated ListLiteral list_values = 11;
-
- repeated bool null_mask = 12;
-}
\ No newline at end of file
+message Literal {
+ oneof value {
+ bool bool_val = 1;
+ // Protobuf doesn't provide int8 and int16, we put them into int32 and convert
+ // to int8 and int16 when deserializing.
+ int32 byte_val = 2;
+ int32 short_val = 3;
+ int32 int_val = 4;
+ int64 long_val = 5;
+ float float_val = 6;
+ double double_val = 7;
+ string string_val = 8;
+ bytes bytes_val = 9;
+ bytes decimal_val = 10;
+ ListLiteral list_val = 11;
+ }
+
+ DataType datatype = 12;
+ bool is_null = 13;
+}
diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto
index 5cb332ef0..77e02e6f4 100644
--- a/native/proto/src/proto/operator.proto
+++ b/native/proto/src/proto/operator.proto
@@ -23,6 +23,7 @@ package spark.spark_operator;
import "expr.proto";
import "partitioning.proto";
+import "types.proto";
option java_package = "org.apache.comet.serde";
diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/types.proto
index cc163522b..2fd3d59a7 100644
--- a/native/proto/src/proto/types.proto
+++ b/native/proto/src/proto/types.proto
@@ -38,4 +38,60 @@ message ListLiteral {
repeated ListLiteral list_values = 11;
repeated bool null_mask = 12;
-}
\ No newline at end of file
+}
+
+message DataType {
+ enum DataTypeId {
+ BOOL = 0;
+ INT8 = 1;
+ INT16 = 2;
+ INT32 = 3;
+ INT64 = 4;
+ FLOAT = 5;
+ DOUBLE = 6;
+ STRING = 7;
+ BYTES = 8;
+ TIMESTAMP = 9;
+ DECIMAL = 10;
+ TIMESTAMP_NTZ = 11;
+ DATE = 12;
+ NULL = 13;
+ LIST = 14;
+ MAP = 15;
+ STRUCT = 16;
+ }
+ DataTypeId type_id = 1;
+
+ message DataTypeInfo {
+ oneof datatype_struct {
+ DecimalInfo decimal = 2;
+ ListInfo list = 3;
+ MapInfo map = 4;
+ StructInfo struct = 5;
+ }
+ }
+
+ message DecimalInfo {
+ int32 precision = 1;
+ int32 scale = 2;
+ }
+
+ message ListInfo {
+ DataType element_type = 1;
+ bool contains_null = 2;
+ }
+
+ message MapInfo {
+ DataType key_type = 1;
+ DataType value_type = 2;
+ bool value_contains_null = 3;
+ }
+
+ message StructInfo {
+ repeated string field_names = 1;
+ repeated DataType field_datatypes = 2;
+ repeated bool field_nullable = 3;
+ }
+
+ DataTypeInfo type_info = 2;
+}
diff --git a/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala b/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala
index 4ad467cd8..ac6a89ca3 100644
--- a/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala
@@ -29,13 +29,14 @@ import org.apache.spark.sql.types._
import org.apache.comet.serde.ExprOuterClass
import org.apache.comet.serde.ExprOuterClass.Expr
+import org.apache.comet.serde.LiteralOuterClass
import org.apache.comet.serde.QueryPlanSerde.serializeDataType
object SourceFilterSerde extends Logging {
def createNameExpr(
name: String,
- schema: StructType): Option[(DataType, ExprOuterClass.Expr)] = {
schema: StructType): Option[(org.apache.spark.sql.types.DataType, ExprOuterClass.Expr)] = {
val filedWithIndex = schema.fields.zipWithIndex.find { case (field, _) =>
field.name == name
}
@@ -66,8 +67,10 @@ object SourceFilterSerde extends Logging {
/**
* create a literal value native expression for source filter value, the value is a scala value
*/
- def createValueExpr(value: Any, dataType: DataType): Option[ExprOuterClass.Expr] = {
- val exprBuilder = ExprOuterClass.Literal.newBuilder()
+ def createValueExpr(
+ value: Any,
+ dataType: org.apache.spark.sql.types.DataType): Option[ExprOuterClass.Expr] = {
+ val exprBuilder = LiteralOuterClass.Literal.newBuilder()
var valueIsSet = true
if (value == null) {
exprBuilder.setIsNull(true)
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index ad9be300f..2a5b6d075 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -52,10 +52,11 @@ import org.apache.comet.CometSparkSessionExtensions.{isCometScan, withInfo}
import org.apache.comet.DataTypeSupport.isComplexType
import org.apache.comet.expressions._
import org.apache.comet.objectstore.NativeConfig
-import org.apache.comet.serde.ExprOuterClass.{AggExpr, DataType => ProtoDataType, Expr, ScalarFunc}
-import org.apache.comet.serde.ExprOuterClass.DataType._
+import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc}
import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregateMode, BuildSide, JoinType, Operator}
import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto}
+import org.apache.comet.serde.Types.{DataType => ProtoDataType}
+import org.apache.comet.serde.Types.DataType._
import org.apache.comet.serde.Types.ListLiteral
import org.apache.comet.shims.CometExprShim
@@ -228,7 +229,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
* doesn't mean it is supported by Comet native execution, i.e., `supportedDataType` may return
* false for it.
*/
- def serializeDataType(dt: DataType): Option[ExprOuterClass.DataType] = {
+ def serializeDataType(dt: org.apache.spark.sql.types.DataType): Option[Types.DataType] = {
val typeId = dt match {
case _: BooleanType => 0
case _: ByteType => 1
@@ -762,7 +763,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
.contains(CometConf.COMET_NATIVE_SCAN_IMPL.get()) && dataType
.isInstanceOf[ArrayType]) && !isComplexType(
dataType.asInstanceOf[ArrayType].elementType)) =>
- val exprBuilder = ExprOuterClass.Literal.newBuilder()
+ val exprBuilder = LiteralOuterClass.Literal.newBuilder()
if (value == null) {
exprBuilder.setIsNull(true)
diff --git a/spark/src/main/scala/org/apache/comet/serde/hash.scala b/spark/src/main/scala/org/apache/comet/serde/hash.scala
index 53f99ea7c..5c45a2593 100644
--- a/spark/src/main/scala/org/apache/comet/serde/hash.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/hash.scala
@@ -34,7 +34,7 @@ object CometXxHash64 extends CometExpressionSerde[XxHash64] {
return None
}
val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding))
- val seedBuilder = ExprOuterClass.Literal
+ val seedBuilder = LiteralOuterClass.Literal
.newBuilder()
.setDatatype(serializeDataType(LongType).get)
.setLongVal(expr.seed)
@@ -53,7 +53,7 @@ object CometMurmur3Hash extends CometExpressionSerde[Murmur3Hash] {
return None
}
val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding))
- val seedBuilder = ExprOuterClass.Literal
+ val seedBuilder = LiteralOuterClass.Literal
.newBuilder()
.setDatatype(serializeDataType(IntegerType).get)
.setIntVal(expr.seed)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]