This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0f1c515179e [SPARK-41345][CONNECT] Add Hint to Connect Proto
0f1c515179e is described below

commit 0f1c515179e5ed34aca27c51f500c26ca19cc748
Author: Rui Wang <rui.w...@databricks.com>
AuthorDate: Thu Dec 1 19:07:33 2022 +0800

    [SPARK-41345][CONNECT] Add Hint to Connect Proto
    
    ### What changes were proposed in this pull request?
    
    This PR adds `Hint` to Connect proto.
    
    Technically, a hint can accept parameters of any type; however, this
    version of the proto implementation only supports literals. For
    comparison, the PySpark hint API itself supports only a few parameter
    types (str, list, float, int).
    
    ### Why are the changes needed?
    
    Proto API completeness.
    
    ### Does this PR introduce _any_ user-facing change?
    
    NO
    
    ### How was this patch tested?
    
    N/A
    
    Closes #38857 from amaliujia/hint.
    
    Authored-by: Rui Wang <rui.w...@databricks.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 .../main/protobuf/spark/connect/relations.proto    |  17 ++
 python/pyspark/sql/connect/proto/relations_pb2.py  | 174 +++++++++++----------
 python/pyspark/sql/connect/proto/relations_pb2.pyi |  56 +++++++
 3 files changed, 167 insertions(+), 80 deletions(-)

diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto 
b/connector/connect/src/main/protobuf/spark/connect/relations.proto
index f4df95fdd73..bb891d63670 100644
--- a/connector/connect/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/src/main/protobuf/spark/connect/relations.proto
@@ -53,6 +53,7 @@ message Relation {
     Drop drop = 21;
     Tail tail = 22;
     WithColumns with_columns = 23;
+    Hint hint = 24;
 
     // NA functions
     NAFill fill_na = 90;
@@ -541,3 +542,19 @@ message WithColumns {
   // An exception is thrown when duplicated names are present in the mapping.
   repeated Expression.Alias name_expr_list = 2;
 }
+
+// Specify a hint over a relation. Hint should have a name and optional 
parameters.
+message Hint {
+  // (Required) The input relation.
+  Relation input = 1;
+
+  // (Required) Hint name.
+  //
+  // Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, 
SHUFFLE_REPLICATE_NL.
+  //
+  // Supported partitioning hints include COALESCE, REPARTITION, 
REPARTITION_BY_RANGE.
+  string name = 2;
+
+  // (Optional) Hint parameters.
+  repeated Expression.Literal parameters = 3;
+}
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py 
b/python/pyspark/sql/connect/proto/relations_pb2.py
index 7c4cbf17b85..47be485b546 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -33,7 +33,7 @@ from pyspark.sql.connect.proto import expressions_pb2 as 
spark_dot_connect_dot_e
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    
b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\xa5\r\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01
 
\x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02
 
\x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 
\x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04
 
\x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05
 \x01(\x0b\ [...]
+    
b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\xd0\r\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01
 
\x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02
 
\x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 
\x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04
 
\x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05
 \x01(\x0b\ [...]
 )
 
 
@@ -75,6 +75,7 @@ _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY = (
     _RENAMECOLUMNSBYNAMETONAMEMAP.nested_types_by_name["RenameColumnsMapEntry"]
 )
 _WITHCOLUMNS = DESCRIPTOR.message_types_by_name["WithColumns"]
+_HINT = DESCRIPTOR.message_types_by_name["Hint"]
 _JOIN_JOINTYPE = _JOIN.enum_types_by_name["JoinType"]
 _SETOPERATION_SETOPTYPE = _SETOPERATION.enum_types_by_name["SetOpType"]
 _SORT_SORTDIRECTION = _SORT.enum_types_by_name["SortDirection"]
@@ -469,6 +470,17 @@ WithColumns = _reflection.GeneratedProtocolMessageType(
 )
 _sym_db.RegisterMessage(WithColumns)
 
+Hint = _reflection.GeneratedProtocolMessageType(
+    "Hint",
+    (_message.Message,),
+    {
+        "DESCRIPTOR": _HINT,
+        "__module__": "spark.connect.relations_pb2"
+        # @@protoc_insertion_point(class_scope:spark.connect.Hint)
+    },
+)
+_sym_db.RegisterMessage(Hint)
+
 if _descriptor._USE_C_DESCRIPTORS == False:
 
     DESCRIPTOR._options = None
@@ -478,83 +490,85 @@ if _descriptor._USE_C_DESCRIPTORS == False:
     _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._options = None
     _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_options = 
b"8\001"
     _RELATION._serialized_start = 82
-    _RELATION._serialized_end = 1783
-    _UNKNOWN._serialized_start = 1785
-    _UNKNOWN._serialized_end = 1794
-    _RELATIONCOMMON._serialized_start = 1796
-    _RELATIONCOMMON._serialized_end = 1845
-    _SQL._serialized_start = 1847
-    _SQL._serialized_end = 1874
-    _READ._serialized_start = 1877
-    _READ._serialized_end = 2303
-    _READ_NAMEDTABLE._serialized_start = 2019
-    _READ_NAMEDTABLE._serialized_end = 2080
-    _READ_DATASOURCE._serialized_start = 2083
-    _READ_DATASOURCE._serialized_end = 2290
-    _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 2221
-    _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 2279
-    _PROJECT._serialized_start = 2305
-    _PROJECT._serialized_end = 2422
-    _FILTER._serialized_start = 2424
-    _FILTER._serialized_end = 2536
-    _JOIN._serialized_start = 2539
-    _JOIN._serialized_end = 3010
-    _JOIN_JOINTYPE._serialized_start = 2802
-    _JOIN_JOINTYPE._serialized_end = 3010
-    _SETOPERATION._serialized_start = 3013
-    _SETOPERATION._serialized_end = 3409
-    _SETOPERATION_SETOPTYPE._serialized_start = 3272
-    _SETOPERATION_SETOPTYPE._serialized_end = 3386
-    _LIMIT._serialized_start = 3411
-    _LIMIT._serialized_end = 3487
-    _OFFSET._serialized_start = 3489
-    _OFFSET._serialized_end = 3568
-    _TAIL._serialized_start = 3570
-    _TAIL._serialized_end = 3645
-    _AGGREGATE._serialized_start = 3648
-    _AGGREGATE._serialized_end = 3858
-    _SORT._serialized_start = 3861
-    _SORT._serialized_end = 4411
-    _SORT_SORTFIELD._serialized_start = 4015
-    _SORT_SORTFIELD._serialized_end = 4203
-    _SORT_SORTDIRECTION._serialized_start = 4205
-    _SORT_SORTDIRECTION._serialized_end = 4313
-    _SORT_SORTNULLS._serialized_start = 4315
-    _SORT_SORTNULLS._serialized_end = 4397
-    _DROP._serialized_start = 4413
-    _DROP._serialized_end = 4513
-    _DEDUPLICATE._serialized_start = 4516
-    _DEDUPLICATE._serialized_end = 4687
-    _LOCALRELATION._serialized_start = 4689
-    _LOCALRELATION._serialized_end = 4724
-    _SAMPLE._serialized_start = 4727
-    _SAMPLE._serialized_end = 4951
-    _RANGE._serialized_start = 4954
-    _RANGE._serialized_end = 5099
-    _SUBQUERYALIAS._serialized_start = 5101
-    _SUBQUERYALIAS._serialized_end = 5215
-    _REPARTITION._serialized_start = 5218
-    _REPARTITION._serialized_end = 5360
-    _SHOWSTRING._serialized_start = 5363
-    _SHOWSTRING._serialized_end = 5504
-    _STATSUMMARY._serialized_start = 5506
-    _STATSUMMARY._serialized_end = 5598
-    _STATCROSSTAB._serialized_start = 5600
-    _STATCROSSTAB._serialized_end = 5701
-    _NAFILL._serialized_start = 5704
-    _NAFILL._serialized_end = 5838
-    _NADROP._serialized_start = 5841
-    _NADROP._serialized_end = 5975
-    _NAREPLACE._serialized_start = 5978
-    _NAREPLACE._serialized_end = 6274
-    _NAREPLACE_REPLACEMENT._serialized_start = 6133
-    _NAREPLACE_REPLACEMENT._serialized_end = 6274
-    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_start = 6276
-    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_end = 6390
-    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_start = 6393
-    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_end = 6652
-    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_start = 
6585
-    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_end = 6652
-    _WITHCOLUMNS._serialized_start = 6655
-    _WITHCOLUMNS._serialized_end = 6786
+    _RELATION._serialized_end = 1826
+    _UNKNOWN._serialized_start = 1828
+    _UNKNOWN._serialized_end = 1837
+    _RELATIONCOMMON._serialized_start = 1839
+    _RELATIONCOMMON._serialized_end = 1888
+    _SQL._serialized_start = 1890
+    _SQL._serialized_end = 1917
+    _READ._serialized_start = 1920
+    _READ._serialized_end = 2346
+    _READ_NAMEDTABLE._serialized_start = 2062
+    _READ_NAMEDTABLE._serialized_end = 2123
+    _READ_DATASOURCE._serialized_start = 2126
+    _READ_DATASOURCE._serialized_end = 2333
+    _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 2264
+    _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 2322
+    _PROJECT._serialized_start = 2348
+    _PROJECT._serialized_end = 2465
+    _FILTER._serialized_start = 2467
+    _FILTER._serialized_end = 2579
+    _JOIN._serialized_start = 2582
+    _JOIN._serialized_end = 3053
+    _JOIN_JOINTYPE._serialized_start = 2845
+    _JOIN_JOINTYPE._serialized_end = 3053
+    _SETOPERATION._serialized_start = 3056
+    _SETOPERATION._serialized_end = 3452
+    _SETOPERATION_SETOPTYPE._serialized_start = 3315
+    _SETOPERATION_SETOPTYPE._serialized_end = 3429
+    _LIMIT._serialized_start = 3454
+    _LIMIT._serialized_end = 3530
+    _OFFSET._serialized_start = 3532
+    _OFFSET._serialized_end = 3611
+    _TAIL._serialized_start = 3613
+    _TAIL._serialized_end = 3688
+    _AGGREGATE._serialized_start = 3691
+    _AGGREGATE._serialized_end = 3901
+    _SORT._serialized_start = 3904
+    _SORT._serialized_end = 4454
+    _SORT_SORTFIELD._serialized_start = 4058
+    _SORT_SORTFIELD._serialized_end = 4246
+    _SORT_SORTDIRECTION._serialized_start = 4248
+    _SORT_SORTDIRECTION._serialized_end = 4356
+    _SORT_SORTNULLS._serialized_start = 4358
+    _SORT_SORTNULLS._serialized_end = 4440
+    _DROP._serialized_start = 4456
+    _DROP._serialized_end = 4556
+    _DEDUPLICATE._serialized_start = 4559
+    _DEDUPLICATE._serialized_end = 4730
+    _LOCALRELATION._serialized_start = 4732
+    _LOCALRELATION._serialized_end = 4767
+    _SAMPLE._serialized_start = 4770
+    _SAMPLE._serialized_end = 4994
+    _RANGE._serialized_start = 4997
+    _RANGE._serialized_end = 5142
+    _SUBQUERYALIAS._serialized_start = 5144
+    _SUBQUERYALIAS._serialized_end = 5258
+    _REPARTITION._serialized_start = 5261
+    _REPARTITION._serialized_end = 5403
+    _SHOWSTRING._serialized_start = 5406
+    _SHOWSTRING._serialized_end = 5547
+    _STATSUMMARY._serialized_start = 5549
+    _STATSUMMARY._serialized_end = 5641
+    _STATCROSSTAB._serialized_start = 5643
+    _STATCROSSTAB._serialized_end = 5744
+    _NAFILL._serialized_start = 5747
+    _NAFILL._serialized_end = 5881
+    _NADROP._serialized_start = 5884
+    _NADROP._serialized_end = 6018
+    _NAREPLACE._serialized_start = 6021
+    _NAREPLACE._serialized_end = 6317
+    _NAREPLACE_REPLACEMENT._serialized_start = 6176
+    _NAREPLACE_REPLACEMENT._serialized_end = 6317
+    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_start = 6319
+    _RENAMECOLUMNSBYSAMELENGTHNAMES._serialized_end = 6433
+    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_start = 6436
+    _RENAMECOLUMNSBYNAMETONAMEMAP._serialized_end = 6695
+    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_start = 
6628
+    _RENAMECOLUMNSBYNAMETONAMEMAP_RENAMECOLUMNSMAPENTRY._serialized_end = 6695
+    _WITHCOLUMNS._serialized_start = 6698
+    _WITHCOLUMNS._serialized_end = 6829
+    _HINT._serialized_start = 6832
+    _HINT._serialized_end = 6972
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi 
b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index dbc69a48d88..996afbedcdc 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -82,6 +82,7 @@ class Relation(google.protobuf.message.Message):
     DROP_FIELD_NUMBER: builtins.int
     TAIL_FIELD_NUMBER: builtins.int
     WITH_COLUMNS_FIELD_NUMBER: builtins.int
+    HINT_FIELD_NUMBER: builtins.int
     FILL_NA_FIELD_NUMBER: builtins.int
     DROP_NA_FIELD_NUMBER: builtins.int
     REPLACE_FIELD_NUMBER: builtins.int
@@ -135,6 +136,8 @@ class Relation(google.protobuf.message.Message):
     @property
     def with_columns(self) -> global___WithColumns: ...
     @property
+    def hint(self) -> global___Hint: ...
+    @property
     def fill_na(self) -> global___NAFill:
         """NA functions"""
     @property
@@ -174,6 +177,7 @@ class Relation(google.protobuf.message.Message):
         drop: global___Drop | None = ...,
         tail: global___Tail | None = ...,
         with_columns: global___WithColumns | None = ...,
+        hint: global___Hint | None = ...,
         fill_na: global___NAFill | None = ...,
         drop_na: global___NADrop | None = ...,
         replace: global___NAReplace | None = ...,
@@ -200,6 +204,8 @@ class Relation(google.protobuf.message.Message):
             b"fill_na",
             "filter",
             b"filter",
+            "hint",
+            b"hint",
             "join",
             b"join",
             "limit",
@@ -265,6 +271,8 @@ class Relation(google.protobuf.message.Message):
             b"fill_na",
             "filter",
             b"filter",
+            "hint",
+            b"hint",
             "join",
             b"join",
             "limit",
@@ -336,6 +344,7 @@ class Relation(google.protobuf.message.Message):
         "drop",
         "tail",
         "with_columns",
+        "hint",
         "fill_na",
         "drop_na",
         "replace",
@@ -1865,3 +1874,50 @@ class WithColumns(google.protobuf.message.Message):
     ) -> None: ...
 
 global___WithColumns = WithColumns
+
+class Hint(google.protobuf.message.Message):
+    """Specify a hint over a relation. Hint should have a name and optional 
parameters."""
+
+    DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+    INPUT_FIELD_NUMBER: builtins.int
+    NAME_FIELD_NUMBER: builtins.int
+    PARAMETERS_FIELD_NUMBER: builtins.int
+    @property
+    def input(self) -> global___Relation:
+        """(Required) The input relation."""
+    name: builtins.str
+    """(Required) Hint name.
+
+    Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, 
SHUFFLE_REPLICATE_NL.
+
+    Supported partitioning hints include COALESCE, REPARTITION, 
REPARTITION_BY_RANGE.
+    """
+    @property
+    def parameters(
+        self,
+    ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
+        pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
+    ]:
+        """(Optional) Hint parameters."""
+    def __init__(
+        self,
+        *,
+        input: global___Relation | None = ...,
+        name: builtins.str = ...,
+        parameters: collections.abc.Iterable[
+            pyspark.sql.connect.proto.expressions_pb2.Expression.Literal
+        ]
+        | None = ...,
+    ) -> None: ...
+    def HasField(
+        self, field_name: typing_extensions.Literal["input", b"input"]
+    ) -> builtins.bool: ...
+    def ClearField(
+        self,
+        field_name: typing_extensions.Literal[
+            "input", b"input", "name", b"name", "parameters", b"parameters"
+        ],
+    ) -> None: ...
+
+global___Hint = Hint


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to