This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 0db5bdecfa6c [SPARK-48639][CONNECT][PYTHON] Add Origin to Relation.RelationCommon 0db5bdecfa6c is described below commit 0db5bdecfa6cbfff1be7690bb783a858989776b9 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Mon Jun 24 19:25:56 2024 +0900 [SPARK-48639][CONNECT][PYTHON] Add Origin to Relation.RelationCommon This PR proposes to add `Origin` (from https://github.com/apache/spark/pull/46789) to `Relation.RelationCommon` To have the common protobuf message to keep the source code info. No. This is not used. CI should validate protobuf definition, and existing tests should pass. No. Closes #47024 from HyukjinKwon/SPARK-48639. Lead-authored-by: Hyukjin Kwon <gurwls...@apache.org> Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit 09cb5921b7d88e63e172aa6aea370319e8bbd2fa) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../main/protobuf/spark/connect/relations.proto | 4 +- python/pyspark/sql/connect/proto/relations_pb2.py | 264 ++++++++++----------- python/pyspark/sql/connect/proto/relations_pb2.pyi | 9 +- 3 files changed, 135 insertions(+), 142 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto index f7f1315ede0f..8b384728983b 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto @@ -103,8 +103,8 @@ message Unknown {} // Common metadata of all relations. message RelationCommon { - // (Required) Shared relation metadata. - string source_info = 1; + // (Optional) Shared relation metadata. + reserved 1; // (Optional) A per-client globally unique id for a given connect plan. 
optional int64 plan_id = 2; diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py index 3f7e57949373..7dd494db8695 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.py +++ b/python/pyspark/sql/connect/proto/relations_pb2.py @@ -35,7 +35,7 @@ from pyspark.sql.connect.proto import catalog_pb2 as spark_dot_connect_dot_catal DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xe1\x18\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66il [...] + b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xe1\x18\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66il [...] 
) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -61,135 +61,135 @@ if _descriptor._USE_C_DESCRIPTORS == False: _UNKNOWN._serialized_start = 3336 _UNKNOWN._serialized_end = 3345 _RELATIONCOMMON._serialized_start = 3347 - _RELATIONCOMMON._serialized_end = 3438 - _SQL._serialized_start = 3441 - _SQL._serialized_end = 3672 - _SQL_ARGSENTRY._serialized_start = 3582 - _SQL_ARGSENTRY._serialized_end = 3672 - _READ._serialized_start = 3675 - _READ._serialized_end = 4338 - _READ_NAMEDTABLE._serialized_start = 3853 - _READ_NAMEDTABLE._serialized_end = 4045 - _READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 3987 - _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 4045 - _READ_DATASOURCE._serialized_start = 4048 - _READ_DATASOURCE._serialized_end = 4325 - _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3987 - _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 4045 - _PROJECT._serialized_start = 4340 - _PROJECT._serialized_end = 4457 - _FILTER._serialized_start = 4459 - _FILTER._serialized_end = 4571 - _JOIN._serialized_start = 4574 - _JOIN._serialized_end = 5235 - _JOIN_JOINDATATYPE._serialized_start = 4913 - _JOIN_JOINDATATYPE._serialized_end = 5005 - _JOIN_JOINTYPE._serialized_start = 5008 - _JOIN_JOINTYPE._serialized_end = 5216 - _SETOPERATION._serialized_start = 5238 - _SETOPERATION._serialized_end = 5717 - _SETOPERATION_SETOPTYPE._serialized_start = 5554 - _SETOPERATION_SETOPTYPE._serialized_end = 5668 - _LIMIT._serialized_start = 5719 - _LIMIT._serialized_end = 5795 - _OFFSET._serialized_start = 5797 - _OFFSET._serialized_end = 5876 - _TAIL._serialized_start = 5878 - _TAIL._serialized_end = 5953 - _AGGREGATE._serialized_start = 5956 - _AGGREGATE._serialized_end = 6538 - _AGGREGATE_PIVOT._serialized_start = 6295 - _AGGREGATE_PIVOT._serialized_end = 6406 - _AGGREGATE_GROUPTYPE._serialized_start = 6409 - _AGGREGATE_GROUPTYPE._serialized_end = 6538 - _SORT._serialized_start = 6541 - _SORT._serialized_end = 6701 - _DROP._serialized_start = 6704 - 
_DROP._serialized_end = 6845 - _DEDUPLICATE._serialized_start = 6848 - _DEDUPLICATE._serialized_end = 7088 - _LOCALRELATION._serialized_start = 7090 - _LOCALRELATION._serialized_end = 7179 - _CACHEDLOCALRELATION._serialized_start = 7181 - _CACHEDLOCALRELATION._serialized_end = 7253 - _CACHEDREMOTERELATION._serialized_start = 7255 - _CACHEDREMOTERELATION._serialized_end = 7310 - _SAMPLE._serialized_start = 7313 - _SAMPLE._serialized_end = 7586 - _RANGE._serialized_start = 7589 - _RANGE._serialized_end = 7734 - _SUBQUERYALIAS._serialized_start = 7736 - _SUBQUERYALIAS._serialized_end = 7850 - _REPARTITION._serialized_start = 7853 - _REPARTITION._serialized_end = 7995 - _SHOWSTRING._serialized_start = 7998 - _SHOWSTRING._serialized_end = 8140 - _HTMLSTRING._serialized_start = 8142 - _HTMLSTRING._serialized_end = 8256 - _STATSUMMARY._serialized_start = 8258 - _STATSUMMARY._serialized_end = 8350 - _STATDESCRIBE._serialized_start = 8352 - _STATDESCRIBE._serialized_end = 8433 - _STATCROSSTAB._serialized_start = 8435 - _STATCROSSTAB._serialized_end = 8536 - _STATCOV._serialized_start = 8538 - _STATCOV._serialized_end = 8634 - _STATCORR._serialized_start = 8637 - _STATCORR._serialized_end = 8774 - _STATAPPROXQUANTILE._serialized_start = 8777 - _STATAPPROXQUANTILE._serialized_end = 8941 - _STATFREQITEMS._serialized_start = 8943 - _STATFREQITEMS._serialized_end = 9068 - _STATSAMPLEBY._serialized_start = 9071 - _STATSAMPLEBY._serialized_end = 9380 - _STATSAMPLEBY_FRACTION._serialized_start = 9272 - _STATSAMPLEBY_FRACTION._serialized_end = 9371 - _NAFILL._serialized_start = 9383 - _NAFILL._serialized_end = 9517 - _NADROP._serialized_start = 9520 - _NADROP._serialized_end = 9654 - _NAREPLACE._serialized_start = 9657 - _NAREPLACE._serialized_end = 9953 - _NAREPLACE_REPLACEMENT._serialized_start = 9812 - _NAREPLACE_REPLACEMENT._serialized_end = 9953 - _TODF._serialized_start = 9955 - _TODF._serialized_end = 10043 - _WITHCOLUMNSRENAMED._serialized_start = 10046 - 
_WITHCOLUMNSRENAMED._serialized_end = 10285 - _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 10218 - _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 10285 - _WITHCOLUMNS._serialized_start = 10287 - _WITHCOLUMNS._serialized_end = 10406 - _WITHWATERMARK._serialized_start = 10409 - _WITHWATERMARK._serialized_end = 10543 - _HINT._serialized_start = 10546 - _HINT._serialized_end = 10678 - _UNPIVOT._serialized_start = 10681 - _UNPIVOT._serialized_end = 11008 - _UNPIVOT_VALUES._serialized_start = 10938 - _UNPIVOT_VALUES._serialized_end = 10997 - _TOSCHEMA._serialized_start = 11010 - _TOSCHEMA._serialized_end = 11116 - _REPARTITIONBYEXPRESSION._serialized_start = 11119 - _REPARTITIONBYEXPRESSION._serialized_end = 11322 - _MAPPARTITIONS._serialized_start = 11325 - _MAPPARTITIONS._serialized_end = 11506 - _GROUPMAP._serialized_start = 11509 - _GROUPMAP._serialized_end = 12144 - _COGROUPMAP._serialized_start = 12147 - _COGROUPMAP._serialized_end = 12673 - _APPLYINPANDASWITHSTATE._serialized_start = 12676 - _APPLYINPANDASWITHSTATE._serialized_end = 13033 - _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 13036 - _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 13280 - _PYTHONUDTF._serialized_start = 13283 - _PYTHONUDTF._serialized_end = 13460 - _COLLECTMETRICS._serialized_start = 13463 - _COLLECTMETRICS._serialized_end = 13599 - _PARSE._serialized_start = 13602 - _PARSE._serialized_end = 13990 - _PARSE_OPTIONSENTRY._serialized_start = 3987 - _PARSE_OPTIONSENTRY._serialized_end = 4045 - _PARSE_PARSEFORMAT._serialized_start = 13891 - _PARSE_PARSEFORMAT._serialized_end = 13979 + _RELATIONCOMMON._serialized_end = 3411 + _SQL._serialized_start = 3414 + _SQL._serialized_end = 3645 + _SQL_ARGSENTRY._serialized_start = 3555 + _SQL_ARGSENTRY._serialized_end = 3645 + _READ._serialized_start = 3648 + _READ._serialized_end = 4311 + _READ_NAMEDTABLE._serialized_start = 3826 + _READ_NAMEDTABLE._serialized_end = 4018 + 
_READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 3960 + _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 4018 + _READ_DATASOURCE._serialized_start = 4021 + _READ_DATASOURCE._serialized_end = 4298 + _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3960 + _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 4018 + _PROJECT._serialized_start = 4313 + _PROJECT._serialized_end = 4430 + _FILTER._serialized_start = 4432 + _FILTER._serialized_end = 4544 + _JOIN._serialized_start = 4547 + _JOIN._serialized_end = 5208 + _JOIN_JOINDATATYPE._serialized_start = 4886 + _JOIN_JOINDATATYPE._serialized_end = 4978 + _JOIN_JOINTYPE._serialized_start = 4981 + _JOIN_JOINTYPE._serialized_end = 5189 + _SETOPERATION._serialized_start = 5211 + _SETOPERATION._serialized_end = 5690 + _SETOPERATION_SETOPTYPE._serialized_start = 5527 + _SETOPERATION_SETOPTYPE._serialized_end = 5641 + _LIMIT._serialized_start = 5692 + _LIMIT._serialized_end = 5768 + _OFFSET._serialized_start = 5770 + _OFFSET._serialized_end = 5849 + _TAIL._serialized_start = 5851 + _TAIL._serialized_end = 5926 + _AGGREGATE._serialized_start = 5929 + _AGGREGATE._serialized_end = 6511 + _AGGREGATE_PIVOT._serialized_start = 6268 + _AGGREGATE_PIVOT._serialized_end = 6379 + _AGGREGATE_GROUPTYPE._serialized_start = 6382 + _AGGREGATE_GROUPTYPE._serialized_end = 6511 + _SORT._serialized_start = 6514 + _SORT._serialized_end = 6674 + _DROP._serialized_start = 6677 + _DROP._serialized_end = 6818 + _DEDUPLICATE._serialized_start = 6821 + _DEDUPLICATE._serialized_end = 7061 + _LOCALRELATION._serialized_start = 7063 + _LOCALRELATION._serialized_end = 7152 + _CACHEDLOCALRELATION._serialized_start = 7154 + _CACHEDLOCALRELATION._serialized_end = 7226 + _CACHEDREMOTERELATION._serialized_start = 7228 + _CACHEDREMOTERELATION._serialized_end = 7283 + _SAMPLE._serialized_start = 7286 + _SAMPLE._serialized_end = 7559 + _RANGE._serialized_start = 7562 + _RANGE._serialized_end = 7707 + _SUBQUERYALIAS._serialized_start = 7709 + 
_SUBQUERYALIAS._serialized_end = 7823 + _REPARTITION._serialized_start = 7826 + _REPARTITION._serialized_end = 7968 + _SHOWSTRING._serialized_start = 7971 + _SHOWSTRING._serialized_end = 8113 + _HTMLSTRING._serialized_start = 8115 + _HTMLSTRING._serialized_end = 8229 + _STATSUMMARY._serialized_start = 8231 + _STATSUMMARY._serialized_end = 8323 + _STATDESCRIBE._serialized_start = 8325 + _STATDESCRIBE._serialized_end = 8406 + _STATCROSSTAB._serialized_start = 8408 + _STATCROSSTAB._serialized_end = 8509 + _STATCOV._serialized_start = 8511 + _STATCOV._serialized_end = 8607 + _STATCORR._serialized_start = 8610 + _STATCORR._serialized_end = 8747 + _STATAPPROXQUANTILE._serialized_start = 8750 + _STATAPPROXQUANTILE._serialized_end = 8914 + _STATFREQITEMS._serialized_start = 8916 + _STATFREQITEMS._serialized_end = 9041 + _STATSAMPLEBY._serialized_start = 9044 + _STATSAMPLEBY._serialized_end = 9353 + _STATSAMPLEBY_FRACTION._serialized_start = 9245 + _STATSAMPLEBY_FRACTION._serialized_end = 9344 + _NAFILL._serialized_start = 9356 + _NAFILL._serialized_end = 9490 + _NADROP._serialized_start = 9493 + _NADROP._serialized_end = 9627 + _NAREPLACE._serialized_start = 9630 + _NAREPLACE._serialized_end = 9926 + _NAREPLACE_REPLACEMENT._serialized_start = 9785 + _NAREPLACE_REPLACEMENT._serialized_end = 9926 + _TODF._serialized_start = 9928 + _TODF._serialized_end = 10016 + _WITHCOLUMNSRENAMED._serialized_start = 10019 + _WITHCOLUMNSRENAMED._serialized_end = 10258 + _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 10191 + _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 10258 + _WITHCOLUMNS._serialized_start = 10260 + _WITHCOLUMNS._serialized_end = 10379 + _WITHWATERMARK._serialized_start = 10382 + _WITHWATERMARK._serialized_end = 10516 + _HINT._serialized_start = 10519 + _HINT._serialized_end = 10651 + _UNPIVOT._serialized_start = 10654 + _UNPIVOT._serialized_end = 10981 + _UNPIVOT_VALUES._serialized_start = 10911 + _UNPIVOT_VALUES._serialized_end = 10970 + 
_TOSCHEMA._serialized_start = 10983 + _TOSCHEMA._serialized_end = 11089 + _REPARTITIONBYEXPRESSION._serialized_start = 11092 + _REPARTITIONBYEXPRESSION._serialized_end = 11295 + _MAPPARTITIONS._serialized_start = 11298 + _MAPPARTITIONS._serialized_end = 11479 + _GROUPMAP._serialized_start = 11482 + _GROUPMAP._serialized_end = 12117 + _COGROUPMAP._serialized_start = 12120 + _COGROUPMAP._serialized_end = 12646 + _APPLYINPANDASWITHSTATE._serialized_start = 12649 + _APPLYINPANDASWITHSTATE._serialized_end = 13006 + _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 13009 + _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 13253 + _PYTHONUDTF._serialized_start = 13256 + _PYTHONUDTF._serialized_end = 13433 + _COLLECTMETRICS._serialized_start = 13436 + _COLLECTMETRICS._serialized_end = 13572 + _PARSE._serialized_start = 13575 + _PARSE._serialized_end = 13963 + _PARSE_OPTIONSENTRY._serialized_start = 3960 + _PARSE_OPTIONSENTRY._serialized_end = 4018 + _PARSE_PARSEFORMAT._serialized_start = 13864 + _PARSE_PARSEFORMAT._serialized_end = 13952 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index 007b92ef5f42..1c0036afbc43 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -579,26 +579,19 @@ class RelationCommon(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - SOURCE_INFO_FIELD_NUMBER: builtins.int PLAN_ID_FIELD_NUMBER: builtins.int - source_info: builtins.str - """(Required) Shared relation metadata.""" plan_id: builtins.int """(Optional) A per-client globally unique id for a given connect plan.""" def __init__( self, *, - source_info: builtins.str = ..., plan_id: builtins.int | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id", "plan_id", b"plan_id"] ) -> builtins.bool: ... 
def ClearField( - self, - field_name: typing_extensions.Literal[ - "_plan_id", b"_plan_id", "plan_id", b"plan_id", "source_info", b"source_info" - ], + self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id", "plan_id", b"plan_id"] ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_plan_id", b"_plan_id"] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org