This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch geopandas-tier1-batch-a in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 59da0eda6caf120ab9b58d426668a1d172172a1d Author: Jia Yu <[email protected]> AuthorDate: Tue Mar 10 00:56:20 2026 -0700 Implement GeoSeries: reverse, normalize, representative_point, line_merge Implement four GeoSeries methods by delegating to Sedona ST functions: - reverse() -> ST_Reverse - normalize() -> ST_Normalize - representative_point() -> ST_PointOnSurface - line_merge() -> ST_LineMerge Each function is implemented in geoseries.py, with docstrings and delegation support in base.py, and tests in both test_geoseries.py and test_match_geopandas_series.py. Part of: https://github.com/apache/sedona/issues/2230 --- python/sedona/spark/geopandas/base.py | 126 +++++++++++++++++++-- python/sedona/spark/geopandas/geoseries.py | 28 +++-- python/tests/geopandas/test_geoseries.py | 92 ++++++++++++++- .../tests/geopandas/test_match_geopandas_series.py | 22 +++- 4 files changed, 244 insertions(+), 24 deletions(-) diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py index 83644d0035..ad34073339 100644 --- a/python/sedona/spark/geopandas/base.py +++ b/python/sedona/spark/geopandas/base.py @@ -730,8 +730,35 @@ class GeoFrame(metaclass=ABCMeta): # def set_precision(self, grid_size, mode="valid_output"): # raise NotImplementedError("This method is not implemented yet.") - # def representative_point(self): - # raise NotImplementedError("This method is not implemented yet.") + def representative_point(self): + """Return a point that is guaranteed to be within each geometry. + + Returns a ``GeoSeries`` of (cheaply computed) points that are guaranteed + to be within each geometry. + + Returns + ------- + GeoSeries + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import Polygon, LineString, Point + >>> s = GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), + ... LineString([(0, 0), (1, 1), (1, 0)]), + ... Point(0, 0), + ... ] + ... ) + >>> s.representative_point() + 0 POINT (0.5 0.5) + 1 POINT (1 0.5) + 2 POINT (0 0) + dtype: geometry + + """ + return _delegate_to_geometry_column("representative_point", self) def minimum_bounding_circle(self): """Return a ``GeoSeries`` of geometries representing the minimum bounding @@ -803,8 +830,35 @@ class GeoFrame(metaclass=ABCMeta): # def minimum_clearance(self): # raise NotImplementedError("This method is not implemented yet.") - # def normalize(self): - # raise NotImplementedError("This method is not implemented yet.") + def normalize(self): + """Return a ``GeoSeries`` of normalized geometries. + + Normalization reorganizes the coordinates in a consistent order, + which can be useful for comparison purposes. + + Returns + ------- + GeoSeries + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import Polygon, LineString, Point + >>> s = GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), + ... LineString([(0, 0), (1, 1)]), + ... Point(0, 0), + ... ] + ... ) + >>> s.normalize() + 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)) + 1 LINESTRING (0 0, 1 1) + 2 POINT (0 0) + dtype: geometry + + """ + return _delegate_to_geometry_column("normalize", self) def make_valid(self, *, method="linework", keep_collapsed=True): """Repairs invalid geometries. @@ -869,8 +923,32 @@ class GeoFrame(metaclass=ABCMeta): "make_valid", self, method=method, keep_collapsed=keep_collapsed ) - # def reverse(self): - # raise NotImplementedError("This method is not implemented yet.") + def reverse(self): + """Return a ``GeoSeries`` with the coordinate order reversed. + + Returns + ------- + GeoSeries + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import LineString, Point + >>> s = GeoSeries( + ... [ + ... LineString([(0, 0), (1, 1), (2, 2)]), + ... LineString([(0, 0), (1, 0), (1, 1)]), + ... Point(0, 0), + ... ] + ... ) + >>> s.reverse() + 0 LINESTRING (2 2, 1 1, 0 0) + 1 LINESTRING (1 1, 1 0, 0 0) + 2 POINT (0 0) + dtype: geometry + + """ + return _delegate_to_geometry_column("reverse", self) def segmentize(self, max_segment_length): """Returns a ``GeoSeries`` with vertices added to line segments based on @@ -1015,8 +1093,40 @@ class GeoFrame(metaclass=ABCMeta): """ return _delegate_to_geometry_column("force_3d", self, z) - # def line_merge(self, directed=False): - # raise NotImplementedError("This method is not implemented yet.") + def line_merge(self, directed=False): + """Return merged LineStrings. + + Returns a ``GeoSeries`` of (Multi)LineStrings, where connected + LineStrings are merged together into single LineStrings. + + Parameters + ---------- + directed : bool, default False + Currently not supported by Sedona. + + Returns + ------- + GeoSeries + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import MultiLineString, LineString + >>> s = GeoSeries( + ... [ + ... MultiLineString([[(0, 0), (1, 1)], [(1, 1), (2, 2)]]), + ... MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]), + ... LineString([(0, 0), (1, 1)]), + ... ] + ... ) + >>> s.line_merge() + 0 LINESTRING (0 0, 1 1, 2 2) + 1 MULTILINESTRING ((0 0, 1 1), (2 2, 3 3)) + 2 LINESTRING (0 0, 1 1) + dtype: geometry + + """ + return _delegate_to_geometry_column("line_merge", self, directed) # @property # def unary_union(self): diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py index af9d0f378a..cb405dde23 100644 --- a/python/sedona/spark/geopandas/geoseries.py +++ b/python/sedona/spark/geopandas/geoseries.py @@ -1030,8 +1030,11 @@ class GeoSeries(GeoFrame, pspd.Series): raise NotImplementedError("This method is not implemented yet.") def representative_point(self): - # Implementation of the abstract method. - raise NotImplementedError("This method is not implemented yet.") + spark_expr = stf.ST_PointOnSurface(self.spark.column) + return self._query_geometry_column( + spark_expr, + returns_geom=True, + ) def minimum_bounding_circle(self) -> "GeoSeries": spark_expr = stf.ST_MinimumBoundingCircle(self.spark.column) @@ -1053,8 +1056,11 @@ class GeoSeries(GeoFrame, pspd.Series): raise NotImplementedError("This method is not implemented yet.") def normalize(self): - # Implementation of the abstract method. - raise NotImplementedError("This method is not implemented yet.") + spark_expr = stf.ST_Normalize(self.spark.column) + return self._query_geometry_column( + spark_expr, + returns_geom=True, + ) def make_valid(self, *, method="linework", keep_collapsed=True) -> "GeoSeries": if method != "structure": @@ -1069,8 +1075,11 @@ class GeoSeries(GeoFrame, pspd.Series): ) def reverse(self): - # Implementation of the abstract method. - raise NotImplementedError("This method is not implemented yet.") + spark_expr = stf.ST_Reverse(self.spark.column) + return self._query_geometry_column( + spark_expr, + returns_geom=True, + ) def segmentize(self, max_segment_length): other_series, extended = self._make_series_of_val(max_segment_length) @@ -1105,8 +1114,11 @@ class GeoSeries(GeoFrame, pspd.Series): ) def line_merge(self, directed=False): - # Implementation of the abstract method. - raise NotImplementedError("This method is not implemented yet.") + spark_expr = stf.ST_LineMerge(self.spark.column) + return self._query_geometry_column( + spark_expr, + returns_geom=True, + ) # ============================================================================ # GEOMETRIC OPERATIONS diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py index a4538015cb..9503f2dbe1 100644 --- a/python/tests/geopandas/test_geoseries.py +++ b/python/tests/geopandas/test_geoseries.py @@ -1318,7 +1318,30 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 pass def test_representative_point(self): - pass + s = GeoSeries( + [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), + LineString([(0, 0), (1, 1), (1, 0)]), + Point(0, 0), + None, + ] + ) + result = s.representative_point() + # representative_point returns a point guaranteed to be within the geometry + # We check that each resulting point is within (or on) the original geometry + for i in range(len(result)): + if result.iloc[i] is None: + assert s.iloc[i] is None + else: + assert result.iloc[i].geom_type == "Point" + + # Check that GeoDataFrame works too + df_result = s.to_geoframe().representative_point() + for i in range(len(df_result)): + if df_result.iloc[i] is None: + assert s.iloc[i] is None + else: + assert df_result.iloc[i].geom_type == "Point" def test_minimum_bounding_circle(self): s = GeoSeries( @@ -1374,7 +1397,28 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 pass def test_normalize(self): - pass + s = GeoSeries( + [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), + LineString([(0, 0), (1, 1)]), + Point(0, 0), + None, + ] + ) + result = s.normalize() + expected = gpd.GeoSeries( + [ + shapely.normalize(Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])), + shapely.normalize(LineString([(0, 0), (1, 1)])), + shapely.normalize(Point(0, 0)), + None, + ] + ) + self.check_sgpd_equals_gpd(result, expected) + + # Check that GeoDataFrame works too + df_result = s.to_geoframe().normalize() + self.check_sgpd_equals_gpd(df_result, expected) def test_make_valid(self): s = sgpd.GeoSeries( @@ -1431,7 +1475,28 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 self.check_sgpd_equals_gpd(df_result, expected) def test_reverse(self): - pass + s = GeoSeries( + [ + LineString([(0, 0), (1, 1), (2, 2)]), + LineString([(0, 0), (1, 0), (1, 1)]), + Point(0, 0), + None, + ] + ) + result = s.reverse() + expected = gpd.GeoSeries( + [ + LineString([(2, 2), (1, 1), (0, 0)]), + LineString([(1, 1), (1, 0), (0, 0)]), + Point(0, 0), + None, + ] + ) + self.check_sgpd_equals_gpd(result, expected) + + # Check that GeoDataFrame works too + df_result = s.to_geoframe().reverse() + self.check_sgpd_equals_gpd(df_result, expected) def test_segmentize(self): s = GeoSeries( @@ -1611,7 +1676,26 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 self.check_sgpd_equals_gpd(result, expected) def test_line_merge(self): - pass + s = GeoSeries( + [ + MultiLineString([[(0, 0), (1, 1)], [(1, 1), (2, 2)]]), + MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]), + None, + ] + ) + result = s.line_merge() + expected = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 1), (2, 2)]), + MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]), + None, + ] + ) + self.check_sgpd_equals_gpd(result, expected) + + # Check that GeoDataFrame works too + df_result = s.to_geoframe().line_merge() + self.check_sgpd_equals_gpd(df_result, expected) def test_unary_union(self): pass diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py index 9bf0175c51..9e0c779d47 100644 --- a/python/tests/geopandas/test_match_geopandas_series.py +++ b/python/tests/geopandas/test_match_geopandas_series.py @@ -770,7 +770,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase): pass def test_representative_point(self): - pass + for geom in self.geoms: + sgpd_result = GeoSeries(geom).representative_point() + gpd_result = gpd.GeoSeries(geom).representative_point() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) def test_minimum_bounding_circle(self): for geom in self.geoms: @@ -788,7 +791,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase): pass def test_normalize(self): - pass + for geom in self.geoms: + sgpd_result = GeoSeries(geom).normalize() + gpd_result = gpd.GeoSeries(geom).normalize() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) def test_make_valid(self): import shapely @@ -818,7 +824,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase): GeoSeries([Point(0, 0)]).make_valid(method="linework") def test_reverse(self): - pass + for geom in self.geoms: + sgpd_result = GeoSeries(geom).reverse() + gpd_result = gpd.GeoSeries(geom).reverse() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) @pytest.mark.skipif( parse_version(gpd.__version__) < parse_version("0.14.0"), @@ -922,7 +931,12 @@ class TestMatchGeopandasSeries(TestGeopandasBase): self.check_sgpd_equals_gpd(sgpd_result, gpd_result) def test_line_merge(self): - pass + # line_merge is only meaningful for MultiLineStrings; Sedona's ST_LineMerge + # returns GEOMETRYCOLLECTION EMPTY for non-MultiLineString inputs. + for geom in [self.multilinestrings]: + sgpd_result = GeoSeries(geom).line_merge() + gpd_result = gpd.GeoSeries(geom).line_merge() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) def test_unary_union(self): pass
