This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 31ceb14d35 [GH-2230] Implement GeoSeries: reverse, normalize, representative_point (#2701)
31ceb14d35 is described below
commit 31ceb14d353bfa9f8acc728420ef3e4aec8514b5
Author: Jia Yu <[email protected]>
AuthorDate: Tue Mar 10 21:53:38 2026 -0700
[GH-2230] Implement GeoSeries: reverse, normalize, representative_point (#2701)
---
python/sedona/spark/geopandas/base.py | 90 ++++++++++++++++++++--
python/sedona/spark/geopandas/geoseries.py | 21 +++--
python/tests/geopandas/test_geoseries.py | 62 ++++++++++++++-
.../tests/geopandas/test_match_geopandas_series.py | 15 +++-
4 files changed, 170 insertions(+), 18 deletions(-)
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 83644d0035..0308b4d9be 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -730,8 +730,35 @@ class GeoFrame(metaclass=ABCMeta):
# def set_precision(self, grid_size, mode="valid_output"):
# raise NotImplementedError("This method is not implemented yet.")
- # def representative_point(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ def representative_point(self):
+ """Return a point that is guaranteed to be within each geometry.
+
+ Returns a ``GeoSeries`` of (cheaply computed) points that are guaranteed
+ to be within each geometry.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ... LineString([(0, 0), (1, 1), (1, 0)]),
+ ... Point(0, 0),
+ ... ]
+ ... )
+ >>> s.representative_point()
+ 0 POINT (0.5 0.5)
+ 1 POINT (1 0.5)
+ 2 POINT (0 0)
+ dtype: geometry
+
+ """
+ return _delegate_to_geometry_column("representative_point", self)
def minimum_bounding_circle(self):
"""Return a ``GeoSeries`` of geometries representing the minimum bounding
@@ -803,8 +830,35 @@ class GeoFrame(metaclass=ABCMeta):
# def minimum_clearance(self):
# raise NotImplementedError("This method is not implemented yet.")
- # def normalize(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ def normalize(self):
+ """Return a ``GeoSeries`` of normalized geometries.
+
+ Normalization reorganizes the coordinates in a consistent order,
+ which can be useful for comparison purposes.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ... LineString([(0, 0), (1, 1)]),
+ ... Point(0, 0),
+ ... ]
+ ... )
+ >>> s.normalize()
+ 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+ 1 LINESTRING (0 0, 1 1)
+ 2 POINT (0 0)
+ dtype: geometry
+
+ """
+ return _delegate_to_geometry_column("normalize", self)
def make_valid(self, *, method="linework", keep_collapsed=True):
"""Repairs invalid geometries.
@@ -869,8 +923,32 @@ class GeoFrame(metaclass=ABCMeta):
"make_valid", self, method=method, keep_collapsed=keep_collapsed
)
- # def reverse(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ def reverse(self):
+ """Return a ``GeoSeries`` with the coordinate order reversed.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... LineString([(0, 0), (1, 1), (2, 2)]),
+ ... LineString([(0, 0), (1, 0), (1, 1)]),
+ ... Point(0, 0),
+ ... ]
+ ... )
+ >>> s.reverse()
+ 0 LINESTRING (2 2, 1 1, 0 0)
+ 1 LINESTRING (1 1, 1 0, 0 0)
+ 2 POINT (0 0)
+ dtype: geometry
+
+ """
+ return _delegate_to_geometry_column("reverse", self)
def segmentize(self, max_segment_length):
"""Returns a ``GeoSeries`` with vertices added to line segments based on
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index af9d0f378a..67221b080b 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -1030,8 +1030,11 @@ class GeoSeries(GeoFrame, pspd.Series):
raise NotImplementedError("This method is not implemented yet.")
def representative_point(self):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ spark_expr = stf.ST_PointOnSurface(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=True,
+ )
def minimum_bounding_circle(self) -> "GeoSeries":
spark_expr = stf.ST_MinimumBoundingCircle(self.spark.column)
@@ -1053,8 +1056,11 @@ class GeoSeries(GeoFrame, pspd.Series):
raise NotImplementedError("This method is not implemented yet.")
def normalize(self):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ spark_expr = stf.ST_Normalize(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=True,
+ )
def make_valid(self, *, method="linework", keep_collapsed=True) -> "GeoSeries":
if method != "structure":
@@ -1069,8 +1075,11 @@ class GeoSeries(GeoFrame, pspd.Series):
)
def reverse(self):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ spark_expr = stf.ST_Reverse(self.spark.column)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=True,
+ )
def segmentize(self, max_segment_length):
other_series, extended = self._make_series_of_val(max_segment_length)
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index a4538015cb..2e9c559a9f 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -1318,7 +1318,21 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
pass
def test_representative_point(self):
- pass
+ geoms = [
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ LineString([(0, 0), (1, 1), (1, 0)]),
+ Point(0, 0),
+ None,
+ ]
+ s = GeoSeries(geoms)
+ expected = gpd.GeoSeries(geoms).representative_point()
+
+ result = s.representative_point()
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().representative_point()
+ self.check_sgpd_equals_gpd(df_result, expected)
def test_minimum_bounding_circle(self):
s = GeoSeries(
@@ -1374,7 +1388,28 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
pass
def test_normalize(self):
- pass
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ LineString([(0, 0), (1, 1)]),
+ Point(0, 0),
+ None,
+ ]
+ )
+ result = s.normalize()
+ expected = gpd.GeoSeries(
+ [
+ shapely.normalize(Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])),
+ shapely.normalize(LineString([(0, 0), (1, 1)])),
+ shapely.normalize(Point(0, 0)),
+ None,
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().normalize()
+ self.check_sgpd_equals_gpd(df_result, expected)
def test_make_valid(self):
s = sgpd.GeoSeries(
@@ -1431,7 +1466,28 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
self.check_sgpd_equals_gpd(df_result, expected)
def test_reverse(self):
- pass
+ s = GeoSeries(
+ [
+ LineString([(0, 0), (1, 1), (2, 2)]),
+ LineString([(0, 0), (1, 0), (1, 1)]),
+ Point(0, 0),
+ None,
+ ]
+ )
+ result = s.reverse()
+ expected = gpd.GeoSeries(
+ [
+ LineString([(2, 2), (1, 1), (0, 0)]),
+ LineString([(1, 1), (1, 0), (0, 0)]),
+ Point(0, 0),
+ None,
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().reverse()
+ self.check_sgpd_equals_gpd(df_result, expected)
def test_segmentize(self):
s = GeoSeries(
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 9bf0175c51..abac9b453f 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -770,7 +770,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
pass
def test_representative_point(self):
- pass
+ for geom in self.geoms:
+ sgpd_result = GeoSeries(geom).representative_point()
+ gpd_result = gpd.GeoSeries(geom).representative_point()
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_minimum_bounding_circle(self):
for geom in self.geoms:
@@ -788,7 +791,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
pass
def test_normalize(self):
- pass
+ for geom in self.geoms:
+ sgpd_result = GeoSeries(geom).normalize()
+ gpd_result = gpd.GeoSeries(geom).normalize()
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_make_valid(self):
import shapely
@@ -818,7 +824,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
GeoSeries([Point(0, 0)]).make_valid(method="linework")
def test_reverse(self):
- pass
+ for geom in self.geoms:
+ sgpd_result = GeoSeries(geom).reverse()
+ gpd_result = gpd.GeoSeries(geom).reverse()
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
@pytest.mark.skipif(
parse_version(gpd.__version__) < parse_version("0.14.0"),