This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 79b15124d4 [GH-2725] Implement GeoSeries: type, unary_union,
delaunay_triangles, voronoi_polygons, disjoint, m (#2726)
79b15124d4 is described below
commit 79b15124d43e4ebb45cdc890690e6d7a7483bc81
Author: Jia Yu <[email protected]>
AuthorDate: Wed Mar 11 18:54:50 2026 -0700
[GH-2725] Implement GeoSeries: type, unary_union, delaunay_triangles,
voronoi_polygons, disjoint, m (#2726)
---
python/sedona/spark/geopandas/base.py | 165 +++++++++++++++++++--
python/sedona/spark/geopandas/geodataframe.py | 5 +-
python/sedona/spark/geopandas/geoseries.py | 86 +++++++++--
python/tests/geopandas/test_geoseries.py | 122 ++++++++++++++-
.../tests/geopandas/test_match_geopandas_series.py | 52 ++++++-
5 files changed, 391 insertions(+), 39 deletions(-)
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 168a7738b0..ac6827e097 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -165,9 +165,31 @@ class GeoFrame(metaclass=ABCMeta):
return _delegate_to_geometry_column("geom_type", self)
@property
- @abstractmethod
def type(self):
- raise NotImplementedError("This method is not implemented yet.")
+ """Return the geometry type of each geometry in the GeoSeries.
+
+ This is an alias for :attr:`geom_type`.
+
+ Returns
+ -------
+ pandas.Series (str)
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Point, Polygon
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... Point(0, 0),
+ ... ]
+ ... )
+ >>> s.type
+ 0 Polygon
+ 1 Point
+ dtype: object
+ """
+ return self.geom_type
@property
def length(self):
@@ -772,11 +794,81 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("convex_hull", self)
- # def delaunay_triangles(self, tolerance=0.0, only_edges=False):
- # raise NotImplementedError("This method is not implemented yet.")
+ def delaunay_triangles(self, tolerance=0.0, only_edges=False):
+ """Return Delaunay triangulation of the vertices of each geometry.
- # def voronoi_polygons(self, tolerance=0.0, extend_to=None, only_edges=False):
- # raise NotImplementedError("This method is not implemented yet.")
+ .. note::
+
+ Unlike geopandas, which collects all vertices across the
+ entire GeoSeries and computes a single triangulation, Sedona
+ computes the triangulation **per row**. Each input geometry
+ produces one ``GeometryCollection`` containing its triangles.
+ The output GeoSeries has the same length as the input.
+
+ Parameters
+ ----------
+ tolerance : float, default 0.0
+ Snapping tolerance for vertices to be considered equal.
+ only_edges : bool, default False
+ If True, return only the edges of the triangulation as a
+ MultiLineString. If False, return triangles as a
+ GeometryCollection of Polygons.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import MultiPoint
+ >>> s = GeoSeries([MultiPoint([(0, 0), (1, 0), (0.5, 1)])])
+ >>> s.delaunay_triangles()
+ 0 GEOMETRYCOLLECTION (POLYGON ((0 0, 0.5 1, 1 0...
+ dtype: geometry
+ """
+ return _delegate_to_geometry_column(
+ "delaunay_triangles", self, tolerance, only_edges
+ )
+
+ def voronoi_polygons(self, tolerance=0.0, extend_to=None, only_edges=False):
+ """Return Voronoi diagram of the vertices of each geometry.
+
+ .. note::
+
+ Unlike geopandas, which collects all vertices across the
+ entire GeoSeries and computes a single Voronoi diagram, Sedona
+ computes the diagram **per row**. Each input geometry produces
+ one ``GeometryCollection`` containing its Voronoi polygons.
+ The output GeoSeries has the same length as the input.
+
+ Parameters
+ ----------
+ tolerance : float, default 0.0
+ Snapping tolerance for vertices to be considered equal.
+ extend_to : Geometry, default None
+ Not supported. Passing a non-None value will raise
+ ``NotImplementedError``.
+ only_edges : bool, default False
+ Only ``only_edges=False`` is supported. Passing ``only_edges=True``
+ will raise ``NotImplementedError``.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import MultiPoint
+ >>> s = GeoSeries([MultiPoint([(0, 0), (1, 0), (0.5, 1)])])
+ >>> s.voronoi_polygons()
+ 0 GEOMETRYCOLLECTION (POLYGON ((-0.25 -0.5, -0....
+ dtype: geometry
+ """
+ return _delegate_to_geometry_column(
+ "voronoi_polygons", self, tolerance, extend_to, only_edges
+ )
@property
def envelope(self):
@@ -1337,9 +1429,34 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("line_merge", self, directed)
- # @property
- # def unary_union(self):
- # raise NotImplementedError("This method is not implemented yet.")
+ @property
+ def unary_union(self):
+ """Returns a geometry containing the union of all geometries in the
+ ``GeoSeries``.
+
+ Deprecated: The ``unary_union`` attribute is deprecated. Use
+ :meth:`union_all` instead.
+
+ Returns
+ -------
+ Geometry
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import box
+ >>> s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)])
+ >>> s.unary_union.wkt # doctest: +SKIP
+ 'POLYGON ((0 1, 0 2, 2 2, 2 0, 1 0, 0 0, 0 1))'
+ """
+ import warnings
+
+ warnings.warn(
+ "The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.",
+ FutureWarning,
+ stacklevel=2,
+ )
+ return _delegate_to_geometry_column("union_all", self)
def union_all(self, method="unary", grid_size=None) -> BaseGeometry:
"""Returns a geometry containing the union of all geometries in the
@@ -1502,6 +1619,36 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("crosses", self, other, align)
+ def disjoint(self, other, align=None):
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ each aligned geometry that is disjoint from `other`.
+
+ An object is said to be disjoint from `other` if its
+ `boundary` and `interior` do not intersect at all with those of the
+ other.
+
+ The operation works on a 1-to-1 row-wise manner.
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to test if is
+ disjoint.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices. None defaults to True.
+ If False, the order of elements is preserved.
+
+ Returns
+ -------
+ Series (bool)
+
+ See also
+ --------
+ GeoSeries.intersects
+ GeoSeries.crosses
+ """
+ return _delegate_to_geometry_column("disjoint", self, other, align)
+
def intersects(self, other, align=None):
"""Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
each aligned geometry that intersects `other`.
diff --git a/python/sedona/spark/geopandas/geodataframe.py b/python/sedona/spark/geopandas/geodataframe.py
index 16815f578b..93d8dee076 100644
--- a/python/sedona/spark/geopandas/geodataframe.py
+++ b/python/sedona/spark/geopandas/geodataframe.py
@@ -1237,10 +1237,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
@property
def type(self):
- # Implementation of the abstract method
- raise NotImplementedError(
- _not_implemented_error("type", "Returns numeric geometry type codes.")
- )
+ return self.geom_type
def plot(self, *args, **kwargs):
"""
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 0319a41be4..96631f3679 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -730,10 +730,7 @@ class GeoSeries(GeoFrame, pspd.Series):
@property
def type(self):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error("type", "Returns numeric geometry type codes.")
- )
+ return self.geom_type
@property
def length(self) -> pspd.Series:
@@ -984,12 +981,28 @@ class GeoSeries(GeoFrame, pspd.Series):
)
def delaunay_triangles(self, tolerance=0.0, only_edges=False):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ spark_expr = stf.ST_DelaunayTriangles(
+ self.spark.column, tolerance, int(only_edges)
+ )
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=True,
+ )
def voronoi_polygons(self, tolerance=0.0, extend_to=None, only_edges=False):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ if only_edges:
+ raise NotImplementedError(
+ "Sedona does not support only_edges=True for voronoi_polygons."
+ )
+ if extend_to is not None:
+ raise NotImplementedError(
+ "Sedona does not support extend_to for voronoi_polygons."
+ )
+ spark_expr = stf.ST_VoronoiPolygons(self.spark.column, tolerance, extend_to)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=True,
+ )
@property
def envelope(self) -> "GeoSeries":
@@ -1144,8 +1157,14 @@ class GeoSeries(GeoFrame, pspd.Series):
@property
def unary_union(self):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ import warnings
+
+ warnings.warn(
+ "The 'unary_union' attribute is deprecated, use the 'union_all()' method instead.",
+ FutureWarning,
+ stacklevel=2,
+ )
+ return self.union_all()
def union_all(self, method="unary", grid_size=None) -> BaseGeometry:
if grid_size is not None:
@@ -1202,9 +1221,18 @@ class GeoSeries(GeoFrame, pspd.Series):
return _to_bool(result)
- def disjoint(self, other, align=None):
- # Implementation of the abstract method.
- raise NotImplementedError("This method is not implemented yet.")
+ def disjoint(self, other, align=None) -> pspd.Series:
+ other_series, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_expr = stp.ST_Disjoint(F.col("L"), F.col("R"))
+ result = self._row_wise_operation(
+ spark_expr,
+ other_series,
+ align,
+ default_val=False,
+ )
+ return _to_bool(result)
def intersects(
self, other: Union["GeoSeries", BaseGeometry], align: Union[bool, None] = None
@@ -1671,7 +1699,37 @@ class GeoSeries(GeoFrame, pspd.Series):
# GeoSeries-only (not in GeoDataFrame)
@property
def m(self) -> pspd.Series:
- raise NotImplementedError("GeoSeries.m() is not implemented yet.")
+ """Return the m coordinate of point geometries in a GeoSeries
+
+ Returns
+ -------
+ pandas.Series
+
+ Examples
+ --------
+
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Point
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s.m
+ 0 NaN
+ 1 NaN
+ 2 NaN
+ dtype: float64
+
+ See Also
+ --------
+
+ GeoSeries.x
+ GeoSeries.y
+ GeoSeries.z
+
+ """
+ spark_col = stf.ST_M(self.spark.column)
+ return self._query_geometry_column(
+ spark_col,
+ returns_geom=False,
+ )
# ============================================================================
# CONSTRUCTION METHODS
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index bcbcc979c8..9bc572a151 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -251,7 +251,13 @@ class TestGeoSeries(TestGeopandasBase):
self.check_pd_series_equal(result, expected)
def test_m(self):
- pass
+ geoseries = sgpd.GeoSeries(
+ [Point(0, -1, 2.5), Point(2.5, 0, -1), Point(-1, 2.5, 0)]
+ )
+ result = geoseries.m
+ # ST_M returns NaN for points without M coordinate
+ expected = pd.Series([np.nan, np.nan, np.nan])
+ self.check_pd_series_equal(result, expected)
def test_from_file(self):
pass
@@ -649,7 +655,39 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
self.check_pd_series_equal(df_result, expected)
def test_type(self):
- pass
+ geoseries = GeoSeries(
+ [
+ Point(0, 0),
+ MultiPoint([(0, 0), (1, 1)]),
+ LineString([(0, 0), (1, 1)]),
+ MultiLineString([[(0, 0), (1, 1)]]),
+ Polygon([(0, 0), (1, 0), (1, 1)]),
+ MultiPolygon(
+ [
+ Polygon([(0, 0), (1, 0), (0, 1)]),
+ Polygon([(2, 2), (3, 2), (2, 3)]),
+ ]
+ ),
+ GeometryCollection([Point(0, 0), LineString([(0, 0), (1, 1)])]),
+ ]
+ )
+ result = geoseries.type
+ expected = pd.Series(
+ [
+ "Point",
+ "MultiPoint",
+ "LineString",
+ "MultiLineString",
+ "Polygon",
+ "MultiPolygon",
+ "GeometryCollection",
+ ]
+ )
+ self.check_pd_series_equal(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = geoseries.to_geoframe().type
+ self.check_pd_series_equal(df_result, expected)
def test_length(self):
geoseries = GeoSeries(
@@ -1329,10 +1367,51 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
self.check_sgpd_equals_gpd(df_result, expected)
def test_delaunay_triangles(self):
- pass
+ s = GeoSeries(
+ [
+ MultiPoint([(0, 0), (1, 0), (0.5, 1)]),
+ MultiPoint([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ]
+ )
+ # Sedona ST_DelaunayTriangles is element-wise (returns a GeometryCollection
+ # per input), unlike geopandas which operates on all points at once.
+ result = s.delaunay_triangles()
+ result_gpd = result.to_geopandas()
+ assert len(result_gpd) == 2
+ # First input (3 points) should produce 1 triangle
+ assert result_gpd.iloc[0].geom_type == "GeometryCollection"
+ assert len(list(result_gpd.iloc[0].geoms)) == 1
+ # Second input (4 points) should produce 2 triangles
+ assert result_gpd.iloc[1].geom_type == "GeometryCollection"
+ assert len(list(result_gpd.iloc[1].geoms)) == 2
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().delaunay_triangles()
+ df_result_gpd = df_result.to_geopandas()
+ assert len(df_result_gpd) == 2
def test_voronoi_polygons(self):
- pass
+ s = GeoSeries(
+ [
+ MultiPoint([(0, 0), (1, 0), (0.5, 1)]),
+ ]
+ )
+ # Sedona ST_VoronoiPolygons is element-wise, unlike geopandas
+ result = s.voronoi_polygons()
+ result_gpd = result.to_geopandas()
+ assert len(result_gpd) == 1
+ assert result_gpd.iloc[0].geom_type == "GeometryCollection"
+ # 3 points should produce 3 Voronoi polygons
+ assert len(list(result_gpd.iloc[0].geoms)) == 3
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().voronoi_polygons()
+ df_result_gpd = df_result.to_geopandas()
+ assert len(df_result_gpd) == 1
+
+ # only_edges=True should raise
+ with pytest.raises(NotImplementedError):
+ s.voronoi_polygons(only_edges=True)
def test_envelope(self):
s = sgpd.GeoSeries(
@@ -1841,7 +1920,16 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
self.check_sgpd_equals_gpd(df_result, expected)
def test_unary_union(self):
- pass
+ s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)])
+ with pytest.warns(FutureWarning, match="unary_union"):
+ result = s.unary_union
+ expected = Polygon([(0, 1), (0, 2), (2, 2), (2, 0), (1, 0), (0, 0), (0, 1)])
+ self.check_geom_equals(result, expected)
+
+ # Check that GeoDataFrame works too
+ with pytest.warns(FutureWarning, match="unary_union"):
+ df_result = s.to_geoframe().unary_union
+ self.check_geom_equals(df_result, expected)
def test_union_all(self):
s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)])
@@ -1944,7 +2032,29 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
self.check_pd_series_equal(result, expected)
def test_disjoint(self):
- pass
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ Point(0, 0),
+ Point(5, 5),
+ ],
+ )
+ s2 = GeoSeries(
+ [
+ Point(3, 3),
+ Point(1, 1),
+ Point(0, 0),
+ Point(0, 0),
+ ],
+ )
+ result = s.disjoint(s2, align=False)
+ expected = pd.Series([True, False, False, True])
+ self.check_pd_series_equal(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().disjoint(s2, align=False)
+ self.check_pd_series_equal(df_result, expected)
def test_intersects(self):
s = sgpd.GeoSeries(
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 29de459214..6dd5af9dff 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -17,6 +17,7 @@
import os
import shutil
import tempfile
+import warnings
import pytest
import numpy as np
import pandas as pd
@@ -291,7 +292,13 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_m(self):
- pass
+ # M coordinate is not well supported in Shapely/geopandas
+ # so we just check that ST_M returns NaN for standard 2D/3D points
+ sgpd_result = GeoSeries(self.points).m
+ assert isinstance(sgpd_result, ps.Series)
+ # Standard 2D/3D points have no M, should be NaN
+ expected = pd.Series([np.nan] * len(self.points))
+ self.check_pd_series_equal(sgpd_result, expected)
def test_from_file(self):
pass
@@ -498,7 +505,13 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_type(self):
- pass
+ for geom in self.geoms:
+ # Sedona converts LinearRing to LineString
+ if isinstance(geom[0], LinearRing):
+ continue
+ sgpd_result = GeoSeries(geom).type
+ gpd_result = gpd.GeoSeries(geom).type
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_length(self):
for geom in self.geoms:
@@ -749,10 +762,20 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_delaunay_triangles(self):
- pass
+ # Sedona ST_DelaunayTriangles is element-wise (returns a GeometryCollection
+ # per input geometry), while geopandas operates on all points across the
+ # GeoSeries as a single set. Cannot compare directly.
+ for geom in self.geoms:
+ result = GeoSeries(geom).delaunay_triangles()
+ assert len(result) == len(geom)
def test_voronoi_polygons(self):
- pass
+ # Sedona ST_VoronoiPolygons is element-wise, while geopandas operates on
+ # all points across the GeoSeries as a single set. Cannot compare directly.
+ for geom in self.geoms:
+ result = GeoSeries(geom).voronoi_polygons()
+ collected = result.to_geopandas()
+ assert len(collected) == len(geom)
def test_envelope(self):
for geom in self.geoms:
@@ -977,7 +1000,14 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_unary_union(self):
- pass
+ lst = [g for geom in self.geoms for g in geom if g.is_valid]
+ with pytest.warns(FutureWarning, match="unary_union"):
+ sgpd_result = GeoSeries(lst).unary_union
+
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", FutureWarning)
+ gpd_result = gpd.GeoSeries(lst).unary_union
+ self.check_geom_equals(sgpd_result, gpd_result)
def test_union_all(self):
if parse_version(gpd.__version__) < parse_version("1.1.0"):
@@ -1027,7 +1057,17 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_disjoint(self):
- pass
+ for geom, geom2 in self.pairs:
+ sgpd_result = GeoSeries(geom).disjoint(GeoSeries(geom2), align=True)
+ gpd_result = gpd.GeoSeries(geom).disjoint(gpd.GeoSeries(geom2), align=True)
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).disjoint(GeoSeries(geom2), align=False)
+ gpd_result = gpd.GeoSeries(geom).disjoint(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_intersects(self):
for geom, geom2 in self.pairs: