This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 02f76057ad [GH-2765] Implement GeoSeries: relate_pattern,
contains_properly, build_area, polygonize (#2766)
02f76057ad is described below
commit 02f76057ade1856b0965595f770ac67bf5406ada
Author: Jia Yu <[email protected]>
AuthorDate: Thu Mar 19 21:18:59 2026 -0700
[GH-2765] Implement GeoSeries: relate_pattern, contains_properly,
build_area, polygonize (#2766)
---
python/sedona/spark/geopandas/base.py | 192 ++++++++++++++++++++-
python/sedona/spark/geopandas/geoseries.py | 102 ++++++++++-
python/tests/geopandas/test_geoseries.py | 119 ++++++++++++-
.../tests/geopandas/test_match_geopandas_series.py | 62 ++++++-
4 files changed, 465 insertions(+), 10 deletions(-)
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 858bd6e72f..7db50e7f8f 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -1429,6 +1429,79 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("line_merge", self, directed)
+ def build_area(self, node=True):
+ """Create an areal geometry formed by the constituent linework.
+
+ Builds areas from the GeoSeries that contain linework which represents
+ the edges of a planar graph. All geometries within the GeoSeries are
+ considered together and the resulting polygons therefore do not map 1:1
+ to input geometries.
+
+ Parameters
+ ----------
+ node : bool, default True
+ If True, the linework is noded together before building areas.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import MultiLineString
+ >>> s = GeoSeries(
+ ... [
+ ... MultiLineString(
+ ... [[(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]]
+ ... ),
+ ... ]
+ ... )
+ >>> s.build_area()
+ 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+ Name: polygons, dtype: geometry
+ """
+ return _delegate_to_geometry_column("build_area", self, node)
+
+ def polygonize(self, node=True, full=False):
+ """Create polygons formed from the linework of a GeoSeries.
+
+ Polygonizes the GeoSeries that contain linework which represents the
+ edges of a planar graph. All geometries within the GeoSeries are
+ considered together and the resulting polygons therefore do not map 1:1
+ to input geometries.
+
+ Parameters
+ ----------
+ node : bool, default True
+ If True, the linework is noded together before polygonizing.
+ full : bool, default False
+ If True, return the full polygonization result including cut edges,
+ dangles, and invalid rings.
+ Not supported in Sedona; passing ``True`` will raise
+ ``NotImplementedError``.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import MultiLineString
+ >>> s = GeoSeries(
+ ... [
+ ... MultiLineString(
+ ... [[(0, 0), (0, 1), (1, 1)], [(1, 1), (1, 0), (0, 0)]]
+ ... ),
+ ... ]
+ ... )
+ >>> s.polygonize()
+ 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+ Name: polygons, dtype: geometry
+ """
+ return _delegate_to_geometry_column("polygonize", self, node, full)
+
@property
def unary_union(self):
"""Returns a geometry containing the union of all geometries in the
@@ -3399,7 +3472,64 @@ class GeoFrame(metaclass=ABCMeta):
return _delegate_to_geometry_column("contains", self, other, align)
def contains_properly(self, other, align=None):
- raise NotImplementedError("This method is not implemented yet.")
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ each aligned geometry that properly contains `other`.
+
+ An object is said to properly contain `other` if the `other` object
+ lies entirely within the `interior` of the object (no shared boundary
+ points).
+
+ The operation works on a 1-to-1 row-wise manner.
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to test whether
+ it is properly contained.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices. None defaults to True.
+ If False, the order of elements is preserved.
+
+ Returns
+ -------
+ Series (bool)
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Point, Polygon
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ... ]
+ ... )
+ >>> s2 = GeoSeries(
+ ... [
+ ... Point(1, 1),
+ ... Point(0, 0),
+ ... ]
+ ... )
+
+ >>> s.contains_properly(s2)
+ 0 True
+ 1 False
+ dtype: bool
+
+ Notes
+ -----
+ This method works in a row-wise manner. It does not check if an element
+ of one GeoSeries ``contains_properly`` any element of the other one.
+
+ The difference from ``contains`` is that ``contains_properly`` returns
+ ``False`` when the `other` geometry touches the boundary of the object.
+
+ See also
+ --------
+ GeoSeries.contains
+ GeoSeries.within
+ """
+ return _delegate_to_geometry_column("contains_properly", self, other, align)
def relate(self, other, align=None):
"""Returns the DE-9IM matrix string for the relationship between each geometry and `other`.
@@ -3472,6 +3602,66 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("relate", self, other, align)
+ def relate_pattern(self, other, pattern, align=None):
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` if the
+ DE-9IM relationship between each geometry and `other` matches the
+ specified `pattern`.
+
+ The operation works on a 1-to-1 row-wise manner.
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to relate to.
+ pattern : str
+ The DE-9IM pattern to match. A 9-character string where each
+ character is one of: 'T' (matches any non-empty intersection),
+ 'F' (matches empty intersection), '*' (matches anything),
+ '0', '1', '2' (matches specific dimensions).
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices. None defaults to True.
+ If False, the order of elements is preserved.
+
+ Returns
+ -------
+ Series (bool)
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Point, Polygon
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ... ]
+ ... )
+ >>> s2 = GeoSeries(
+ ... [
+ ... Point(1, 1),
+ ... Point(3, 3),
+ ... ]
+ ... )
+
+ >>> s.relate_pattern(s2, "T*F**FFF*")
+ 0 True
+ 1 False
+ dtype: bool
+
+ Notes
+ -----
+ This method works in a row-wise manner.
+
+ See also
+ --------
+ GeoSeries.relate
+ GeoSeries.contains
+ GeoSeries.intersects
+ """
+ return _delegate_to_geometry_column(
+ "relate_pattern", self, other, pattern, align
+ )
+
def to_parquet(self, path, **kwargs):
raise NotImplementedError("This method is not implemented yet.")
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index f3d96dba30..00ca985a64 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -1151,6 +1151,75 @@ class GeoSeries(GeoFrame, pspd.Series):
returns_geom=True,
)
+ def build_area(self, node=True):
+ if len(self) == 0:
+ return GeoSeries([], name="polygons", crs=self.crs)
+
+ if node:
+ aggr_expr = sta.ST_Union_Aggr(self.spark.column)
+ else:
+ aggr_expr = sta.ST_Collect_Agg(self.spark.column)
+
+ build_expr = stf.ST_BuildArea(aggr_expr)
+ dump_expr = F.explode(stf.ST_Dump(build_expr))
+
+ sdf = self._internal.spark_frame.select(dump_expr.alias("polygons"))
+
+ if not sdf.take(1):
+ return GeoSeries([], name="polygons", crs=self.crs)
+
+ from pyspark.pandas.internal import InternalField
+
+ internal = InternalFrame(
+ spark_frame=sdf,
+ index_spark_columns=None,
+ column_labels=[("polygons",)],
+ data_spark_columns=[scol_for(sdf, "polygons")],
+ data_fields=[InternalField(np.dtype("object"), sdf.schema["polygons"])],
+ column_label_names=[("polygons",)],
+ )
+ ps_series = first_series(PandasOnSparkDataFrame(internal))
+ ps_series.rename("polygons", inplace=True)
+ result = GeoSeries(ps_series, crs=self.crs)
+ return result
+
+ def polygonize(self, node=True, full=False):
+ if full:
+ raise NotImplementedError(
+ "Sedona does not support full=True for polygonize."
+ )
+
+ if len(self) == 0:
+ return GeoSeries([], name="polygons", crs=self.crs)
+
+ if node:
+ aggr_expr = sta.ST_Union_Aggr(self.spark.column)
+ else:
+ aggr_expr = sta.ST_Collect_Agg(self.spark.column)
+
+ poly_expr = stf.ST_Polygonize(aggr_expr)
+ dump_expr = F.explode(stf.ST_Dump(poly_expr))
+
+ sdf = self._internal.spark_frame.select(dump_expr.alias("polygons"))
+
+ if not sdf.take(1):
+ return GeoSeries([], name="polygons", crs=self.crs)
+
+ from pyspark.pandas.internal import InternalField
+
+ internal = InternalFrame(
+ spark_frame=sdf,
+ index_spark_columns=None,
+ column_labels=[("polygons",)],
+ data_spark_columns=[scol_for(sdf, "polygons")],
+ data_fields=[InternalField(np.dtype("object"), sdf.schema["polygons"])],
+ column_label_names=[("polygons",)],
+ )
+ ps_series = first_series(PandasOnSparkDataFrame(internal))
+ ps_series.rename("polygons", inplace=True)
+ result = GeoSeries(ps_series, crs=self.crs)
+ return result
+
# ============================================================================
# GEOMETRIC OPERATIONS
# ============================================================================
@@ -1550,6 +1619,20 @@ class GeoSeries(GeoFrame, pspd.Series):
)
return result
+ def relate_pattern(self, other, pattern, align=None) -> pspd.Series:
+ other, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_col = stp.ST_Relate(F.col("L"), F.col("R"), F.lit(pattern))
+ result = self._row_wise_operation(
+ spark_col,
+ other,
+ align,
+ returns_geom=False,
+ default_val=False,
+ )
+ return _to_bool(result)
+
# ============================================================================
# SPATIAL PREDICATES
# ============================================================================
@@ -1568,14 +1651,19 @@ class GeoSeries(GeoFrame, pspd.Series):
)
return _to_bool(result)
- def contains_properly(self, other, align=None):
- # Implementation of the abstract method.
- raise NotImplementedError(
- _not_implemented_error(
- "contains_properly",
- "Tests if geometries properly contain other geometries (no boundary contact).",
- )
+ def contains_properly(self, other, align=None) -> pspd.Series:
+ other, extended = self._make_series_of_val(other)
+ align = False if extended else align
+
+ spark_col = stp.ST_Relate(F.col("L"), F.col("R"), F.lit("T**FF*FF*"))
+ result = self._row_wise_operation(
+ spark_col,
+ other,
+ align,
+ returns_geom=False,
+ default_val=False,
)
+ return _to_bool(result)
def buffer(
self,
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index e929e3e43d..b61e8e5056 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -1919,6 +1919,60 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
df_result = s.to_geoframe().line_merge()
self.check_sgpd_equals_gpd(df_result, expected)
+ def test_build_area(self):
+ # build_area is an aggregate operation: all linework is combined,
+ # then areas are built from the combined noded linework.
+ s = GeoSeries(
+ [
+ LineString([(0, 0), (1, 0)]),
+ LineString([(1, 0), (0.5, 1)]),
+ LineString([(0.5, 1), (0, 0)]),
+ ]
+ )
+ result = s.build_area()
+ assert result.name == "polygons"
+ assert len(result) == 1
+ expected_poly = Polygon([(1, 0), (0, 0), (0.5, 1), (1, 0)])
+ self.check_geom_equals(result.iloc[0], expected_poly)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().build_area()
+ assert df_result.name == "polygons"
+ assert len(df_result) == 1
+ self.check_geom_equals(df_result.iloc[0], expected_poly)
+
+ # Test empty GeoSeries
+ result_empty = GeoSeries([]).build_area()
+ assert len(result_empty) == 0
+ assert result_empty.name == "polygons"
+
+ def test_polygonize(self):
+ # polygonize is an aggregate operation: all linework is combined,
+ # then polygons are formed from the combined noded linework.
+ s = GeoSeries(
+ [
+ LineString([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]),
+ LineString([(1, 0), (2, 0), (2, 1), (1, 1)]),
+ ]
+ )
+ result = s.polygonize()
+ assert result.name == "polygons"
+ assert len(result) == 2
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().polygonize()
+ assert df_result.name == "polygons"
+ assert len(df_result) == 2
+
+ # Test that full=True raises NotImplementedError
+ with pytest.raises(NotImplementedError):
+ s.polygonize(full=True)
+
+ # Test empty GeoSeries
+ result_empty = GeoSeries([]).polygonize()
+ assert len(result_empty) == 0
+ assert result_empty.name == "polygons"
+
def test_unary_union(self):
s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)])
with pytest.warns(FutureWarning, match="unary_union"):
@@ -2556,7 +2610,34 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
self.check_pd_series_equal(df_result, expected)
def test_contains_properly(self):
- pass
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ]
+ )
+ s2 = GeoSeries(
+ [
+ Point(1, 1), # interior point → True
+ Point(0, 0), # boundary point → False
+ Point(3, 3), # exterior point → False
+ ]
+ )
+
+ result = s.contains_properly(s2, align=False)
+ expected = pd.Series([True, False, False])
+ self.check_pd_series_equal(result, expected)
+
+ # Test with single geometry
+ result = s.contains_properly(Point(1, 1))
+ expected = pd.Series([True, True, True])
+ self.check_pd_series_equal(result, expected)
+
+ # Test that GeoDataFrame works too
+ df_result = s.to_geoframe().contains_properly(s2, align=False)
+ expected = pd.Series([True, False, False])
+ self.check_pd_series_equal(df_result, expected)
def test_relate(self):
s = GeoSeries(
@@ -2635,6 +2716,42 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
expected = pd.Series(["FF2F11212", "212101212"])
self.check_pd_series_equal(result, expected)
+ def test_relate_pattern(self):
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ]
+ )
+ s2 = GeoSeries(
+ [
+ Point(1, 1), # interior → contains pattern matches
+ Point(0, 0), # boundary → contains pattern fails
+ Point(3, 3), # exterior → contains pattern fails
+ ]
+ )
+
+ # Test contains_properly pattern: T**FF*FF*
+ result = s.relate_pattern(s2, "T**FF*FF*", align=False)
+ expected = pd.Series([True, False, False])
+ self.check_pd_series_equal(result, expected)
+
+ # Test intersects pattern: T********
+ result = s.relate_pattern(s2, "T********", align=False)
+ expected = pd.Series([True, False, False])
+ self.check_pd_series_equal(result, expected)
+
+ # Test with single geometry
+ result = s.relate_pattern(Point(1, 1), "T**FF*FF*")
+ expected = pd.Series([True, True, True])
+ self.check_pd_series_equal(result, expected)
+
+ # Test that GeoDataFrame works too
+ df_result = s.to_geoframe().relate_pattern(s2, "T**FF*FF*", align=False)
+ expected = pd.Series([True, False, False])
+ self.check_pd_series_equal(df_result, expected)
+
def test_frechet_distance(self):
s1 = GeoSeries(
[
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 4c1af9eab5..3138e8c267 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -999,6 +999,28 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
gpd_result = gpd.GeoSeries(geom).line_merge()
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+ def test_build_area(self):
+ # build_area is aggregate: use linestrings forming a triangle
+ geom = [
+ LineString([(0, 0), (1, 0)]),
+ LineString([(1, 0), (0.5, 1)]),
+ LineString([(0.5, 1), (0, 0)]),
+ ]
+ sgpd_result = GeoSeries(geom).build_area()
+ gpd_result = gpd.GeoSeries(geom).build_area()
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ def test_polygonize(self):
+ # polygonize is aggregate: use linestrings forming a closed ring
+ geom = [
+ LineString([(0, 0), (1, 0)]),
+ LineString([(1, 0), (0.5, 1)]),
+ LineString([(0.5, 1), (0, 0)]),
+ ]
+ sgpd_result = GeoSeries(geom).polygonize()
+ gpd_result = gpd.GeoSeries(geom).polygonize()
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
def test_unary_union(self):
lst = [g for geom in self.geoms for g in geom if g.is_valid]
with pytest.warns(FutureWarning, match="unary_union"):
@@ -1270,7 +1292,25 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_contains_properly(self):
- pass
+ for geom, geom2 in self.pairs:
+ if geom == geom2 or self.contains_any_geom_collection(geom, geom2):
+ continue
+ sgpd_result = GeoSeries(geom).contains_properly(
+ GeoSeries(geom2), align=True
+ )
+ gpd_result = gpd.GeoSeries(geom).contains_properly(
+ gpd.GeoSeries(geom2), align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).contains_properly(
+ GeoSeries(geom2), align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).contains_properly(
+ gpd.GeoSeries(geom2), align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_relate(self):
for geom, geom2 in self.pairs:
@@ -1285,6 +1325,26 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
)
self.check_pd_series_equal(sgpd_result, gpd_result)
+ def test_relate_pattern(self):
+ for geom, geom2 in self.pairs:
+ for pattern in ["T********", "T*F**FFF*", "FF*FF****"]:
+ sgpd_result = GeoSeries(geom).relate_pattern(
+ GeoSeries(geom2), pattern, align=True
+ )
+ gpd_result = gpd.GeoSeries(geom).relate_pattern(
+ gpd.GeoSeries(geom2), pattern, align=True
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ if len(geom) == len(geom2):
+ sgpd_result = GeoSeries(geom).relate_pattern(
+ GeoSeries(geom2), pattern, align=False
+ )
+ gpd_result = gpd.GeoSeries(geom).relate_pattern(
+ gpd.GeoSeries(geom2), pattern, align=False
+ )
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
def test_frechet_distance(self):
line_pairs = [
(self.linestrings, self.linestrings),