This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 02f76057ad [GH-2765] Implement GeoSeries: relate_pattern, contains_properly, build_area, polygonize (#2766)
02f76057ad is described below

commit 02f76057ade1856b0965595f770ac67bf5406ada
Author: Jia Yu <[email protected]>
AuthorDate: Thu Mar 19 21:18:59 2026 -0700

    [GH-2765] Implement GeoSeries: relate_pattern, contains_properly, build_area, polygonize (#2766)
---
 python/sedona/spark/geopandas/base.py              | 192 ++++++++++++++++++++-
 python/sedona/spark/geopandas/geoseries.py         | 102 ++++++++++-
 python/tests/geopandas/test_geoseries.py           | 119 ++++++++++++-
 .../tests/geopandas/test_match_geopandas_series.py |  62 ++++++-
 4 files changed, 465 insertions(+), 10 deletions(-)

diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 858bd6e72f..7db50e7f8f 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -1429,6 +1429,79 @@ class GeoFrame(metaclass=ABCMeta):
         """
         return _delegate_to_geometry_column("line_merge", self, directed)
 
+    def build_area(self, node=True):
+        """Create an areal geometry formed by the constituent linework.
+
+        Builds areas from the GeoSeries that contain linework which represents
+        the edges of a planar graph.  All geometries within the GeoSeries are
+        considered together and the resulting polygons therefore do not map 1:1
+        to input geometries.
+
+        Parameters
+        ----------
+        node : bool, default True
+            If True, the linework is noded together before building areas.
+
+        Returns
+        -------
+        GeoSeries
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import MultiLineString
+        >>> s = GeoSeries(
+        ...     [
+        ...         MultiLineString(
+        ...             [[(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]]
+        ...         ),
+        ...     ]
+        ... )
+        >>> s.build_area()
+        0    POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+        Name: polygons, dtype: geometry
+        """
+        return _delegate_to_geometry_column("build_area", self, node)
+
+    def polygonize(self, node=True, full=False):
+        """Create polygons formed from the linework of a GeoSeries.
+
+        Polygonizes the GeoSeries that contain linework which represents the
+        edges of a planar graph.  All geometries within the GeoSeries are
+        considered together and the resulting polygons therefore do not map 1:1
+        to input geometries.
+
+        Parameters
+        ----------
+        node : bool, default True
+            If True, the linework is noded together before polygonizing.
+        full : bool, default False
+            If True, return the full polygonization result including cut edges,
+            dangles, and invalid rings.
+            Not supported in Sedona; passing ``True`` will raise
+            ``NotImplementedError``.
+
+        Returns
+        -------
+        GeoSeries
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import MultiLineString
+        >>> s = GeoSeries(
+        ...     [
+        ...         MultiLineString(
+        ...             [[(0, 0), (0, 1), (1, 1)], [(1, 1), (1, 0), (0, 0)]]
+        ...         ),
+        ...     ]
+        ... )
+        >>> s.polygonize()
+        0    POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+        Name: polygons, dtype: geometry
+        """
+        return _delegate_to_geometry_column("polygonize", self, node, full)
+
     @property
     def unary_union(self):
         """Returns a geometry containing the union of all geometries in the
@@ -3399,7 +3472,64 @@ class GeoFrame(metaclass=ABCMeta):
         return _delegate_to_geometry_column("contains", self, other, align)
 
     def contains_properly(self, other, align=None):
-        raise NotImplementedError("This method is not implemented yet.")
+        """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+        each aligned geometry that properly contains `other`.
+
+        An object is said to properly contain `other` if the `other` object
+        lies entirely within the `interior` of the object (no shared boundary
+        points).
+
+        The operation works on a 1-to-1 row-wise manner.
+
+        Parameters
+        ----------
+        other : GeoSeries or geometric object
+            The GeoSeries (elementwise) or geometric object to test whether
+            it is properly contained.
+        align : bool | None (default None)
+            If True, automatically aligns GeoSeries based on their indices. None defaults to True.
+            If False, the order of elements is preserved.
+
+        Returns
+        -------
+        Series (bool)
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import Point, Polygon
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+        ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+        ...     ]
+        ... )
+        >>> s2 = GeoSeries(
+        ...     [
+        ...         Point(1, 1),
+        ...         Point(0, 0),
+        ...     ]
+        ... )
+
+        >>> s.contains_properly(s2)
+        0     True
+        1    False
+        dtype: bool
+
+        Notes
+        -----
+        This method works in a row-wise manner. It does not check if an element
+        of one GeoSeries ``contains_properly`` any element of the other one.
+
+        The difference from ``contains`` is that ``contains_properly`` returns
+        ``False`` when the `other` geometry touches the boundary of the object.
+
+        See also
+        --------
+        GeoSeries.contains
+        GeoSeries.within
+        """
+        return _delegate_to_geometry_column("contains_properly", self, other, align)
 
     def relate(self, other, align=None):
         """Returns the DE-9IM matrix string for the relationship between each 
geometry and `other`.
@@ -3472,6 +3602,66 @@ class GeoFrame(metaclass=ABCMeta):
         """
         return _delegate_to_geometry_column("relate", self, other, align)
 
+    def relate_pattern(self, other, pattern, align=None):
+        """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` if the
+        DE-9IM relationship between each geometry and `other` matches the
+        specified `pattern`.
+
+        The operation works on a 1-to-1 row-wise manner.
+
+        Parameters
+        ----------
+        other : GeoSeries or geometric object
+            The GeoSeries (elementwise) or geometric object to relate to.
+        pattern : str
+            The DE-9IM pattern to match. A 9-character string where each
+            character is one of: 'T' (matches any non-empty intersection),
+            'F' (matches empty intersection), '*' (matches anything),
+            '0', '1', '2' (matches specific dimensions).
+        align : bool | None (default None)
+            If True, automatically aligns GeoSeries based on their indices. None defaults to True.
+            If False, the order of elements is preserved.
+
+        Returns
+        -------
+        Series (bool)
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import Point, Polygon
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+        ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+        ...     ]
+        ... )
+        >>> s2 = GeoSeries(
+        ...     [
+        ...         Point(1, 1),
+        ...         Point(3, 3),
+        ...     ]
+        ... )
+
+        >>> s.relate_pattern(s2, "T*F**FFF*")
+        0     True
+        1    False
+        dtype: bool
+
+        Notes
+        -----
+        This method works in a row-wise manner.
+
+        See also
+        --------
+        GeoSeries.relate
+        GeoSeries.contains
+        GeoSeries.intersects
+        """
+        return _delegate_to_geometry_column(
+            "relate_pattern", self, other, pattern, align
+        )
+
     def to_parquet(self, path, **kwargs):
         raise NotImplementedError("This method is not implemented yet.")
 
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index f3d96dba30..00ca985a64 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -1151,6 +1151,75 @@ class GeoSeries(GeoFrame, pspd.Series):
             returns_geom=True,
         )
 
+    def build_area(self, node=True):
+        if len(self) == 0:
+            return GeoSeries([], name="polygons", crs=self.crs)
+
+        if node:
+            aggr_expr = sta.ST_Union_Aggr(self.spark.column)
+        else:
+            aggr_expr = sta.ST_Collect_Agg(self.spark.column)
+
+        build_expr = stf.ST_BuildArea(aggr_expr)
+        dump_expr = F.explode(stf.ST_Dump(build_expr))
+
+        sdf = self._internal.spark_frame.select(dump_expr.alias("polygons"))
+
+        if not sdf.take(1):
+            return GeoSeries([], name="polygons", crs=self.crs)
+
+        from pyspark.pandas.internal import InternalField
+
+        internal = InternalFrame(
+            spark_frame=sdf,
+            index_spark_columns=None,
+            column_labels=[("polygons",)],
+            data_spark_columns=[scol_for(sdf, "polygons")],
+            data_fields=[InternalField(np.dtype("object"), sdf.schema["polygons"])],
+            column_label_names=[("polygons",)],
+        )
+        ps_series = first_series(PandasOnSparkDataFrame(internal))
+        ps_series.rename("polygons", inplace=True)
+        result = GeoSeries(ps_series, crs=self.crs)
+        return result
+
+    def polygonize(self, node=True, full=False):
+        if full:
+            raise NotImplementedError(
+                "Sedona does not support full=True for polygonize."
+            )
+
+        if len(self) == 0:
+            return GeoSeries([], name="polygons", crs=self.crs)
+
+        if node:
+            aggr_expr = sta.ST_Union_Aggr(self.spark.column)
+        else:
+            aggr_expr = sta.ST_Collect_Agg(self.spark.column)
+
+        poly_expr = stf.ST_Polygonize(aggr_expr)
+        dump_expr = F.explode(stf.ST_Dump(poly_expr))
+
+        sdf = self._internal.spark_frame.select(dump_expr.alias("polygons"))
+
+        if not sdf.take(1):
+            return GeoSeries([], name="polygons", crs=self.crs)
+
+        from pyspark.pandas.internal import InternalField
+
+        internal = InternalFrame(
+            spark_frame=sdf,
+            index_spark_columns=None,
+            column_labels=[("polygons",)],
+            data_spark_columns=[scol_for(sdf, "polygons")],
+            data_fields=[InternalField(np.dtype("object"), sdf.schema["polygons"])],
+            column_label_names=[("polygons",)],
+        )
+        ps_series = first_series(PandasOnSparkDataFrame(internal))
+        ps_series.rename("polygons", inplace=True)
+        result = GeoSeries(ps_series, crs=self.crs)
+        return result
+
     # ============================================================================
     # GEOMETRIC OPERATIONS
     # ============================================================================
@@ -1550,6 +1619,20 @@ class GeoSeries(GeoFrame, pspd.Series):
         )
         return result
 
+    def relate_pattern(self, other, pattern, align=None) -> pspd.Series:
+        other, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        spark_col = stp.ST_Relate(F.col("L"), F.col("R"), F.lit(pattern))
+        result = self._row_wise_operation(
+            spark_col,
+            other,
+            align,
+            returns_geom=False,
+            default_val=False,
+        )
+        return _to_bool(result)
+
     # ============================================================================
     # SPATIAL PREDICATES
     # ============================================================================
@@ -1568,14 +1651,19 @@ class GeoSeries(GeoFrame, pspd.Series):
         )
         return _to_bool(result)
 
-    def contains_properly(self, other, align=None):
-        # Implementation of the abstract method.
-        raise NotImplementedError(
-            _not_implemented_error(
-                "contains_properly",
-                "Tests if geometries properly contain other geometries (no boundary contact).",
-            )
+    def contains_properly(self, other, align=None) -> pspd.Series:
+        other, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        spark_col = stp.ST_Relate(F.col("L"), F.col("R"), F.lit("T**FF*FF*"))
+        result = self._row_wise_operation(
+            spark_col,
+            other,
+            align,
+            returns_geom=False,
+            default_val=False,
         )
+        return _to_bool(result)
 
     def buffer(
         self,
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index e929e3e43d..b61e8e5056 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -1919,6 +1919,60 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
         df_result = s.to_geoframe().line_merge()
         self.check_sgpd_equals_gpd(df_result, expected)
 
+    def test_build_area(self):
+        # build_area is an aggregate operation: all linework is combined,
+        # then areas are built from the combined noded linework.
+        s = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0)]),
+                LineString([(1, 0), (0.5, 1)]),
+                LineString([(0.5, 1), (0, 0)]),
+            ]
+        )
+        result = s.build_area()
+        assert result.name == "polygons"
+        assert len(result) == 1
+        expected_poly = Polygon([(1, 0), (0, 0), (0.5, 1), (1, 0)])
+        self.check_geom_equals(result.iloc[0], expected_poly)
+
+        # Check that GeoDataFrame works too
+        df_result = s.to_geoframe().build_area()
+        assert df_result.name == "polygons"
+        assert len(df_result) == 1
+        self.check_geom_equals(df_result.iloc[0], expected_poly)
+
+        # Test empty GeoSeries
+        result_empty = GeoSeries([]).build_area()
+        assert len(result_empty) == 0
+        assert result_empty.name == "polygons"
+
+    def test_polygonize(self):
+        # polygonize is an aggregate operation: all linework is combined,
+        # then polygons are formed from the combined noded linework.
+        s = GeoSeries(
+            [
+                LineString([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]),
+                LineString([(1, 0), (2, 0), (2, 1), (1, 1)]),
+            ]
+        )
+        result = s.polygonize()
+        assert result.name == "polygons"
+        assert len(result) == 2
+
+        # Check that GeoDataFrame works too
+        df_result = s.to_geoframe().polygonize()
+        assert df_result.name == "polygons"
+        assert len(df_result) == 2
+
+        # Test that full=True raises NotImplementedError
+        with pytest.raises(NotImplementedError):
+            s.polygonize(full=True)
+
+        # Test empty GeoSeries
+        result_empty = GeoSeries([]).polygonize()
+        assert len(result_empty) == 0
+        assert result_empty.name == "polygons"
+
     def test_unary_union(self):
         s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)])
         with pytest.warns(FutureWarning, match="unary_union"):
@@ -2556,7 +2610,34 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
         self.check_pd_series_equal(df_result, expected)
 
     def test_contains_properly(self):
-        pass
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                Point(1, 1),  # interior point → True
+                Point(0, 0),  # boundary point → False
+                Point(3, 3),  # exterior point → False
+            ]
+        )
+
+        result = s.contains_properly(s2, align=False)
+        expected = pd.Series([True, False, False])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        result = s.contains_properly(Point(1, 1))
+        expected = pd.Series([True, True, True])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s.to_geoframe().contains_properly(s2, align=False)
+        expected = pd.Series([True, False, False])
+        self.check_pd_series_equal(df_result, expected)
 
     def test_relate(self):
         s = GeoSeries(
@@ -2635,6 +2716,42 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
         expected = pd.Series(["FF2F11212", "212101212"])
         self.check_pd_series_equal(result, expected)
 
+    def test_relate_pattern(self):
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+            ]
+        )
+        s2 = GeoSeries(
+            [
+                Point(1, 1),  # interior → contains pattern matches
+                Point(0, 0),  # boundary → contains pattern fails
+                Point(3, 3),  # exterior → contains pattern fails
+            ]
+        )
+
+        # Test contains_properly pattern: T**FF*FF*
+        result = s.relate_pattern(s2, "T**FF*FF*", align=False)
+        expected = pd.Series([True, False, False])
+        self.check_pd_series_equal(result, expected)
+
+        # Test intersects pattern: T********
+        result = s.relate_pattern(s2, "T********", align=False)
+        expected = pd.Series([True, False, False])
+        self.check_pd_series_equal(result, expected)
+
+        # Test with single geometry
+        result = s.relate_pattern(Point(1, 1), "T**FF*FF*")
+        expected = pd.Series([True, True, True])
+        self.check_pd_series_equal(result, expected)
+
+        # Test that GeoDataFrame works too
+        df_result = s.to_geoframe().relate_pattern(s2, "T**FF*FF*", align=False)
+        expected = pd.Series([True, False, False])
+        self.check_pd_series_equal(df_result, expected)
+
     def test_frechet_distance(self):
         s1 = GeoSeries(
             [
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 4c1af9eab5..3138e8c267 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -999,6 +999,28 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
             gpd_result = gpd.GeoSeries(geom).line_merge()
             self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
+    def test_build_area(self):
+        # build_area is aggregate: use linestrings forming a triangle
+        geom = [
+            LineString([(0, 0), (1, 0)]),
+            LineString([(1, 0), (0.5, 1)]),
+            LineString([(0.5, 1), (0, 0)]),
+        ]
+        sgpd_result = GeoSeries(geom).build_area()
+        gpd_result = gpd.GeoSeries(geom).build_area()
+        self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+    def test_polygonize(self):
+        # polygonize is aggregate: use linestrings forming a closed ring
+        geom = [
+            LineString([(0, 0), (1, 0)]),
+            LineString([(1, 0), (0.5, 1)]),
+            LineString([(0.5, 1), (0, 0)]),
+        ]
+        sgpd_result = GeoSeries(geom).polygonize()
+        gpd_result = gpd.GeoSeries(geom).polygonize()
+        self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
     def test_unary_union(self):
         lst = [g for geom in self.geoms for g in geom if g.is_valid]
         with pytest.warns(FutureWarning, match="unary_union"):
@@ -1270,7 +1292,25 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
                 self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_contains_properly(self):
-        pass
+        for geom, geom2 in self.pairs:
+            if geom == geom2 or self.contains_any_geom_collection(geom, geom2):
+                continue
+            sgpd_result = GeoSeries(geom).contains_properly(
+                GeoSeries(geom2), align=True
+            )
+            gpd_result = gpd.GeoSeries(geom).contains_properly(
+                gpd.GeoSeries(geom2), align=True
+            )
+            self.check_pd_series_equal(sgpd_result, gpd_result)
+
+            if len(geom) == len(geom2):
+                sgpd_result = GeoSeries(geom).contains_properly(
+                    GeoSeries(geom2), align=False
+                )
+                gpd_result = gpd.GeoSeries(geom).contains_properly(
+                    gpd.GeoSeries(geom2), align=False
+                )
+                self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_relate(self):
         for geom, geom2 in self.pairs:
@@ -1285,6 +1325,26 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
                 )
                 self.check_pd_series_equal(sgpd_result, gpd_result)
 
+    def test_relate_pattern(self):
+        for geom, geom2 in self.pairs:
+            for pattern in ["T********", "T*F**FFF*", "FF*FF****"]:
+                sgpd_result = GeoSeries(geom).relate_pattern(
+                    GeoSeries(geom2), pattern, align=True
+                )
+                gpd_result = gpd.GeoSeries(geom).relate_pattern(
+                    gpd.GeoSeries(geom2), pattern, align=True
+                )
+                self.check_pd_series_equal(sgpd_result, gpd_result)
+
+                if len(geom) == len(geom2):
+                    sgpd_result = GeoSeries(geom).relate_pattern(
+                        GeoSeries(geom2), pattern, align=False
+                    )
+                    gpd_result = gpd.GeoSeries(geom).relate_pattern(
+                        gpd.GeoSeries(geom2), pattern, align=False
+                    )
+                    self.check_pd_series_equal(sgpd_result, gpd_result)
+
     def test_frechet_distance(self):
         line_pairs = [
             (self.linestrings, self.linestrings),

Reply via email to