This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch geopandas-tier2-batch-fg in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 9c5f6b34504c24b6ceeccdcf1d20105d7451b159 Author: Jia Yu <[email protected]> AuthorDate: Tue Mar 17 02:16:34 2026 -0700 Add GeoSeries functions: relate_pattern, contains_properly, build_area, polygonize Tier 2 batch F/G: implements 4 remaining Tier 2 GeoSeries functions. - relate_pattern(other, pattern): delegates to ST_Relate 3-arg form - contains_properly(other): implemented via ST_Relate with DE-9IM pattern T**FF*FF* (no ST_ContainsProperly in Sedona) - build_area(node=True): aggregate operation matching geopandas semantics; collects all linework, builds areas, explodes into polygons - polygonize(node=True, full=False): aggregate operation matching geopandas semantics; collects all linework, polygonizes, explodes results Includes unit tests and geopandas match tests for all 4 functions. --- python/sedona/spark/geopandas/base.py | 192 ++++++++++++++++++++- python/sedona/spark/geopandas/geoseries.py | 80 ++++++++- python/tests/geopandas/test_geoseries.py | 119 ++++++++++++- .../tests/geopandas/test_match_geopandas_series.py | 62 ++++++- 4 files changed, 443 insertions(+), 10 deletions(-) diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py index 858bd6e72f..7db50e7f8f 100644 --- a/python/sedona/spark/geopandas/base.py +++ b/python/sedona/spark/geopandas/base.py @@ -1429,6 +1429,79 @@ class GeoFrame(metaclass=ABCMeta): """ return _delegate_to_geometry_column("line_merge", self, directed) + def build_area(self, node=True): + """Create an areal geometry formed by the constituent linework. + + Builds areas from the GeoSeries that contain linework which represents + the edges of a planar graph. All geometries within the GeoSeries are + considered together and the resulting polygons therefore do not map 1:1 + to input geometries. + + Parameters + ---------- + node : bool, default True + If True, the linework is noded together before building areas. 
+ + Returns + ------- + GeoSeries + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import MultiLineString + >>> s = GeoSeries( + ... [ + ... MultiLineString( + ... [[(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]] + ... ), + ... ] + ... ) + >>> s.build_area() + 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)) + Name: polygons, dtype: geometry + """ + return _delegate_to_geometry_column("build_area", self, node) + + def polygonize(self, node=True, full=False): + """Create polygons formed from the linework of a GeoSeries. + + Polygonizes the GeoSeries that contain linework which represents the + edges of a planar graph. All geometries within the GeoSeries are + considered together and the resulting polygons therefore do not map 1:1 + to input geometries. + + Parameters + ---------- + node : bool, default True + If True, the linework is noded together before polygonizing. + full : bool, default False + If True, return the full polygonization result including cut edges, + dangles, and invalid rings. + Not supported in Sedona; passing ``True`` will raise + ``NotImplementedError``. + + Returns + ------- + GeoSeries + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import MultiLineString + >>> s = GeoSeries( + ... [ + ... MultiLineString( + ... [[(0, 0), (0, 1), (1, 1)], [(1, 1), (1, 0), (0, 0)]] + ... ), + ... ] + ... 
) + >>> s.polygonize() + 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)) + Name: polygons, dtype: geometry + """ + return _delegate_to_geometry_column("polygonize", self, node, full) + @property def unary_union(self): """Returns a geometry containing the union of all geometries in the @@ -3399,7 +3472,64 @@ class GeoFrame(metaclass=ABCMeta): return _delegate_to_geometry_column("contains", self, other, align) def contains_properly(self, other, align=None): - raise NotImplementedError("This method is not implemented yet.") + """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + each aligned geometry that properly contains `other`. + + An object is said to properly contain `other` if the `other` object + lies entirely within the `interior` of the object (no shared boundary + points). + + The operation works on a 1-to-1 row-wise manner. + + Parameters + ---------- + other : GeoSeries or geometric object + The GeoSeries (elementwise) or geometric object to test whether + it is properly contained. + align : bool | None (default None) + If True, automatically aligns GeoSeries based on their indices. None defaults to True. + If False, the order of elements is preserved. + + Returns + ------- + Series (bool) + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import Point, Polygon + >>> s = GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + ... ] + ... ) + >>> s2 = GeoSeries( + ... [ + ... Point(1, 1), + ... Point(0, 0), + ... ] + ... ) + + >>> s.contains_properly(s2) + 0 True + 1 False + dtype: bool + + Notes + ----- + This method works in a row-wise manner. It does not check if an element + of one GeoSeries ``contains_properly`` any element of the other one. + + The difference from ``contains`` is that ``contains_properly`` returns + ``False`` when the `other` geometry touches the boundary of the object. 
+ + See also + -------- + GeoSeries.contains + GeoSeries.within + """ + return _delegate_to_geometry_column("contains_properly", self, other, align) def relate(self, other, align=None): """Returns the DE-9IM matrix string for the relationship between each geometry and `other`. @@ -3472,6 +3602,66 @@ class GeoFrame(metaclass=ABCMeta): """ return _delegate_to_geometry_column("relate", self, other, align) + def relate_pattern(self, other, pattern, align=None): + """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` if the + DE-9IM relationship between each geometry and `other` matches the + specified `pattern`. + + The operation works on a 1-to-1 row-wise manner. + + Parameters + ---------- + other : GeoSeries or geometric object + The GeoSeries (elementwise) or geometric object to relate to. + pattern : str + The DE-9IM pattern to match. A 9-character string where each + character is one of: 'T' (matches any non-empty intersection), + 'F' (matches empty intersection), '*' (matches anything), + '0', '1', '2' (matches specific dimensions). + align : bool | None (default None) + If True, automatically aligns GeoSeries based on their indices. None defaults to True. + If False, the order of elements is preserved. + + Returns + ------- + Series (bool) + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import Point, Polygon + >>> s = GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + ... ] + ... ) + >>> s2 = GeoSeries( + ... [ + ... Point(1, 1), + ... Point(3, 3), + ... ] + ... ) + + >>> s.relate_pattern(s2, "T*****FF*") + 0 True + 1 False + dtype: bool + + Notes + ----- + This method works in a row-wise manner. 
+ + See also + -------- + GeoSeries.relate + GeoSeries.contains + GeoSeries.intersects + """ + return _delegate_to_geometry_column( + "relate_pattern", self, other, pattern, align + ) + def to_parquet(self, path, **kwargs): raise NotImplementedError("This method is not implemented yet.") diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py index f3d96dba30..3ec88322d3 100644 --- a/python/sedona/spark/geopandas/geoseries.py +++ b/python/sedona/spark/geopandas/geoseries.py @@ -1151,6 +1151,53 @@ class GeoSeries(GeoFrame, pspd.Series): returns_geom=True, ) + def build_area(self, node=True): + if len(self) == 0: + return GeoSeries([], name="polygons") + + if node: + aggr_expr = sta.ST_Union_Aggr(self.spark.column) + else: + aggr_expr = sta.ST_Collect_Agg(self.spark.column) + + build_expr = stf.ST_BuildArea(aggr_expr) + result = self._query_geometry_column( + build_expr, returns_geom=False, is_aggr=True + ) + geom = result.take([0]).iloc[0] + + if geom is None or geom.is_empty: + return GeoSeries([], name="polygons") + + parts = shapely.get_parts(geom) + return GeoSeries(list(parts), name="polygons") + + def polygonize(self, node=True, full=False): + if full: + raise NotImplementedError( + "Sedona does not support full=True for polygonize." 
+ ) + + if len(self) == 0: + return GeoSeries([], name="polygons") + + if node: + aggr_expr = sta.ST_Union_Aggr(self.spark.column) + else: + aggr_expr = sta.ST_Collect_Agg(self.spark.column) + + poly_expr = stf.ST_Polygonize(aggr_expr) + result = self._query_geometry_column( + poly_expr, returns_geom=False, is_aggr=True + ) + geom = result.take([0]).iloc[0] + + if geom is None or geom.is_empty: + return GeoSeries([], name="polygons") + + parts = shapely.get_parts(geom) + return GeoSeries(list(parts), name="polygons") + # ============================================================================ # GEOMETRIC OPERATIONS # ============================================================================ @@ -1550,6 +1597,20 @@ class GeoSeries(GeoFrame, pspd.Series): ) return result + def relate_pattern(self, other, pattern, align=None) -> pspd.Series: + other, extended = self._make_series_of_val(other) + align = False if extended else align + + spark_col = stp.ST_Relate(F.col("L"), F.col("R"), F.lit(pattern)) + result = self._row_wise_operation( + spark_col, + other, + align, + returns_geom=False, + default_val=False, + ) + return _to_bool(result) + # ============================================================================ # SPATIAL PREDICATES # ============================================================================ @@ -1568,14 +1629,19 @@ class GeoSeries(GeoFrame, pspd.Series): ) return _to_bool(result) - def contains_properly(self, other, align=None): - # Implementation of the abstract method. 
- raise NotImplementedError( - _not_implemented_error( - "contains_properly", - "Tests if geometries properly contain other geometries (no boundary contact).", - ) + def contains_properly(self, other, align=None) -> pspd.Series: + other, extended = self._make_series_of_val(other) + align = False if extended else align + + spark_col = stp.ST_Relate(F.col("L"), F.col("R"), F.lit("T**FF*FF*")) + result = self._row_wise_operation( + spark_col, + other, + align, + returns_geom=False, + default_val=False, ) + return _to_bool(result) def buffer( self, diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py index e929e3e43d..b61e8e5056 100644 --- a/python/tests/geopandas/test_geoseries.py +++ b/python/tests/geopandas/test_geoseries.py @@ -1919,6 +1919,60 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 df_result = s.to_geoframe().line_merge() self.check_sgpd_equals_gpd(df_result, expected) + def test_build_area(self): + # build_area is an aggregate operation: all linework is combined, + # then areas are built from the combined noded linework. + s = GeoSeries( + [ + LineString([(0, 0), (1, 0)]), + LineString([(1, 0), (0.5, 1)]), + LineString([(0.5, 1), (0, 0)]), + ] + ) + result = s.build_area() + assert result.name == "polygons" + assert len(result) == 1 + expected_poly = Polygon([(1, 0), (0, 0), (0.5, 1), (1, 0)]) + self.check_geom_equals(result.iloc[0], expected_poly) + + # Check that GeoDataFrame works too + df_result = s.to_geoframe().build_area() + assert df_result.name == "polygons" + assert len(df_result) == 1 + self.check_geom_equals(df_result.iloc[0], expected_poly) + + # Test empty GeoSeries + result_empty = GeoSeries([]).build_area() + assert len(result_empty) == 0 + assert result_empty.name == "polygons" + + def test_polygonize(self): + # polygonize is an aggregate operation: all linework is combined, + # then polygons are formed from the combined noded linework. 
+ s = GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + LineString([(1, 0), (2, 0), (2, 1), (1, 1)]), + ] + ) + result = s.polygonize() + assert result.name == "polygons" + assert len(result) == 2 + + # Check that GeoDataFrame works too + df_result = s.to_geoframe().polygonize() + assert df_result.name == "polygons" + assert len(df_result) == 2 + + # Test that full=True raises NotImplementedError + with pytest.raises(NotImplementedError): + s.polygonize(full=True) + + # Test empty GeoSeries + result_empty = GeoSeries([]).polygonize() + assert len(result_empty) == 0 + assert result_empty.name == "polygons" + def test_unary_union(self): s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)]) with pytest.warns(FutureWarning, match="unary_union"): @@ -2556,7 +2610,34 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 self.check_pd_series_equal(df_result, expected) def test_contains_properly(self): - pass + s = GeoSeries( + [ + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + ] + ) + s2 = GeoSeries( + [ + Point(1, 1), # interior point → True + Point(0, 0), # boundary point → False + Point(3, 3), # exterior point → False + ] + ) + + result = s.contains_properly(s2, align=False) + expected = pd.Series([True, False, False]) + self.check_pd_series_equal(result, expected) + + # Test with single geometry + result = s.contains_properly(Point(1, 1)) + expected = pd.Series([True, True, True]) + self.check_pd_series_equal(result, expected) + + # Test that GeoDataFrame works too + df_result = s.to_geoframe().contains_properly(s2, align=False) + expected = pd.Series([True, False, False]) + self.check_pd_series_equal(df_result, expected) def test_relate(self): s = GeoSeries( @@ -2635,6 +2716,42 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3 expected = pd.Series(["FF2F11212", "212101212"]) 
self.check_pd_series_equal(result, expected) + + def test_relate_pattern(self): + s = GeoSeries( + [ + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + ] + ) + s2 = GeoSeries( + [ + Point(1, 1), # interior → contains pattern matches + Point(0, 0), # boundary → contains pattern fails + Point(3, 3), # exterior → contains pattern fails + ] + ) + + # Test contains_properly pattern: T**FF*FF* + result = s.relate_pattern(s2, "T**FF*FF*", align=False) + expected = pd.Series([True, False, False]) + self.check_pd_series_equal(result, expected) + + # Test interior-intersection pattern: T******** (boundary-only contact does not match) + result = s.relate_pattern(s2, "T********", align=False) + expected = pd.Series([True, False, False]) + self.check_pd_series_equal(result, expected) + + # Test with single geometry + result = s.relate_pattern(Point(1, 1), "T**FF*FF*") + expected = pd.Series([True, True, True]) + self.check_pd_series_equal(result, expected) + + # Test that GeoDataFrame works too + df_result = s.to_geoframe().relate_pattern(s2, "T**FF*FF*", align=False) + expected = pd.Series([True, False, False]) + self.check_pd_series_equal(df_result, expected) + def test_frechet_distance(self): s1 = GeoSeries( [ diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py index 4c1af9eab5..3138e8c267 100644 --- a/python/tests/geopandas/test_match_geopandas_series.py +++ b/python/tests/geopandas/test_match_geopandas_series.py @@ -999,6 +999,28 @@ class TestMatchGeopandasSeries(TestGeopandasBase): gpd_result = gpd.GeoSeries(geom).line_merge() self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + def test_build_area(self): + # build_area is aggregate: use linestrings forming a triangle + geom = [ + LineString([(0, 0), (1, 0)]), + LineString([(1, 0), (0.5, 1)]), + LineString([(0.5, 1), (0, 0)]), + ] + sgpd_result = GeoSeries(geom).build_area() + gpd_result = 
gpd.GeoSeries(geom).build_area() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + + def test_polygonize(self): + # polygonize is aggregate: use linestrings forming a closed ring + geom = [ + LineString([(0, 0), (1, 0)]), + LineString([(1, 0), (0.5, 1)]), + LineString([(0.5, 1), (0, 0)]), + ] + sgpd_result = GeoSeries(geom).polygonize() + gpd_result = gpd.GeoSeries(geom).polygonize() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + def test_unary_union(self): lst = [g for geom in self.geoms for g in geom if g.is_valid] with pytest.warns(FutureWarning, match="unary_union"): @@ -1270,7 +1292,25 @@ class TestMatchGeopandasSeries(TestGeopandasBase): self.check_pd_series_equal(sgpd_result, gpd_result) def test_contains_properly(self): - pass + for geom, geom2 in self.pairs: + if geom == geom2 or self.contains_any_geom_collection(geom, geom2): + continue + sgpd_result = GeoSeries(geom).contains_properly( + GeoSeries(geom2), align=True + ) + gpd_result = gpd.GeoSeries(geom).contains_properly( + gpd.GeoSeries(geom2), align=True + ) + self.check_pd_series_equal(sgpd_result, gpd_result) + + if len(geom) == len(geom2): + sgpd_result = GeoSeries(geom).contains_properly( + GeoSeries(geom2), align=False + ) + gpd_result = gpd.GeoSeries(geom).contains_properly( + gpd.GeoSeries(geom2), align=False + ) + self.check_pd_series_equal(sgpd_result, gpd_result) def test_relate(self): for geom, geom2 in self.pairs: @@ -1285,6 +1325,26 @@ class TestMatchGeopandasSeries(TestGeopandasBase): ) self.check_pd_series_equal(sgpd_result, gpd_result) + def test_relate_pattern(self): + for geom, geom2 in self.pairs: + for pattern in ["T********", "T*F**FFF*", "FF*FF****"]: + sgpd_result = GeoSeries(geom).relate_pattern( + GeoSeries(geom2), pattern, align=True + ) + gpd_result = gpd.GeoSeries(geom).relate_pattern( + gpd.GeoSeries(geom2), pattern, align=True + ) + self.check_pd_series_equal(sgpd_result, gpd_result) + + if len(geom) == len(geom2): + sgpd_result = 
GeoSeries(geom).relate_pattern( + GeoSeries(geom2), pattern, align=False + ) + gpd_result = gpd.GeoSeries(geom).relate_pattern( + gpd.GeoSeries(geom2), pattern, align=False + ) + self.check_pd_series_equal(sgpd_result, gpd_result) + def test_frechet_distance(self): line_pairs = [ (self.linestrings, self.linestrings),
