This is an automated email from the ASF dual-hosted git repository.

petern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new fbf447f2f4 [GH-2230] Implement GeoSeries.clip_by_rect (#2784)
fbf447f2f4 is described below

commit fbf447f2f4e3dc4cdacc5a37b5c444180d46839f
Author: piyushka-ally <[email protected]>
AuthorDate: Thu Apr 2 22:56:51 2026 +0530

    [GH-2230] Implement GeoSeries.clip_by_rect (#2784)
    
    Co-authored-by: Piyush Kanti Chanda <[email protected]>
---
 python/sedona/spark/geopandas/base.py              | 68 ++++++++++++++++++++++
 python/sedona/spark/geopandas/geodataframe.py      |  1 +
 python/sedona/spark/geopandas/geoseries.py         | 18 ++++++
 python/tests/geopandas/test_geoseries.py           | 30 ++++++++++
 .../tests/geopandas/test_match_geopandas_series.py | 14 +++++
 5 files changed, 131 insertions(+)

diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index e406052c2a..dc249d6036 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -3073,6 +3073,74 @@ class GeoFrame(metaclass=ABCMeta):
         """
         return _delegate_to_geometry_column("dwithin", self, other, distance, align)
 
+    def clip_by_rect(self, xmin, ymin, xmax, ymax):
+        """Returns a ``GeoSeries`` of the portions of geometry within the
+        given rectangle.
+
+        The geometry is clipped to the rectangle defined by the given
+        coordinates.  Geometries that do not intersect the rectangle are
+        returned as empty polygons (``POLYGON EMPTY``).
+
+        .. note::
+            This implementation uses ``ST_Intersection`` with a rectangle
+            envelope, which may produce slightly different results from
+            geopandas' ``clip_by_rect`` in edge cases:
+
+            - Non-intersecting geometries are returned as ``POLYGON EMPTY``,
+              whereas geopandas returns ``GEOMETRYCOLLECTION EMPTY``.
+            - Points on the boundary of the rectangle are considered
+              intersecting and are returned unchanged, whereas geopandas
+              returns ``GEOMETRYCOLLECTION EMPTY`` for boundary-only
+              intersections.
+
+        Parameters
+        ----------
+        xmin : float
+            Minimum x value of the rectangle.
+        ymin : float
+            Minimum y value of the rectangle.
+        xmax : float
+            Maximum x value of the rectangle.
+        ymax : float
+            Maximum y value of the rectangle.
+
+        Returns
+        -------
+        GeoSeries
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import Polygon, LineString, Point
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+        ...         LineString([(0, 0), (2, 2)]),
+        ...         Point(0.5, 0.5),
+        ...     ],
+        ... )
+
+        >>> s.clip_by_rect(0, 0, 1, 1)
+        0    POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+        1                   LINESTRING (0 0, 1 1)
+        2                         POINT (0.5 0.5)
+        dtype: geometry
+
+        Geometries that do not intersect the rectangle are returned as
+        empty:
+
+        >>> GeoSeries([Point(5, 5)]).clip_by_rect(0, 0, 1, 1)
+        0    POLYGON EMPTY
+        dtype: geometry
+
+        See also
+        --------
+        GeoSeries.intersection
+        """
+        return _delegate_to_geometry_column(
+            "clip_by_rect", self, xmin, ymin, xmax, ymax
+        )
+
     def difference(self, other, align=None):
         """Returns a ``GeoSeries`` of the points in each aligned geometry that
         are not in `other`.
diff --git a/python/sedona/spark/geopandas/geodataframe.py b/python/sedona/spark/geopandas/geodataframe.py
index 93d8dee076..68429c93e2 100644
--- a/python/sedona/spark/geopandas/geodataframe.py
+++ b/python/sedona/spark/geopandas/geodataframe.py
@@ -51,6 +51,7 @@ IMPLEMENTATION_PRIORITY = {
         "_to_geopandas",
         "contains",
         "contains_properly",
+        "clip_by_rect",
         "convex_hull",
         "count_coordinates",
         "count_geometries",
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 231b82d144..4b3a87ab9a 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -67,6 +67,7 @@ IMPLEMENTATION_PRIORITY = {
         "convex_hull",
         "explode",
         "clip",
+        "clip_by_rect",
         "from_shapely",
         "count_coordinates",
         "count_geometries",
@@ -835,6 +836,23 @@ class GeoSeries(GeoFrame, pspd.Series):
             default_val=False,
         )
 
+    def clip_by_rect(self, xmin, ymin, xmax, ymax) -> "GeoSeries":
+        if not all(
+            isinstance(val, (int, float, np.integer, np.floating))
+            for val in [xmin, ymin, xmax, ymax]
+        ):
+            raise TypeError(
+                "clip_by_rect only accepts scalar numeric values for xmin/ymin/xmax/ymax"
+            )
+        rect = stc.ST_PolygonFromEnvelope(
+            float(xmin), float(ymin), float(xmax), float(ymax)
+        )
+        spark_expr = stf.ST_Intersection(self.spark.column, rect)
+        return self._query_geometry_column(
+            spark_expr,
+            returns_geom=True,
+        )
+
     def difference(self, other, align=None) -> "GeoSeries":
         other_series, extended = self._make_series_of_val(other)
         align = False if extended else align
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 1486ab85da..c1f97cb52e 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -616,6 +616,36 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
     def test_clip(self):
         pass
 
+    def test_clip_by_rect(self):
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+                LineString([(0, 0), (2, 2)]),
+                Point(0.5, 0.5),
+                Point(5, 5),
+                None,
+            ],
+        )
+        result = s.clip_by_rect(0, 0, 1, 1)
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]),
+                LineString([(0, 0), (1, 1)]),
+                Point(0.5, 0.5),
+                Polygon(),  # Sedona returns POLYGON EMPTY for non-intersecting
+                None,
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Check that GeoDataFrame works too
+        df_result = s.to_geoframe().clip_by_rect(0, 0, 1, 1)
+        self.check_sgpd_equals_gpd(df_result, expected)
+
+        # Test invalid input types
+        with pytest.raises(TypeError):
+            s.clip_by_rect("a", 0, 1, 1)
+
     def test_geom_type(self):
         geoseries = sgpd.GeoSeries(
             [
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 372e66f737..054d2de0d7 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -495,6 +495,20 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
     def test_clip(self):
         pass
 
+    def test_clip_by_rect(self):
+        # Use rect (0.3, 0.3, 1.7, 1.7) so no test-geometry vertex or hole
+        # coordinate (0, 0.1, 0.2, 1, 2, …) lands on a rectangle boundary.
+        # This avoids boundary-handling differences between JTS and GEOS.
+        for geom in self.geoms:
+            # JTS throws TopologyException on invalid geometries (e.g.
+            # self-intersecting polygons) during ST_Intersection, while
+            # GEOS handles them gracefully.
+            if not gpd.GeoSeries(geom).is_valid.all():
+                continue
+            sgpd_result = GeoSeries(geom).clip_by_rect(0.3, 0.3, 1.7, 1.7)
+            gpd_result = gpd.GeoSeries(geom).clip_by_rect(0.3, 0.3, 1.7, 1.7)
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
     def test_geom_type(self):
         for geom in self.geoms:
             # Sedona converts it to LineString, so the outputs will be different

Reply via email to