This is an automated email from the ASF dual-hosted git repository.
petern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new fbf447f2f4 [GH-2230] Implement GeoSeries.clip_by_rect (#2784)
fbf447f2f4 is described below
commit fbf447f2f4e3dc4cdacc5a37b5c444180d46839f
Author: piyushka-ally <[email protected]>
AuthorDate: Thu Apr 2 22:56:51 2026 +0530
[GH-2230] Implement GeoSeries.clip_by_rect (#2784)
Co-authored-by: Piyush Kanti Chanda <[email protected]>
---
python/sedona/spark/geopandas/base.py | 68 ++++++++++++++++++++++
python/sedona/spark/geopandas/geodataframe.py | 1 +
python/sedona/spark/geopandas/geoseries.py | 18 ++++++
python/tests/geopandas/test_geoseries.py | 30 ++++++++++
.../tests/geopandas/test_match_geopandas_series.py | 14 +++++
5 files changed, 131 insertions(+)
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index e406052c2a..dc249d6036 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -3073,6 +3073,74 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("dwithin", self, other, distance, align)
+ def clip_by_rect(self, xmin, ymin, xmax, ymax):
+ """Returns a ``GeoSeries`` of the portions of geometry within the
+ given rectangle.
+
+ The geometry is clipped to the rectangle defined by the given
+ coordinates. Geometries that do not intersect the rectangle are
+ returned as empty polygons (``POLYGON EMPTY``).
+
+ .. note::
+ This implementation uses ``ST_Intersection`` with a rectangle
+ envelope, which may produce slightly different results from
+ geopandas' ``clip_by_rect`` in edge cases:
+
+ - Non-intersecting geometries are returned as ``POLYGON EMPTY``,
+ whereas geopandas returns ``GEOMETRYCOLLECTION EMPTY``.
+ - Points on the boundary of the rectangle are considered
+ intersecting and are returned unchanged, whereas geopandas
+ returns ``GEOMETRYCOLLECTION EMPTY`` for boundary-only
+ intersections.
+
+ Parameters
+ ----------
+ xmin : float
+ Minimum x value of the rectangle.
+ ymin : float
+ Minimum y value of the rectangle.
+ xmax : float
+ Maximum x value of the rectangle.
+ ymax : float
+ Maximum y value of the rectangle.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... Point(0.5, 0.5),
+ ... ],
+ ... )
+
+ >>> s.clip_by_rect(0, 0, 1, 1)
+ 0 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))
+ 1 LINESTRING (0 0, 1 1)
+ 2 POINT (0.5 0.5)
+ dtype: geometry
+
+ Geometries that do not intersect the rectangle are returned as
+ empty:
+
+ >>> GeoSeries([Point(5, 5)]).clip_by_rect(0, 0, 1, 1)
+ 0 POLYGON EMPTY
+ dtype: geometry
+
+ See also
+ --------
+ GeoSeries.intersection
+ """
+ return _delegate_to_geometry_column(
+ "clip_by_rect", self, xmin, ymin, xmax, ymax
+ )
+
def difference(self, other, align=None):
"""Returns a ``GeoSeries`` of the points in each aligned geometry that
are not in `other`.
diff --git a/python/sedona/spark/geopandas/geodataframe.py b/python/sedona/spark/geopandas/geodataframe.py
index 93d8dee076..68429c93e2 100644
--- a/python/sedona/spark/geopandas/geodataframe.py
+++ b/python/sedona/spark/geopandas/geodataframe.py
@@ -51,6 +51,7 @@ IMPLEMENTATION_PRIORITY = {
"_to_geopandas",
"contains",
"contains_properly",
+ "clip_by_rect",
"convex_hull",
"count_coordinates",
"count_geometries",
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 231b82d144..4b3a87ab9a 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -67,6 +67,7 @@ IMPLEMENTATION_PRIORITY = {
"convex_hull",
"explode",
"clip",
+ "clip_by_rect",
"from_shapely",
"count_coordinates",
"count_geometries",
@@ -835,6 +836,23 @@ class GeoSeries(GeoFrame, pspd.Series):
default_val=False,
)
+ def clip_by_rect(self, xmin, ymin, xmax, ymax) -> "GeoSeries":
+ if not all(
+ isinstance(val, (int, float, np.integer, np.floating))
+ for val in [xmin, ymin, xmax, ymax]
+ ):
+ raise TypeError(
+ "clip_by_rect only accepts scalar numeric values for xmin/ymin/xmax/ymax"
+ )
+ rect = stc.ST_PolygonFromEnvelope(
+ float(xmin), float(ymin), float(xmax), float(ymax)
+ )
+ spark_expr = stf.ST_Intersection(self.spark.column, rect)
+ return self._query_geometry_column(
+ spark_expr,
+ returns_geom=True,
+ )
+
def difference(self, other, align=None) -> "GeoSeries":
other_series, extended = self._make_series_of_val(other)
align = False if extended else align
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 1486ab85da..c1f97cb52e 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -616,6 +616,36 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
def test_clip(self):
pass
+ def test_clip_by_rect(self):
+ s = GeoSeries(
+ [
+ Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ Point(0.5, 0.5),
+ Point(5, 5),
+ None,
+ ],
+ )
+ result = s.clip_by_rect(0, 0, 1, 1)
+ expected = gpd.GeoSeries(
+ [
+ Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]),
+ LineString([(0, 0), (1, 1)]),
+ Point(0.5, 0.5),
+ Polygon(), # Sedona returns POLYGON EMPTY for non-intersecting
+ None,
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ # Check that GeoDataFrame works too
+ df_result = s.to_geoframe().clip_by_rect(0, 0, 1, 1)
+ self.check_sgpd_equals_gpd(df_result, expected)
+
+ # Test invalid input types
+ with pytest.raises(TypeError):
+ s.clip_by_rect("a", 0, 1, 1)
+
def test_geom_type(self):
geoseries = sgpd.GeoSeries(
[
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 372e66f737..054d2de0d7 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -495,6 +495,20 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
def test_clip(self):
pass
+ def test_clip_by_rect(self):
+ # Use rect (0.3, 0.3, 1.7, 1.7) so no test-geometry vertex or hole
+ # coordinate (0, 0.1, 0.2, 1, 2, …) lands on a rectangle boundary.
+ # This avoids boundary-handling differences between JTS and GEOS.
+ for geom in self.geoms:
+ # JTS throws TopologyException on invalid geometries (e.g.
+ # self-intersecting polygons) during ST_Intersection, while
+ # GEOS handles them gracefully.
+ if not gpd.GeoSeries(geom).is_valid.all():
+ continue
+ sgpd_result = GeoSeries(geom).clip_by_rect(0.3, 0.3, 1.7, 1.7)
+ gpd_result = gpd.GeoSeries(geom).clip_by_rect(0.3, 0.3, 1.7, 1.7)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
def test_geom_type(self):
for geom in self.geoms:
# Sedona converts it to LineString, so the outputs will be different