This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 501df70d1e [GH-2768] Replace len(self)==0 with cheaper _is_empty() check in GeoSeries (#2770)
501df70d1e is described below
commit 501df70d1e4fa2db2ddfc7e1d4c222b9258a3f50
Author: Jia Yu <[email protected]>
AuthorDate: Fri Mar 20 13:40:39 2026 -0700
[GH-2768] Replace len(self)==0 with cheaper _is_empty() check in GeoSeries (#2770)
---
python/sedona/spark/geopandas/geoseries.py | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 00ca985a64..60b9fa29bf 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -341,6 +341,10 @@ class GeoSeries(GeoFrame, pspd.Series):
if crs:
self.set_crs(crs, inplace=True)
+ def _is_empty(self) -> bool:
+ """Check if this GeoSeries has no rows without triggering a full Spark
scan."""
+ return not self._internal.spark_frame.take(1)
+
#
============================================================================
# COORDINATE REFERENCE SYSTEM (CRS) OPERATIONS
#
============================================================================
@@ -382,7 +386,7 @@ class GeoSeries(GeoFrame, pspd.Series):
"""
from pyproj import CRS
- if len(self) == 0:
+ if self._is_empty():
return None
# F.first is non-deterministic, but it doesn't matter because all
non-null values should be the same.
@@ -1152,8 +1156,8 @@ class GeoSeries(GeoFrame, pspd.Series):
)
def build_area(self, node=True):
- if len(self) == 0:
- return GeoSeries([], name="polygons", crs=self.crs)
+ if self._is_empty():
+ return GeoSeries([], name="polygons", crs=None)
if node:
aggr_expr = sta.ST_Union_Aggr(self.spark.column)
@@ -1189,8 +1193,8 @@ class GeoSeries(GeoFrame, pspd.Series):
"Sedona does not support full=True for polygonize."
)
- if len(self) == 0:
- return GeoSeries([], name="polygons", crs=self.crs)
+ if self._is_empty():
+ return GeoSeries([], name="polygons", crs=None)
if node:
aggr_expr = sta.ST_Union_Aggr(self.spark.column)
@@ -1245,7 +1249,7 @@ class GeoSeries(GeoFrame, pspd.Series):
f"Sedona does not support manually specifying different union
methods. Ignoring non-default method argument of {method}"
)
- if len(self) == 0:
+ if self._is_empty():
# While it's not explicitly defined in GeoPandas docs, this is
what GeoPandas returns for empty GeoSeries.
# If it ever changes for some reason, we'll catch that with the
test
from shapely.geometry import GeometryCollection
@@ -1260,7 +1264,7 @@ class GeoSeries(GeoFrame, pspd.Series):
return geom
def intersection_all(self) -> BaseGeometry:
- if len(self) == 0:
+ if self._is_empty():
from shapely.geometry import GeometryCollection
return GeometryCollection()
@@ -2645,7 +2649,7 @@ class GeoSeries(GeoFrame, pspd.Series):
def total_bounds(self):
import warnings
- if len(self) == 0:
+ if self._is_empty():
# numpy 'min' cannot handle empty arrays
# TODO with numpy >= 1.15, the 'initial' argument can be used
return np.array([np.nan, np.nan, np.nan, np.nan])