This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 501df70d1e [GH-2768] Replace len(self)==0 with cheaper _is_empty() check in GeoSeries (#2770)
501df70d1e is described below

commit 501df70d1e4fa2db2ddfc7e1d4c222b9258a3f50
Author: Jia Yu <[email protected]>
AuthorDate: Fri Mar 20 13:40:39 2026 -0700

    [GH-2768] Replace len(self)==0 with cheaper _is_empty() check in GeoSeries (#2770)
---
 python/sedona/spark/geopandas/geoseries.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 00ca985a64..60b9fa29bf 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -341,6 +341,10 @@ class GeoSeries(GeoFrame, pspd.Series):
         if crs:
             self.set_crs(crs, inplace=True)
 
+    def _is_empty(self) -> bool:
+        """Check if this GeoSeries has no rows without triggering a full Spark scan."""
+        return not self._internal.spark_frame.take(1)
+
     # ============================================================================
     # COORDINATE REFERENCE SYSTEM (CRS) OPERATIONS
     # ============================================================================
@@ -382,7 +386,7 @@ class GeoSeries(GeoFrame, pspd.Series):
         """
         from pyproj import CRS
 
-        if len(self) == 0:
+        if self._is_empty():
             return None
 
         # F.first is non-deterministic, but it doesn't matter because all non-null values should be the same.
@@ -1152,8 +1156,8 @@ class GeoSeries(GeoFrame, pspd.Series):
         )
 
     def build_area(self, node=True):
-        if len(self) == 0:
-            return GeoSeries([], name="polygons", crs=self.crs)
+        if self._is_empty():
+            return GeoSeries([], name="polygons", crs=None)
 
         if node:
             aggr_expr = sta.ST_Union_Aggr(self.spark.column)
@@ -1189,8 +1193,8 @@ class GeoSeries(GeoFrame, pspd.Series):
                 "Sedona does not support full=True for polygonize."
             )
 
-        if len(self) == 0:
-            return GeoSeries([], name="polygons", crs=self.crs)
+        if self._is_empty():
+            return GeoSeries([], name="polygons", crs=None)
 
         if node:
             aggr_expr = sta.ST_Union_Aggr(self.spark.column)
@@ -1245,7 +1249,7 @@ class GeoSeries(GeoFrame, pspd.Series):
                 f"Sedona does not support manually specifying different union methods. Ignoring non-default method argument of {method}"
             )
 
-        if len(self) == 0:
+        if self._is_empty():
             # While it's not explicitly defined in GeoPandas docs, this is what GeoPandas returns for empty GeoSeries.
             # If it ever changes for some reason, we'll catch that with the test
             from shapely.geometry import GeometryCollection
@@ -1260,7 +1264,7 @@ class GeoSeries(GeoFrame, pspd.Series):
         return geom
 
     def intersection_all(self) -> BaseGeometry:
-        if len(self) == 0:
+        if self._is_empty():
             from shapely.geometry import GeometryCollection
 
             return GeometryCollection()
@@ -2645,7 +2649,7 @@ class GeoSeries(GeoFrame, pspd.Series):
     def total_bounds(self):
         import warnings
 
-        if len(self) == 0:
+        if self._is_empty():
             # numpy 'min' cannot handle empty arrays
             # TODO with numpy >= 1.15, the 'initial' argument can be used
             return np.array([np.nan, np.nan, np.nan, np.nan])

Reply via email to