Re: [PR] feat(python/sedonadb): Expose memory pool and runtime configuration in Python bindings [sedona-db]

via GitHub Fri, 13 Feb 2026 06:00:17 -0800


Copilot commented on code in PR #608:
URL: https://github.com/apache/sedona-db/pull/608#discussion_r2804366675



##########
python/sedonadb/src/context.rs:
##########
@@ -39,15 +47,59 @@ pub struct InternalContext {
 #[pymethods]
 impl InternalContext {
     #[new]
-    fn new(py: Python) -> Result<Self, PySedonaError> {
+    #[pyo3(signature = (memory_limit=None, temp_dir=None, 
memory_pool_type=None, unspillable_reserve_ratio=None))]
+    fn new(
+        py: Python,
+        memory_limit: Option<usize>,
+        temp_dir: Option<String>,
+        memory_pool_type: Option<String>,
+        unspillable_reserve_ratio: Option<f64>,
+    ) -> Result<Self, PySedonaError> {
         let runtime = tokio::runtime::Builder::new_multi_thread()
             .enable_all()
             .build()
             .map_err(|e| {
                 PySedonaError::SedonaPython(format!("Failed to build 
multithreaded runtime: {e}"))
             })?;
 
-        let inner = wait_for_future(py, &runtime, 
SedonaContext::new_local_interactive())??;
+        let mut rt_builder = RuntimeEnvBuilder::new();
+        if let Some(memory_limit) = memory_limit {
+            let pool_type: PoolType = memory_pool_type
+                .as_deref()
+                .unwrap_or("fair")

Review Comment:
   The default value for `memory_pool_type` is "fair" in the Python bindings 
(line 69), but the CLI uses "greedy" as the default 
(sedona-cli/src/main.rs:78). This inconsistency could be confusing for users 
migrating from the CLI to Python or vice versa. Consider documenting this 
difference explicitly or aligning the defaults across both interfaces.
   ```suggestion
                   // Default to "greedy" to match the sedona-cli behavior when 
not specified.
                   .unwrap_or("greedy")
   ```



##########
python/sedonadb/python/sedonadb/context.py:
##########
@@ -49,8 +49,16 @@ class SedonaContext:
         └───────┘
     """
 
-    def __init__(self):
-        self._impl = InternalContext()
+    def __init__(
+        self,
+        memory_limit: Optional[int] = None,
+        temp_dir: Optional[str] = None,
+        memory_pool_type: Literal["greedy", "fair"] = "fair",
+        unspillable_reserve_ratio: Optional[float] = None,
+    ):
+        self._impl = InternalContext(
+            memory_limit, temp_dir, memory_pool_type, unspillable_reserve_ratio
+        )
         self.options = Options()

Review Comment:
   The `SedonaContext.__init__()` method lacks a docstring. Other methods in 
this class have comprehensive docstrings (see examples throughout the file like 
`create_data_frame`, `read_parquet`, etc.). The `__init__` method should 
document the same parameters and their meanings as the `connect()` function for 
consistency and to help users who instantiate the class directly.



##########
python/sedonadb/src/context.rs:
##########
@@ -39,15 +47,59 @@ pub struct InternalContext {
 #[pymethods]
 impl InternalContext {
     #[new]
-    fn new(py: Python) -> Result<Self, PySedonaError> {
+    #[pyo3(signature = (memory_limit=None, temp_dir=None, 
memory_pool_type=None, unspillable_reserve_ratio=None))]
+    fn new(
+        py: Python,
+        memory_limit: Option<usize>,
+        temp_dir: Option<String>,
+        memory_pool_type: Option<String>,
+        unspillable_reserve_ratio: Option<f64>,
+    ) -> Result<Self, PySedonaError> {
         let runtime = tokio::runtime::Builder::new_multi_thread()
             .enable_all()
             .build()
             .map_err(|e| {
                 PySedonaError::SedonaPython(format!("Failed to build 
multithreaded runtime: {e}"))
             })?;
 
-        let inner = wait_for_future(py, &runtime, 
SedonaContext::new_local_interactive())??;
+        let mut rt_builder = RuntimeEnvBuilder::new();
+        if let Some(memory_limit) = memory_limit {
+            let pool_type: PoolType = memory_pool_type
+                .as_deref()
+                .unwrap_or("fair")
+                .parse()
+                .map_err(|e: String| PySedonaError::SedonaPython(e))?;
+            let track_capacity = NonZeroUsize::new(10).expect("track capacity 
must be non-zero");
+            let pool: Arc<dyn MemoryPool> = match pool_type {
+                PoolType::Fair => {
+                    let unspillable_reserve =
+                        
unspillable_reserve_ratio.unwrap_or(DEFAULT_UNSPILLABLE_RESERVE_RATIO);
+                    Arc::new(TrackConsumersPool::new(
+                        SedonaFairSpillPool::new(memory_limit, 
unspillable_reserve),
+                        track_capacity,
+                    ))
+                }
+                PoolType::Greedy => Arc::new(TrackConsumersPool::new(
+                    GreedyMemoryPool::new(memory_limit),
+                    track_capacity,
+                )),

Review Comment:
   When `memory_pool_type` is set to "greedy", the `unspillable_reserve_ratio` 
parameter is silently ignored since the GreedyMemoryPool doesn't use it. This 
could confuse users who set both parameters. Consider either: (1) raising an 
error when `unspillable_reserve_ratio` is provided with 
`memory_pool_type="greedy"`, or (2) documenting that 
`unspillable_reserve_ratio` only applies to the "fair" pool type.



##########
python/sedonadb/src/context.rs:
##########
@@ -39,15 +47,59 @@ pub struct InternalContext {
 #[pymethods]
 impl InternalContext {
     #[new]
-    fn new(py: Python) -> Result<Self, PySedonaError> {
+    #[pyo3(signature = (memory_limit=None, temp_dir=None, 
memory_pool_type=None, unspillable_reserve_ratio=None))]
+    fn new(
+        py: Python,
+        memory_limit: Option<usize>,
+        temp_dir: Option<String>,
+        memory_pool_type: Option<String>,
+        unspillable_reserve_ratio: Option<f64>,
+    ) -> Result<Self, PySedonaError> {
         let runtime = tokio::runtime::Builder::new_multi_thread()
             .enable_all()
             .build()
             .map_err(|e| {
                 PySedonaError::SedonaPython(format!("Failed to build 
multithreaded runtime: {e}"))
             })?;
 
-        let inner = wait_for_future(py, &runtime, 
SedonaContext::new_local_interactive())??;
+        let mut rt_builder = RuntimeEnvBuilder::new();
+        if let Some(memory_limit) = memory_limit {
+            let pool_type: PoolType = memory_pool_type
+                .as_deref()
+                .unwrap_or("fair")
+                .parse()
+                .map_err(|e: String| PySedonaError::SedonaPython(e))?;
+            let track_capacity = NonZeroUsize::new(10).expect("track capacity 
must be non-zero");
+            let pool: Arc<dyn MemoryPool> = match pool_type {
+                PoolType::Fair => {
+                    let unspillable_reserve =
+                        
unspillable_reserve_ratio.unwrap_or(DEFAULT_UNSPILLABLE_RESERVE_RATIO);
+                    Arc::new(TrackConsumersPool::new(
+                        SedonaFairSpillPool::new(memory_limit, 
unspillable_reserve),
+                        track_capacity,
+                    ))
+                }
+                PoolType::Greedy => Arc::new(TrackConsumersPool::new(
+                    GreedyMemoryPool::new(memory_limit),
+                    track_capacity,
+                )),
+            };
+            rt_builder = rt_builder.with_memory_pool(pool);
+        }

Review Comment:
   When `memory_limit` is not provided, the `memory_pool_type` and 
`unspillable_reserve_ratio` parameters are silently ignored. This could be 
confusing for users who set these parameters without realizing they have no 
effect. Consider either: (1) raising an error if `memory_pool_type` or 
`unspillable_reserve_ratio` are provided without `memory_limit`, or (2) 
documenting this dependency clearly in the parameter descriptions.



##########
python/sedonadb/src/context.rs:
##########
@@ -39,15 +47,59 @@ pub struct InternalContext {
 #[pymethods]
 impl InternalContext {
     #[new]
-    fn new(py: Python) -> Result<Self, PySedonaError> {
+    #[pyo3(signature = (memory_limit=None, temp_dir=None, 
memory_pool_type=None, unspillable_reserve_ratio=None))]
+    fn new(
+        py: Python,
+        memory_limit: Option<usize>,
+        temp_dir: Option<String>,
+        memory_pool_type: Option<String>,
+        unspillable_reserve_ratio: Option<f64>,
+    ) -> Result<Self, PySedonaError> {
         let runtime = tokio::runtime::Builder::new_multi_thread()
             .enable_all()
             .build()
             .map_err(|e| {
                 PySedonaError::SedonaPython(format!("Failed to build 
multithreaded runtime: {e}"))
             })?;
 
-        let inner = wait_for_future(py, &runtime, 
SedonaContext::new_local_interactive())??;
+        let mut rt_builder = RuntimeEnvBuilder::new();
+        if let Some(memory_limit) = memory_limit {
+            let pool_type: PoolType = memory_pool_type
+                .as_deref()
+                .unwrap_or("fair")
+                .parse()
+                .map_err(|e: String| PySedonaError::SedonaPython(e))?;
+            let track_capacity = NonZeroUsize::new(10).expect("track capacity 
must be non-zero");
+            let pool: Arc<dyn MemoryPool> = match pool_type {
+                PoolType::Fair => {
+                    let unspillable_reserve =
+                        
unspillable_reserve_ratio.unwrap_or(DEFAULT_UNSPILLABLE_RESERVE_RATIO);

Review Comment:
   Missing validation for `unspillable_reserve_ratio`. The CLI implementation 
in `sedona-cli/src/main.rs` validates that this value is between 0.0 and 1.0 
(see `validate_unspillable_reserve_ratio` function). The Python bindings should 
apply the same validation to prevent invalid values from being passed to 
`SedonaFairSpillPool::new()`. Without this validation, users could pass invalid 
values like -0.5 or 2.0, which could lead to unexpected behavior.
   ```suggestion
                           
unspillable_reserve_ratio.unwrap_or(DEFAULT_UNSPILLABLE_RESERVE_RATIO);
                       if unspillable_reserve < 0.0 || unspillable_reserve > 
1.0 {
                           return Err(PySedonaError::SedonaPython(format!(
                               "unspillable_reserve_ratio must be between 0.0 
and 1.0, got {unspillable_reserve}"
                           )));
                       }
   ```



##########
python/sedonadb/python/sedonadb/context.py:
##########
@@ -380,9 +388,26 @@ def funcs(self) -> Functions:
         return Functions(self)
 
 
-def connect() -> SedonaContext:
-    """Create a new [SedonaContext][sedonadb.context.SedonaContext]"""
-    return SedonaContext()
+def connect(
+    memory_limit: Optional[int] = None,
+    temp_dir: Optional[str] = None,
+    memory_pool_type: Literal["greedy", "fair"] = "fair",
+    unspillable_reserve_ratio: Optional[float] = None,
+) -> SedonaContext:
+    """Create a new [SedonaContext][sedonadb.context.SedonaContext]
+
+    Args:
+        memory_limit: The maximum amount of memory to use for execution, in 
bytes.
+        temp_dir: The directory to use for temporary files.
+        memory_pool_type: The type of memory pool to use. Can be "greedy" or 
"fair".
+        unspillable_reserve_ratio: The fraction of memory reserved for 
unspillable consumers (0.0 - 1.0).

Review Comment:
   The documentation should clarify the relationship between the parameters. 
Specifically, `memory_pool_type` and `unspillable_reserve_ratio` are only 
relevant when `memory_limit` is set. Consider updating the documentation to 
make this clear. For example: "memory_pool_type: The type of memory pool to use 
(only applies when memory_limit is set). Can be 'greedy' or 'fair'."
   ```suggestion
           memory_pool_type: The type of memory pool to use (only applies when 
``memory_limit`` is set). Can be "greedy" or "fair".
           unspillable_reserve_ratio: The fraction of memory reserved for 
unspillable consumers (0.0 - 1.0, only applies when ``memory_limit`` is set).
   ```



##########
python/sedonadb/python/sedonadb/context.py:
##########
@@ -380,9 +388,26 @@ def funcs(self) -> Functions:
         return Functions(self)
 
 
-def connect() -> SedonaContext:
-    """Create a new [SedonaContext][sedonadb.context.SedonaContext]"""
-    return SedonaContext()
+def connect(
+    memory_limit: Optional[int] = None,
+    temp_dir: Optional[str] = None,
+    memory_pool_type: Literal["greedy", "fair"] = "fair",
+    unspillable_reserve_ratio: Optional[float] = None,
+) -> SedonaContext:
+    """Create a new [SedonaContext][sedonadb.context.SedonaContext]
+
+    Args:
+        memory_limit: The maximum amount of memory to use for execution, in 
bytes.
+        temp_dir: The directory to use for temporary files.
+        memory_pool_type: The type of memory pool to use. Can be "greedy" or 
"fair".
+        unspillable_reserve_ratio: The fraction of memory reserved for 
unspillable consumers (0.0 - 1.0).

Review Comment:
   The documentation for the `connect()` function should specify the default 
value for `unspillable_reserve_ratio`. According to the Rust code, when not 
provided, it defaults to `DEFAULT_UNSPILLABLE_RESERVE_RATIO` (0.2). This 
information would help users understand the default behavior without needing to 
look at the source code. Consider updating the docstring to say: "The fraction 
of memory reserved for unspillable consumers (0.0 - 1.0). Defaults to 0.2 when 
memory_limit is set."
   ```suggestion
           unspillable_reserve_ratio: The fraction of memory reserved for 
unspillable consumers (0.0 - 1.0). Defaults to 0.2 when memory_limit is set.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(python/sedonadb): Expose memory pool and runtime configuration in Python bindings [sedona-db]

Reply via email to