This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new f62ac677e fix(csharp/src/Drivers/Databricks): Update DirectResult 
MaxRows MaxBytes setting (#3489)
f62ac677e is described below

commit f62ac677ebde8db95323098d67073e0686f8e9c3
Author: eric-wang-1990 <[email protected]>
AuthorDate: Fri Sep 26 16:01:03 2025 -0700

    fix(csharp/src/Drivers/Databricks): Update DirectResult MaxRows MaxBytes 
setting (#3489)
    
    The directResults field controls how many rows/bytes can be returned in
    one Arrow batch.
    Before this change, due to a bug, the Databricks driver was calling the
    base class SparkConnection, which has maxRows=1000, which is too small.
    ODBC can get all results in a single ExecuteStatement call, while ADBC
    needs one ExecuteStatement and multiple FetchResults calls, which causes
    ADBC to be slower on small queries.
    For ADBC:
    <img width="614" height="136" alt="image"
    src="https://github.com/user-attachments/assets/64faa63c-9bc6-4dd1-8d71-66af09e95df4"
    />
    For ODBC:
    <img width="611" height="27" alt="image"
    src="https://github.com/user-attachments/assets/52817f46-412a-41fc-9f0b-17d7ae02d91d"
    />
    This PR updates the default direct-result MaxBytes to 10MB, which is the
    same limit the Databricks backend applies to Arrow row sets.
    MaxRows is set to 500K, assuming a minimum column size of 20 bytes.
---
 .../src/Drivers/Databricks/DatabricksConnection.cs | 28 ++++++++++++++++------
 .../Databricks/E2E/DatabricksConnectionTest.cs     | 19 +++++++++++++++
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs 
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index a9f4cd0f4..5a69bc5a8 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -59,12 +59,11 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
         private bool _enablePKFK = true;
         private bool _runAsyncInThrift = true;
 
-        internal static TSparkGetDirectResults defaultGetDirectResults = new()
-        {
-            MaxRows = 2000000,
-            MaxBytes = 404857600
-        };
-
+        // DirectQuery configuration
+        private const long DefaultDirectResultMaxBytes = 10 * 1024 * 1024; // 
10MB for direct query results size limit
+        private const long DefaultDirectResultMaxRows = 500 * 1000; // upper 
limit for 10MB result assume smallest 20 Byte column
+        private long _directResultMaxBytes = DefaultDirectResultMaxBytes;
+        private long _directResultMaxRows = DefaultDirectResultMaxRows;
         // CloudFetch configuration
         private const long DefaultMaxBytesPerFile = 20 * 1024 * 1024; // 20MB
         private const int DefaultQueryTimeSeconds = 3 * 60 * 60; // 3 hours
@@ -443,11 +442,26 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
         {
             if (EnableDirectResults)
             {
-                return base.TrySetGetDirectResults(request);
+                request.GetDirectResults = new()
+                {
+                    MaxRows = _directResultMaxRows,
+                    MaxBytes = _directResultMaxBytes
+                };
+                return true;
             }
             return false;
         }
 
+        /// <summary>
+        /// Gets the maximum bytes per fetch block for directResult
+        /// </summary>
+        internal long DirectResultMaxBytes => _directResultMaxBytes;
+
+        /// <summary>
+        /// Gets the maximum rows per fetch block for directResult
+        /// </summary>
+        internal long DirectResultMaxRows => _directResultMaxRows;
+
         /// <summary>
         /// Gets whether CloudFetch is enabled.
         /// </summary>
diff --git a/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs 
b/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
index 3d41c2c6a..0df7c22c7 100644
--- a/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
+++ b/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
@@ -462,5 +462,24 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
                 $"Connection created successfully with 
tracePropagationEnabled={tracePropagationEnabled}, " +
                 $"traceParentHeaderName={traceParentHeaderName}, 
traceStateEnabled={traceStateEnabled}");
         }
+
+        /// <summary>
+        /// Tests that TrySetGetDirectResults uses DatabricksConnection's 
defaultGetDirectResults
+        /// </summary>
+        [Fact]
+        public void 
TrySetGetDirectResults_UsesDatabricksDefaultGetDirectResults()
+        {
+            var testConfig = 
(DatabricksTestConfiguration)TestConfiguration.Clone();
+            using var connection = NewConnection(testConfig);
+            // Create a mock request object
+            var request = new TExecuteStatementReq();
+            bool result = 
((DatabricksConnection)Connection).TrySetGetDirectResults(request);
+
+            // Assert
+            Assert.True(result, "TrySetGetDirectResults should return true 
when EnableDirectResults is true by default");
+            Assert.NotNull(request.GetDirectResults);
+            
Assert.Equal(((DatabricksConnection)Connection).DirectResultMaxRows, 
request.GetDirectResults.MaxRows);
+            
Assert.Equal(((DatabricksConnection)Connection).DirectResultMaxBytes, 
request.GetDirectResults.MaxBytes);
+        }
     }
 }

Reply via email to