This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new f62ac677e fix(csharp/src/Drivers/Databricks): Update DirectResult
MaxRows MaxBytes setting (#3489)
f62ac677e is described below
commit f62ac677ebde8db95323098d67073e0686f8e9c3
Author: eric-wang-1990 <[email protected]>
AuthorDate: Fri Sep 26 16:01:03 2025 -0700
fix(csharp/src/Drivers/Databricks): Update DirectResult MaxRows MaxBytes
setting (#3489)
The directResults field controls how many rows/bytes can be returned in
one Arrow batch.
Before this change, due to a bug, the Databricks connection was calling the
base class SparkConnection, which has maxRows=1000; that is too small.
ODBC can get all results in a single ExecuteStatement call, while ADBC
needs 1 ExecuteStatement and multiple FetchResults, which causes ADBC to
be slower for small queries.
For ADBC:
<img width="614" height="136" alt="image"
src="https://github.com/user-attachments/assets/64faa63c-9bc6-4dd1-8d71-66af09e95df4"
/>
For ODBC:
<img width="611" height="27" alt="image"
src="https://github.com/user-attachments/assets/52817f46-412a-41fc-9f0b-17d7ae02d91d"
/>
This PR updates the DefaultMaxBytes to 10MB, which is the same limit as on
the Databricks backend for an Arrow row set.
MaxRows is set to 500K, assuming a minimum column size of 20 bytes.
---
.../src/Drivers/Databricks/DatabricksConnection.cs | 28 ++++++++++++++++------
.../Databricks/E2E/DatabricksConnectionTest.cs | 19 +++++++++++++++
2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index a9f4cd0f4..5a69bc5a8 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -59,12 +59,11 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
private bool _enablePKFK = true;
private bool _runAsyncInThrift = true;
- internal static TSparkGetDirectResults defaultGetDirectResults = new()
- {
- MaxRows = 2000000,
- MaxBytes = 404857600
- };
-
+ // DirectQuery configuration
+ private const long DefaultDirectResultMaxBytes = 10 * 1024 * 1024; //
10MB for direct query results size limit
+ private const long DefaultDirectResultMaxRows = 500 * 1000; // upper
limit for 10MB result assume smallest 20 Byte column
+ private long _directResultMaxBytes = DefaultDirectResultMaxBytes;
+ private long _directResultMaxRows = DefaultDirectResultMaxRows;
// CloudFetch configuration
private const long DefaultMaxBytesPerFile = 20 * 1024 * 1024; // 20MB
private const int DefaultQueryTimeSeconds = 3 * 60 * 60; // 3 hours
@@ -443,11 +442,26 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
{
if (EnableDirectResults)
{
- return base.TrySetGetDirectResults(request);
+ request.GetDirectResults = new()
+ {
+ MaxRows = _directResultMaxRows,
+ MaxBytes = _directResultMaxBytes
+ };
+ return true;
}
return false;
}
+ /// <summary>
+ /// Gets the maximum bytes per fetch block for directResult
+ /// </summary>
+ internal long DirectResultMaxBytes => _directResultMaxBytes;
+
+ /// <summary>
+ /// Gets the maximum rows per fetch block for directResult
+ /// </summary>
+ internal long DirectResultMaxRows => _directResultMaxRows;
+
/// <summary>
/// Gets whether CloudFetch is enabled.
/// </summary>
diff --git a/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
b/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
index 3d41c2c6a..0df7c22c7 100644
--- a/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
+++ b/csharp/test/Drivers/Databricks/E2E/DatabricksConnectionTest.cs
@@ -462,5 +462,24 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
$"Connection created successfully with
tracePropagationEnabled={tracePropagationEnabled}, " +
$"traceParentHeaderName={traceParentHeaderName},
traceStateEnabled={traceStateEnabled}");
}
+
+ /// <summary>
+ /// Tests that TrySetGetDirectResults uses DatabricksConnection's
defaultGetDirectResults
+ /// </summary>
+ [Fact]
+ public void
TrySetGetDirectResults_UsesDatabricksDefaultGetDirectResults()
+ {
+ var testConfig =
(DatabricksTestConfiguration)TestConfiguration.Clone();
+ using var connection = NewConnection(testConfig);
+ // Create a mock request object
+ var request = new TExecuteStatementReq();
+ bool result =
((DatabricksConnection)Connection).TrySetGetDirectResults(request);
+
+ // Assert
+ Assert.True(result, "TrySetGetDirectResults should return true
when EnableDirectResults is true by default");
+ Assert.NotNull(request.GetDirectResults);
+
Assert.Equal(((DatabricksConnection)Connection).DirectResultMaxRows,
request.GetDirectResults.MaxRows);
+
Assert.Equal(((DatabricksConnection)Connection).DirectResultMaxBytes,
request.GetDirectResults.MaxBytes);
+ }
}
}