This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 9ba6bdb8b feat(csharp/src/Drivers): Add Databricks driver (#2672)
9ba6bdb8b is described below
commit 9ba6bdb8bd42e35237dadfae758419846e6c442c
Author: davidhcoe <[email protected]>
AuthorDate: Tue Apr 8 17:07:30 2025 -0400
feat(csharp/src/Drivers): Add Databricks driver (#2672)
After the initial Spark work, the Databricks-specific features were
becoming more entwined in the code. There have been requests for
additional Databricks-specific features such as CloudFetch, Entra
authentication, and LZ4 compression to name a few.
This PR moves the Databricks-specific code out of the Spark driver and
into its own driver. It leverages the Thrift and Spark libraries that
exist in the Apache driver but teases out the Databricks capabilities
from the Spark capabilities. It does so by exposing the internals of the
Apache driver so that the Databricks driver can leverage the same
underlying libraries while still being able to offer differentiating
features and raises the branding of the driver from under the
Apache/Spark umbrella to the same level as BigQuery, FlightSQL, and
Snowflake.
---------
Co-authored-by: David Coe <>
Co-authored-by: Bruce Irschick <[email protected]>
---
csharp/Apache.Arrow.Adbc.sln | 14 +++
.../Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj | 4 +-
csharp/src/Drivers/Apache/AssemblyInfo.cs | 21 ++++
.../Apache/Hive2/HiveServer2HttpConnection.cs | 1 -
.../src/Drivers/Apache/Hive2/HiveServer2TlsImpl.cs | 4 +-
.../src/Drivers/Apache/Impala/ImpalaConnection.cs | 2 -
.../Drivers/Apache/Impala/ImpalaHttpConnection.cs | 2 -
.../Apache/Impala/ImpalaStandardConnection.cs | 1 -
csharp/src/Drivers/Apache/Spark/README.md | 37 +------
csharp/src/Drivers/Apache/Spark/SparkConnection.cs | 6 --
.../Drivers/Apache/Spark/SparkConnectionFactory.cs | 1 -
.../Drivers/Apache/Spark/SparkHttpConnection.cs | 1 -
csharp/src/Drivers/Apache/Spark/SparkParameters.cs | 22 +----
csharp/src/Drivers/Apache/Spark/SparkServerType.cs | 6 +-
csharp/src/Drivers/Apache/Spark/SparkStatement.cs | 109 ---------------------
.../src/Drivers/Apache/Thrift/StreamExtensions.cs | 4 +-
.../Drivers/Apache/Thrift/ThriftSocketTransport.cs | 6 +-
csharp/src/Drivers/Apache/readme.md | 2 +-
.../Apache.Arrow.Adbc.Drivers.Databricks.csproj} | 17 ++--
.../Drivers/Databricks/AssemblyInfo.cs} | 24 +----
.../CloudFetch/CloudFetchReader.cs} | 24 ++---
.../DatabricksConnection.cs} | 30 ++++--
.../src/Drivers/Databricks/DatabricksDatabase.cs | 47 +++++++++
.../Drivers/Databricks/DatabricksDriver.cs} | 24 ++---
.../src/Drivers/Databricks/DatabricksParameters.cs | 54 ++++++++++
.../DatabricksReader.cs} | 14 ++-
.../DatabricksSchemaParser.cs} | 5 +-
.../DatabricksStatement.cs} | 66 +++++--------
.../{Apache/Spark => Databricks}/Lz4Utilities.cs | 3 +-
csharp/src/Drivers/Databricks/readme.md | 56 +++++++++++
csharp/test/Apache.Arrow.Adbc.Tests/TestBase.cs | 2 +-
.../Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj | 6 --
csharp/test/Drivers/Apache/AssemblyInfo.cs | 35 +++++++
.../Drivers/Apache/Hive2/DateTimeValueTests.cs | 5 -
.../Drivers/Apache/Hive2/DecimalUtilityTests.cs | 1 -
.../Apache/Hive2/HiveServer2ParametersTest.cs | 6 +-
csharp/test/Drivers/Apache/Hive2/StatementTests.cs | 1 -
csharp/test/Drivers/Apache/Spark/ClientTests.cs | 3 +-
.../Drivers/Apache/Spark/DateTimeValueTests.cs | 34 -------
csharp/test/Drivers/Apache/Spark/DriverTests.cs | 2 +-
.../Drivers/Apache/Spark/SparkConnectionTest.cs | 13 +--
.../Drivers/Apache/Spark/SparkTestEnvironment.cs | 17 ++--
csharp/test/Drivers/Apache/Spark/StatementTests.cs | 68 -------------
.../test/Drivers/Apache/Spark/StringValueTests.cs | 35 -------
...che.Arrow.Adbc.Tests.Drivers.Databricks.csproj} | 26 +----
.../{Apache/Spark => Databricks}/ClientTests.cs | 10 +-
.../Spark => Databricks}/CloudFetchE2ETest.cs | 26 ++---
.../ComplexTypesValueTests.cs} | 18 +---
.../DatabricksConnectionTest.cs} | 73 ++++++--------
.../DatabricksTestConfiguration.cs} | 18 +---
.../DatabricksTestEnvironment.cs} | 39 ++++----
.../Spark => Databricks}/DateTimeValueTests.cs | 10 +-
.../{Apache/Spark => Databricks}/DriverTests.cs | 9 +-
.../test/Drivers/Databricks/NumericValueTests.cs | 65 ++++++++++++
.../Resources/Databricks.sql} | 0
.../Resources/databricks.json} | 0
.../{Apache/Spark => Databricks}/StatementTests.cs | 29 +++---
.../Spark => Databricks}/StringValueTests.cs | 31 ++----
58 files changed, 508 insertions(+), 681 deletions(-)
diff --git a/csharp/Apache.Arrow.Adbc.sln b/csharp/Apache.Arrow.Adbc.sln
index baeed931c..7bb6260dc 100644
--- a/csharp/Apache.Arrow.Adbc.sln
+++ b/csharp/Apache.Arrow.Adbc.sln
@@ -38,6 +38,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") =
"Apache.Arrow.Adbc.Tests.Dri
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmarks",
"Benchmarks\Benchmarks.csproj", "{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") =
"Apache.Arrow.Adbc.Drivers.Databricks",
"src\Drivers\Databricks\Apache.Arrow.Adbc.Drivers.Databricks.csproj",
"{25042111-6B86-8B75-7EF6-5BFAA36F72B1}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") =
"Apache.Arrow.Adbc.Tests.Drivers.Databricks",
"test\Drivers\Databricks\Apache.Arrow.Adbc.Tests.Drivers.Databricks.csproj",
"{BA07EB2C-5246-EB72-153C-493C7E7412D2}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -100,6 +104,14 @@ Global
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}.Debug|Any CPU.Build.0 =
Debug|Any CPU
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}.Release|Any
CPU.ActiveCfg = Release|Any CPU
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}.Release|Any CPU.Build.0
= Release|Any CPU
+ {25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Debug|Any CPU.ActiveCfg
= Debug|Any CPU
+ {25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Debug|Any CPU.Build.0 =
Debug|Any CPU
+ {25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Release|Any
CPU.ActiveCfg = Release|Any CPU
+ {25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Release|Any CPU.Build.0
= Release|Any CPU
+ {BA07EB2C-5246-EB72-153C-493C7E7412D2}.Debug|Any CPU.ActiveCfg
= Debug|Any CPU
+ {BA07EB2C-5246-EB72-153C-493C7E7412D2}.Debug|Any CPU.Build.0 =
Debug|Any CPU
+ {BA07EB2C-5246-EB72-153C-493C7E7412D2}.Release|Any
CPU.ActiveCfg = Release|Any CPU
+ {BA07EB2C-5246-EB72-153C-493C7E7412D2}.Release|Any CPU.Build.0
= Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -119,6 +131,8 @@ Global
{77D5A92F-4136-4DE7-81F4-43B981223280} =
{FEB257A0-4FD3-495E-9A47-9E1649755445}
{5B27FB02-D4AE-4ACB-AD88-5E64EEB61729} =
{C7290227-E925-47E7-8B6B-A8B171645D58}
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C} =
{5BD04C26-CE52-4893-8C1A-479705195CEF}
+ {25042111-6B86-8B75-7EF6-5BFAA36F72B1} =
{FEB257A0-4FD3-495E-9A47-9E1649755445}
+ {BA07EB2C-5246-EB72-153C-493C7E7412D2} =
{C7290227-E925-47E7-8B6B-A8B171645D58}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {4795CF16-0FDB-4BE0-9768-5CF31564DC03}
diff --git a/csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
b/csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
index 2ad285410..c60aeb98d 100644
--- a/csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
+++ b/csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net472;net6.0</TargetFrameworks>
@@ -6,8 +6,6 @@
<ItemGroup>
<PackageReference Include="ApacheThrift" Version="0.21.0" />
- <PackageReference Include="K4os.Compression.LZ4" Version="1.3.8" />
- <PackageReference Include="K4os.Compression.LZ4.Streams" Version="1.3.8" />
<PackageReference Include="System.Net.Http" Version="4.3.4" />
<PackageReference Include="System.Text.Json" Version="8.0.5" />
</ItemGroup>
diff --git a/csharp/src/Drivers/Apache/AssemblyInfo.cs
b/csharp/src/Drivers/Apache/AssemblyInfo.cs
new file mode 100644
index 000000000..fd70665a9
--- /dev/null
+++ b/csharp/src/Drivers/Apache/AssemblyInfo.cs
@@ -0,0 +1,21 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System.Runtime.CompilerServices;
+
+[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.Databricks,
PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
+[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests.Drivers.Databricks,
PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs
index 1c124d325..aeee4b998 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs
@@ -21,7 +21,6 @@ using System.Globalization;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
-using System.Net.Security;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsImpl.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsImpl.cs
index 388e5bd42..7c3d51c3f 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsImpl.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2TlsImpl.cs
@@ -16,11 +16,11 @@
*/
using System;
-using System.IO;
using System.Collections.Generic;
+using System.IO;
+using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
-using System.Net.Http;
namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
{
diff --git a/csharp/src/Drivers/Apache/Impala/ImpalaConnection.cs
b/csharp/src/Drivers/Apache/Impala/ImpalaConnection.cs
index 501fcfe66..3d51af521 100644
--- a/csharp/src/Drivers/Apache/Impala/ImpalaConnection.cs
+++ b/csharp/src/Drivers/Apache/Impala/ImpalaConnection.cs
@@ -17,11 +17,9 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
-using Apache.Arrow.Ipc;
using Apache.Hive.Service.Rpc.Thrift;
namespace Apache.Arrow.Adbc.Drivers.Apache.Impala
diff --git a/csharp/src/Drivers/Apache/Impala/ImpalaHttpConnection.cs
b/csharp/src/Drivers/Apache/Impala/ImpalaHttpConnection.cs
index ef5c34166..157692c2a 100644
--- a/csharp/src/Drivers/Apache/Impala/ImpalaHttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Impala/ImpalaHttpConnection.cs
@@ -17,12 +17,10 @@
using System;
using System.Collections.Generic;
-using System.Diagnostics;
using System.Globalization;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
-using System.Net.Security;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
diff --git a/csharp/src/Drivers/Apache/Impala/ImpalaStandardConnection.cs
b/csharp/src/Drivers/Apache/Impala/ImpalaStandardConnection.cs
index 2665070bb..623335e91 100644
--- a/csharp/src/Drivers/Apache/Impala/ImpalaStandardConnection.cs
+++ b/csharp/src/Drivers/Apache/Impala/ImpalaStandardConnection.cs
@@ -17,7 +17,6 @@
using System;
using System.Collections.Generic;
-using System.Diagnostics;
using System.Net;
using System.Threading;
using System.Threading.Tasks;
diff --git a/csharp/src/Drivers/Apache/Spark/README.md
b/csharp/src/Drivers/Apache/Spark/README.md
index 94712ee53..1c5855bc2 100644
--- a/csharp/src/Drivers/Apache/Spark/README.md
+++ b/csharp/src/Drivers/Apache/Spark/README.md
@@ -26,7 +26,7 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
| Property | Description | Default |
| :--- | :--- | :--- |
-| `adbc.spark.type` | (Required) Indicates the Spark server type. One of
`databricks`, `http` (future: `standard`) | |
+| `adbc.spark.type` | (Required) Indicates the Spark server type.
Currently only `http` (future: `standard`) | |
| `adbc.spark.auth_type` | An indicator of the intended type of
authentication. Allowed values: `none`, `username_only`, `basic`, and `token`.
This property is optional. The authentication type can be inferred from
`token`, `username`, and `password`. If a `token` value is provided, token
authentication is used. Otherwise, if both `username` and `password` values are
provided, basic authentication is used. | |
| `adbc.spark.host` | Host name for the data source. Do not include
scheme or port number. Example: `sparkserver.region.cloudapp.azure.com` | |
| `adbc.spark.port` | The port number the data source listens on for a
new connections. | `443` |
@@ -67,33 +67,6 @@ The `adbc.apache.statement.polltime_ms` specifies the time
between polls to the
The following table depicts how the Spark ADBC driver converts a Spark type to
an Arrow type and a .NET type:
-### Spark on Databricks
-
-| Spark Type | Arrow Type | C# Type |
-| :--- | :---: | :---: |
-| ARRAY* | String | string |
-| BIGINT | Int64 | long |
-| BINARY | Binary | byte[] |
-| BOOLEAN | Boolean | bool |
-| CHAR | String | string |
-| DATE | Date32 | DateTime |
-| DECIMAL | Decimal128 | SqlDecimal |
-| DOUBLE | Double | double |
-| FLOAT | Float | float |
-| INT | Int32 | int |
-| INTERVAL_DAY_TIME+ | String | string |
-| INTERVAL_YEAR_MONTH+ | String | string |
-| MAP* | String | string |
-| NULL | Null | null |
-| SMALLINT | Int16 | short |
-| STRING | String | string |
-| STRUCT* | String | string |
-| TIMESTAMP | Timestamp | DateTimeOffset |
-| TINYINT | Int8 | sbyte |
-| UNION | String | string |
-| USER_DEFINED | String | string |
-| VARCHAR | String | string |
-
### Apache Spark over HTTP (adbc.spark.data_type_conv = ?)
| Spark Type | Arrow Type (`none`) | C# Type (`none`) | Arrow Type
(`scalar`) | C# Type (`scalar`) |
@@ -126,14 +99,6 @@ The following table depicts how the Spark ADBC driver
converts a Spark type to a
## Supported Variants
-### Spark on Databricks
-
-Support for Spark on Databricks is the most mature.
-
-The Spark ADBC driver supports token-based authentiation using the
-[Databricks personal access
token](https://docs.databricks.com/en/dev-tools/auth/pat.html).
-Basic (username and password) authenication is not supported, at this time.
-
### Apache Spark over HTPP
Support for Spark over HTTP is initial.
diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
index d9cbbb0d9..8b3014cc8 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
@@ -17,15 +17,9 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Threading;
-using System.Threading.Tasks;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
-using Apache.Arrow.Adbc.Extensions;
-using Apache.Arrow.Ipc;
-using Apache.Arrow.Types;
using Apache.Hive.Service.Rpc.Thrift;
-using Thrift.Transport;
namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
{
diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnectionFactory.cs
b/csharp/src/Drivers/Apache/Spark/SparkConnectionFactory.cs
index 4feaf4183..484e51bc9 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkConnectionFactory.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkConnectionFactory.cs
@@ -35,7 +35,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
return serverTypeValue switch
{
- SparkServerType.Databricks => new
SparkDatabricksConnection(properties),
SparkServerType.Http => new SparkHttpConnection(properties),
// TODO: Re-enable when properly supported
//SparkServerType.Standard => new
SparkStandardConnection(properties),
diff --git a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
index a1fafc367..9cabd1ac4 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkHttpConnection.cs
@@ -21,7 +21,6 @@ using System.Globalization;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
-using System.Net.Security;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
diff --git a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
index b5587197d..66d329814 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkParameters.cs
@@ -20,7 +20,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
/// <summary>
/// Parameters used for connecting to Spark data sources.
/// </summary>
- public static class SparkParameters
+ public class SparkParameters
{
public const string HostName = "adbc.spark.host";
public const string Port = "adbc.spark.port";
@@ -33,25 +33,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
public const string Type = "adbc.spark.type";
public const string DataTypeConv = "adbc.spark.data_type_conv";
public const string ConnectTimeoutMilliseconds =
"adbc.spark.connect_timeout_ms";
-
- // CloudFetch configuration parameters
- /// <summary>
- /// Maximum number of retry attempts for CloudFetch downloads.
- /// Default value is 3 if not specified.
- /// </summary>
- public const string CloudFetchMaxRetries =
"adbc.spark.cloudfetch.max_retries";
-
- /// <summary>
- /// Delay in milliseconds between CloudFetch retry attempts.
- /// Default value is 500ms if not specified.
- /// </summary>
- public const string CloudFetchRetryDelayMs =
"adbc.spark.cloudfetch.retry_delay_ms";
-
- /// <summary>
- /// Timeout in minutes for CloudFetch HTTP operations.
- /// Default value is 5 minutes if not specified.
- /// </summary>
- public const string CloudFetchTimeoutMinutes =
"adbc.spark.cloudfetch.timeout_minutes";
}
public static class SparkAuthTypeConstants
@@ -66,7 +47,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
public static class SparkServerTypeConstants
{
public const string Http = "http";
- public const string Databricks = "databricks";
public const string Standard = "standard";
}
}
diff --git a/csharp/src/Drivers/Apache/Spark/SparkServerType.cs
b/csharp/src/Drivers/Apache/Spark/SparkServerType.cs
index 8e3dfb28d..f2cd98abc 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkServerType.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkServerType.cs
@@ -20,14 +20,13 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
internal enum SparkServerType
{
Http,
- Databricks,
Standard,
Empty = int.MaxValue,
}
internal static class ServerTypeParser
{
- internal const string SupportedList = SparkServerTypeConstants.Http +
", " + SparkServerTypeConstants.Databricks;
+ internal const string SupportedList = SparkServerTypeConstants.Http;
internal static bool TryParse(string? serverType, out SparkServerType
serverTypeValue)
{
@@ -37,9 +36,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
case "":
serverTypeValue = SparkServerType.Empty;
return true;
- case SparkServerTypeConstants.Databricks:
- serverTypeValue = SparkServerType.Databricks;
- return true;
case SparkServerTypeConstants.Http:
serverTypeValue = SparkServerType.Http;
return true;
diff --git a/csharp/src/Drivers/Apache/Spark/SparkStatement.cs
b/csharp/src/Drivers/Apache/Spark/SparkStatement.cs
index eeb8ba60b..48cc29d54 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkStatement.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkStatement.cs
@@ -15,8 +15,6 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Hive.Service.Rpc.Thrift;
@@ -24,14 +22,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
{
internal class SparkStatement : HiveServer2Statement
{
- // Default maximum bytes per file for CloudFetch
- private const long DefaultMaxBytesPerFile = 20 * 1024 * 1024; // 20MB
-
- // CloudFetch configuration
- private bool useCloudFetch = true;
- private bool canDecompressLz4 = true;
- private long maxBytesPerFile = DefaultMaxBytesPerFile;
-
internal SparkStatement(SparkConnection connection)
: base(connection)
{
@@ -47,11 +37,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
statement.QueryTimeout = QueryTimeoutSeconds;
statement.CanReadArrowResult = true;
- // Set CloudFetch capabilities
- statement.CanDownloadResult = useCloudFetch;
- statement.CanDecompressLZ4Result = canDecompressLz4;
- statement.MaxBytesPerFile = maxBytesPerFile;
-
#pragma warning disable CS0618 // Type or member is obsolete
statement.ConfOverlay = SparkConnection.timestampConfig;
#pragma warning restore CS0618 // Type or member is obsolete
@@ -67,99 +52,5 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
IntervalTypesAsArrow = false,
};
}
-
- public override void SetOption(string key, string value)
- {
- switch (key)
- {
- case Options.UseCloudFetch:
- if (bool.TryParse(value, out bool useCloudFetchValue))
- {
- this.useCloudFetch = useCloudFetchValue;
- }
- else
- {
- throw new ArgumentException($"Invalid value for {key}:
{value}. Expected a boolean value.");
- }
- break;
- case Options.CanDecompressLz4:
- if (bool.TryParse(value, out bool canDecompressLz4Value))
- {
- this.canDecompressLz4 = canDecompressLz4Value;
- }
- else
- {
- throw new ArgumentException($"Invalid value for {key}:
{value}. Expected a boolean value.");
- }
- break;
- case Options.MaxBytesPerFile:
- if (long.TryParse(value, out long maxBytesPerFileValue))
- {
- this.maxBytesPerFile = maxBytesPerFileValue;
- }
- else
- {
- throw new ArgumentException($"Invalid value for {key}:
{value}. Expected a long value.");
- }
- break;
- default:
- base.SetOption(key, value);
- break;
- }
- }
-
- /// <summary>
- /// Sets whether to use CloudFetch for retrieving results.
- /// </summary>
- /// <param name="useCloudFetch">Whether to use CloudFetch.</param>
- internal void SetUseCloudFetch(bool useCloudFetch)
- {
- this.useCloudFetch = useCloudFetch;
- }
-
- /// <summary>
- /// Gets whether CloudFetch is enabled.
- /// </summary>
- public bool UseCloudFetch => useCloudFetch;
-
- /// <summary>
- /// Sets whether the client can decompress LZ4 compressed results.
- /// </summary>
- /// <param name="canDecompressLz4">Whether the client can decompress
LZ4.</param>
- internal void SetCanDecompressLz4(bool canDecompressLz4)
- {
- this.canDecompressLz4 = canDecompressLz4;
- }
-
- /// <summary>
- /// Gets whether LZ4 decompression is enabled.
- /// </summary>
- public bool CanDecompressLz4 => canDecompressLz4;
-
- /// <summary>
- /// Sets the maximum bytes per file for CloudFetch.
- /// </summary>
- /// <param name="maxBytesPerFile">The maximum bytes per file.</param>
- internal void SetMaxBytesPerFile(long maxBytesPerFile)
- {
- this.maxBytesPerFile = maxBytesPerFile;
- }
-
- /// <summary>
- /// Gets the maximum bytes per file for CloudFetch.
- /// </summary>
- public long MaxBytesPerFile => maxBytesPerFile;
-
- /// <summary>
- /// Provides the constant string key values to the <see
cref="AdbcStatement.SetOption(string, string)" /> method.
- /// </summary>
- public sealed class Options : ApacheParameters
- {
- // Lz4 compression option
- public const string CanDecompressLz4 =
"adbc.spark.lz4_compression.enabled";
- // CloudFetch options
- public const string UseCloudFetch =
"adbc.spark.cloudfetch.enabled";
- public const string MaxBytesPerFile =
"adbc.spark.cloudfetch.max_bytes_per_file";
- }
}
}
diff --git a/csharp/src/Drivers/Apache/Thrift/StreamExtensions.cs
b/csharp/src/Drivers/Apache/Thrift/StreamExtensions.cs
index c252d9aff..7a72822c9 100644
--- a/csharp/src/Drivers/Apache/Thrift/StreamExtensions.cs
+++ b/csharp/src/Drivers/Apache/Thrift/StreamExtensions.cs
@@ -18,9 +18,9 @@
using System;
using System.Collections.Generic;
using System.IO;
-using System.Threading.Tasks;
-using System.Threading;
using System.Runtime.InteropServices;
+using System.Threading;
+using System.Threading.Tasks;
namespace Apache.Arrow.Adbc.Drivers.Apache.Thrift
{
diff --git a/csharp/src/Drivers/Apache/Thrift/ThriftSocketTransport.cs
b/csharp/src/Drivers/Apache/Thrift/ThriftSocketTransport.cs
index b6950c4a8..c9336ff37 100644
--- a/csharp/src/Drivers/Apache/Thrift/ThriftSocketTransport.cs
+++ b/csharp/src/Drivers/Apache/Thrift/ThriftSocketTransport.cs
@@ -15,15 +15,11 @@
* limitations under the License.
*/
-using System;
using System.IO;
using System.Net.Http;
-using System.Threading.Tasks;
-using System.Threading;
+using System.Reflection;
using Thrift;
-using Thrift.Transport;
using Thrift.Transport.Client;
-using System.Reflection;
namespace Apache.Arrow.Adbc.Drivers.Apache
{
diff --git a/csharp/src/Drivers/Apache/readme.md
b/csharp/src/Drivers/Apache/readme.md
index 38d616074..f58ee1cb8 100644
--- a/csharp/src/Drivers/Apache/readme.md
+++ b/csharp/src/Drivers/Apache/readme.md
@@ -54,7 +54,7 @@ The Imapala classes are under development, have limited
functionality, and may p
# Spark
-The Spark classes are intended for use against native Spark and Spark on
Databricks.
+The Spark classes are intended for use against native Spark.
For more details, see [Spark Driver](Spark/README.md)
diff --git a/csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
b/csharp/src/Drivers/Databricks/Apache.Arrow.Adbc.Drivers.Databricks.csproj
similarity index 51%
copy from csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
copy to
csharp/src/Drivers/Databricks/Apache.Arrow.Adbc.Drivers.Databricks.csproj
index 2ad285410..a8ffbfc60 100644
--- a/csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj
+++ b/csharp/src/Drivers/Databricks/Apache.Arrow.Adbc.Drivers.Databricks.csproj
@@ -1,19 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
-
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net472;net6.0</TargetFrameworks>
+ <PackageReadmeFile>readme.md</PackageReadmeFile>
</PropertyGroup>
-
<ItemGroup>
- <PackageReference Include="ApacheThrift" Version="0.21.0" />
<PackageReference Include="K4os.Compression.LZ4" Version="1.3.8" />
<PackageReference Include="K4os.Compression.LZ4.Streams" Version="1.3.8" />
- <PackageReference Include="System.Net.Http" Version="4.3.4" />
- <PackageReference Include="System.Text.Json" Version="8.0.5" />
</ItemGroup>
-
<ItemGroup>
- <ProjectReference
Include="..\..\Apache.Arrow.Adbc\Apache.Arrow.Adbc.csproj" />
+ <Content Include="readme.md">
+ <Pack>true</Pack>
+ <PackagePath>\</PackagePath>
+ <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+ </Content>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference
Include="..\Apache\Apache.Arrow.Adbc.Drivers.Apache.csproj" />
</ItemGroup>
-
</Project>
diff --git a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
b/csharp/src/Drivers/Databricks/AssemblyInfo.cs
similarity index 54%
copy from csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
copy to csharp/src/Drivers/Databricks/AssemblyInfo.cs
index 33e70193a..59e9e75de 100644
--- a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
+++ b/csharp/src/Drivers/Databricks/AssemblyInfo.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -15,24 +15,6 @@
* limitations under the License.
*/
-using System;
-using System.Globalization;
-using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
-using Xunit;
-using Xunit.Abstractions;
+using System.Runtime.CompilerServices;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
-{
- public class DateTimeValueTests :
Common.DateTimeValueTests<ApacheTestConfiguration, HiveServer2TestEnvironment>
- {
- public DateTimeValueTests(ITestOutputHelper output)
- : base(output, new HiveServer2TestEnvironment.Factory())
- { }
-
- protected override string GetFormattedTimestampValue(string value)
- {
- return "TO_TIMESTAMP(" + QuoteValue(value) + ")";
- }
- }
-}
+[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests.Drivers.Databricks,
PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
diff --git
a/csharp/src/Drivers/Apache/Spark/CloudFetch/SparkCloudFetchReader.cs
b/csharp/src/Drivers/Databricks/CloudFetch/CloudFetchReader.cs
similarity index 92%
rename from csharp/src/Drivers/Apache/Spark/CloudFetch/SparkCloudFetchReader.cs
rename to csharp/src/Drivers/Databricks/CloudFetch/CloudFetchReader.cs
index a96d6647b..27b47194f 100644
--- a/csharp/src/Drivers/Apache/Spark/CloudFetch/SparkCloudFetchReader.cs
+++ b/csharp/src/Drivers/Databricks/CloudFetch/CloudFetchReader.cs
@@ -18,24 +18,20 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
-using System.IO;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Adbc.Drivers.Apache;
-using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Apache.Arrow.Ipc;
using Apache.Hive.Service.Rpc.Thrift;
-using K4os.Compression.LZ4.Streams;
-namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
+namespace Apache.Arrow.Adbc.Drivers.Databricks.CloudFetch
{
/// <summary>
/// Reader for CloudFetch results from Databricks Spark Thrift server.
/// Handles downloading and processing URL-based result sets.
/// </summary>
- internal sealed class SparkCloudFetchReader : IArrowArrayStream
+ internal sealed class CloudFetchReader : IArrowArrayStream
{
// Default values used if not specified in connection properties
private const int DefaultMaxRetries = 3;
@@ -46,7 +42,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
private readonly int retryDelayMs;
private readonly int timeoutMinutes;
- private HiveServer2Statement? statement;
+ private DatabricksStatement? statement;
private readonly Schema schema;
private List<TSparkArrowResultLink>? resultLinks;
private int linkIndex;
@@ -58,12 +54,12 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
private readonly Lazy<HttpClient> httpClient;
/// <summary>
- /// Initializes a new instance of the <see
cref="SparkCloudFetchReader"/> class.
+ /// Initializes a new instance of the <see cref="CloudFetchReader"/>
class.
/// </summary>
- /// <param name="statement">The HiveServer2 statement.</param>
+ /// <param name="statement">The Databricks statement.</param>
/// <param name="schema">The Arrow schema.</param>
/// <param name="isLz4Compressed">Whether the results are LZ4
compressed.</param>
- public SparkCloudFetchReader(HiveServer2Statement statement, Schema
schema, bool isLz4Compressed)
+ public CloudFetchReader(DatabricksStatement statement, Schema schema,
bool isLz4Compressed)
{
this.statement = statement;
this.schema = schema;
@@ -74,7 +70,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
// Parse max retries
int parsedMaxRetries = DefaultMaxRetries;
- if
(connectionProps.TryGetValue(SparkParameters.CloudFetchMaxRetries, out string?
maxRetriesStr) &&
+ if
(connectionProps.TryGetValue(DatabricksParameters.CloudFetchMaxRetries, out
string? maxRetriesStr) &&
int.TryParse(maxRetriesStr, out parsedMaxRetries) &&
parsedMaxRetries > 0)
{
@@ -88,7 +84,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
// Parse retry delay
int parsedRetryDelay = DefaultRetryDelayMs;
- if
(connectionProps.TryGetValue(SparkParameters.CloudFetchRetryDelayMs, out
string? retryDelayStr) &&
+ if
(connectionProps.TryGetValue(DatabricksParameters.CloudFetchRetryDelayMs, out
string? retryDelayStr) &&
int.TryParse(retryDelayStr, out parsedRetryDelay) &&
parsedRetryDelay > 0)
{
@@ -102,7 +98,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
// Parse timeout minutes
int parsedTimeout = DefaultTimeoutMinutes;
- if
(connectionProps.TryGetValue(SparkParameters.CloudFetchTimeoutMinutes, out
string? timeoutStr) &&
+ if
(connectionProps.TryGetValue(DatabricksParameters.CloudFetchTimeoutMinutes, out
string? timeoutStr) &&
int.TryParse(timeoutStr, out parsedTimeout) &&
parsedTimeout > 0)
{
@@ -201,7 +197,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch
catch (Exception ex)
{
// Create concise error message based on exception type
- string errorPrefix = $"CloudFetch link
{this.linkIndex-1}:";
+ string errorPrefix = $"CloudFetch link {this.linkIndex
- 1}:";
string errorMessage = ex switch
{
_ when ex.GetType().Name.Contains("LZ4") =>
$"{errorPrefix} LZ4 decompression failed - Data may be corrupted",
diff --git a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
similarity index 81%
rename from csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
rename to csharp/src/Drivers/Databricks/DatabricksConnection.cs
index 2e8e52e72..489a10e97 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -15,18 +15,21 @@
* limitations under the License.
*/
+using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch;
+using Apache.Arrow.Adbc.Drivers.Apache;
+using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Drivers.Databricks.CloudFetch;
using Apache.Arrow.Ipc;
using Apache.Hive.Service.Rpc.Thrift;
-namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Drivers.Databricks
{
- internal class SparkDatabricksConnection : SparkHttpConnection
+ internal class DatabricksConnection : SparkHttpConnection
{
- public SparkDatabricksConnection(IReadOnlyDictionary<string, string>
properties) : base(properties)
+ public DatabricksConnection(IReadOnlyDictionary<string, string>
properties) : base(properties)
{
}
@@ -36,6 +39,13 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
TSparkRowSetType resultFormat = TSparkRowSetType.ARROW_BASED_SET;
bool isLz4Compressed = false;
+ DatabricksStatement? databricksStatement = statement as
DatabricksStatement;
+
+ if (databricksStatement == null)
+ {
+ throw new InvalidOperationException("Cannot obtain a reader
for Databricks");
+ }
+
if (metadataResp != null)
{
if (metadataResp.__isset.resultFormat)
@@ -52,17 +62,21 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
// Choose the appropriate reader based on the result format
if (resultFormat == TSparkRowSetType.URL_BASED_SET)
{
- return new SparkCloudFetchReader(statement, schema,
isLz4Compressed);
+ return new CloudFetchReader(databricksStatement, schema,
isLz4Compressed);
}
else
{
- return new SparkDatabricksReader(statement, schema,
isLz4Compressed);
+ return new DatabricksReader(databricksStatement, schema);
}
}
- internal override SchemaParser SchemaParser => new
SparkDatabricksSchemaParser();
+ internal override SchemaParser SchemaParser => new
DatabricksSchemaParser();
- internal override SparkServerType ServerType =>
SparkServerType.Databricks;
+ public override AdbcStatement CreateStatement()
+ {
+ DatabricksStatement statement = new DatabricksStatement(this);
+ return statement;
+ }
protected override TOpenSessionReq CreateSessionRequest()
{
diff --git a/csharp/src/Drivers/Databricks/DatabricksDatabase.cs
b/csharp/src/Drivers/Databricks/DatabricksDatabase.cs
new file mode 100644
index 000000000..53a8027e0
--- /dev/null
+++ b/csharp/src/Drivers/Databricks/DatabricksDatabase.cs
@@ -0,0 +1,47 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Apache.Arrow.Adbc.Drivers.Databricks
+{
+ /// <summary>
+ /// Databricks-specific implementation of <see cref="AdbcDatabase"/>
+ /// </summary>
+ public class DatabricksDatabase : AdbcDatabase
+ {
+ readonly IReadOnlyDictionary<string, string> properties;
+
+ public DatabricksDatabase(IReadOnlyDictionary<string, string>
properties)
+ {
+ this.properties = properties;
+ }
+ public override AdbcConnection Connect(IReadOnlyDictionary<string,
string>? options)
+ {
+ IReadOnlyDictionary<string, string> mergedProperties = options ==
null
+ ? properties
+ : options
+ .Concat(properties.Where(x =>
!options.Keys.Contains(x.Key, StringComparer.OrdinalIgnoreCase)))
+ .ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
+ DatabricksConnection connection = new
DatabricksConnection(mergedProperties);
+ connection.OpenAsync().Wait();
+ return connection;
+ }
+ }
+}
diff --git a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
b/csharp/src/Drivers/Databricks/DatabricksDriver.cs
similarity index 57%
copy from csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
copy to csharp/src/Drivers/Databricks/DatabricksDriver.cs
index 33e70193a..f2a79b113 100644
--- a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksDriver.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -15,24 +15,18 @@
* limitations under the License.
*/
-using System;
-using System.Globalization;
-using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
-using Xunit;
-using Xunit.Abstractions;
+using System.Collections.Generic;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
+namespace Apache.Arrow.Adbc.Drivers.Databricks
{
- public class DateTimeValueTests :
Common.DateTimeValueTests<ApacheTestConfiguration, HiveServer2TestEnvironment>
+ /// <summary>
+ /// Databricks-specific implementation of <see cref="AdbcDriver"/>
+ /// </summary>
+ public class DatabricksDriver : AdbcDriver
{
- public DateTimeValueTests(ITestOutputHelper output)
- : base(output, new HiveServer2TestEnvironment.Factory())
- { }
-
- protected override string GetFormattedTimestampValue(string value)
+ public override AdbcDatabase Open(IReadOnlyDictionary<string, string>
parameters)
{
- return "TO_TIMESTAMP(" + QuoteValue(value) + ")";
+ return new DatabricksDatabase(parameters);
}
}
}
diff --git a/csharp/src/Drivers/Databricks/DatabricksParameters.cs
b/csharp/src/Drivers/Databricks/DatabricksParameters.cs
new file mode 100644
index 000000000..833dc3e7d
--- /dev/null
+++ b/csharp/src/Drivers/Databricks/DatabricksParameters.cs
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+
+namespace Apache.Arrow.Adbc.Drivers.Databricks
+{
+ /// <summary>
+ /// Parameters used for connecting to Databricks data sources.
+ /// </summary>
+ public class DatabricksParameters : SparkParameters
+ {
+ // CloudFetch configuration parameters
+ /// <summary>
+ /// Maximum number of retry attempts for CloudFetch downloads.
+ /// Default value is 3 if not specified.
+ /// </summary>
+ public const string CloudFetchMaxRetries =
"adbc.databricks.cloudfetch.max_retries";
+
+ /// <summary>
+ /// Delay in milliseconds between CloudFetch retry attempts.
+ /// Default value is 500ms if not specified.
+ /// </summary>
+ public const string CloudFetchRetryDelayMs =
"adbc.databricks.cloudfetch.retry_delay_ms";
+
+ /// <summary>
+ /// Timeout in minutes for CloudFetch HTTP operations.
+ /// Default value is 5 minutes if not specified.
+ /// </summary>
+ public const string CloudFetchTimeoutMinutes =
"adbc.databricks.cloudfetch.timeout_minutes";
+ }
+
+ /// <summary>
+ /// Constants used for default parameter values.
+ /// </summary>
+ public class DatabricksConstants
+ {
+
+ }
+}
diff --git a/csharp/src/Drivers/Apache/Spark/SparkDatabricksReader.cs
b/csharp/src/Drivers/Databricks/DatabricksReader.cs
similarity index 90%
rename from csharp/src/Drivers/Apache/Spark/SparkDatabricksReader.cs
rename to csharp/src/Drivers/Databricks/DatabricksReader.cs
index fd8731f73..9d8b864ba 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkDatabricksReader.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksReader.cs
@@ -17,31 +17,29 @@
using System;
using System.Collections.Generic;
-using System.IO;
using System.Threading;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
+using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Ipc;
using Apache.Hive.Service.Rpc.Thrift;
-using K4os.Compression.LZ4.Streams;
-namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Drivers.Databricks
{
- internal sealed class SparkDatabricksReader : IArrowArrayStream
+ internal sealed class DatabricksReader : IArrowArrayStream
{
- HiveServer2Statement? statement;
+ DatabricksStatement? statement;
Schema schema;
List<TSparkArrowBatch>? batches;
int index;
IArrowReader? reader;
bool isLz4Compressed;
- public SparkDatabricksReader(HiveServer2Statement statement, Schema
schema)
+ public DatabricksReader(DatabricksStatement statement, Schema schema)
: this(statement, schema, false)
{
}
- public SparkDatabricksReader(HiveServer2Statement statement, Schema
schema, bool isLz4Compressed)
+ public DatabricksReader(DatabricksStatement statement, Schema schema,
bool isLz4Compressed)
{
this.statement = statement;
this.schema = schema;
diff --git a/csharp/src/Drivers/Apache/Spark/SparkDatabricksSchemaParser.cs
b/csharp/src/Drivers/Databricks/DatabricksSchemaParser.cs
similarity index 94%
rename from csharp/src/Drivers/Apache/Spark/SparkDatabricksSchemaParser.cs
rename to csharp/src/Drivers/Databricks/DatabricksSchemaParser.cs
index 995c1edf0..630a66450 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkDatabricksSchemaParser.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksSchemaParser.cs
@@ -16,13 +16,14 @@
*/
using System;
+using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Arrow.Types;
using Apache.Hive.Service.Rpc.Thrift;
-namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Drivers.Databricks
{
- internal class SparkDatabricksSchemaParser : SchemaParser
+ internal class DatabricksSchemaParser : SchemaParser
{
public override IArrowType GetArrowType(TPrimitiveTypeEntry
thriftType, DataTypeConversion dataTypeConversion)
{
diff --git a/csharp/src/Drivers/Apache/Spark/SparkStatement.cs
b/csharp/src/Drivers/Databricks/DatabricksStatement.cs
similarity index 73%
copy from csharp/src/Drivers/Apache/Spark/SparkStatement.cs
copy to csharp/src/Drivers/Databricks/DatabricksStatement.cs
index eeb8ba60b..62f576eb9 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkStatement.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksStatement.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -16,13 +16,16 @@
*/
using System;
-using System.Collections.Generic;
-using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
+using Apache.Arrow.Adbc.Drivers.Apache;
+using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Apache.Hive.Service.Rpc.Thrift;
-namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Drivers.Databricks
{
- internal class SparkStatement : HiveServer2Statement
+ /// <summary>
+ /// Databricks-specific implementation of <see cref="AdbcStatement"/>
+ /// </summary>
+ internal class DatabricksStatement : SparkStatement
{
// Default maximum bytes per file for CloudFetch
private const long DefaultMaxBytesPerFile = 20 * 1024 * 1024; // 20MB
@@ -32,40 +35,20 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
private bool canDecompressLz4 = true;
private long maxBytesPerFile = DefaultMaxBytesPerFile;
- internal SparkStatement(SparkConnection connection)
+ public DatabricksStatement(DatabricksConnection connection)
: base(connection)
{
+
}
protected override void SetStatementProperties(TExecuteStatementReq
statement)
{
- // TODO: Ensure this is set dynamically depending on server
capabilities.
- statement.EnforceResultPersistenceMode = false;
- statement.ResultPersistenceMode =
TResultPersistenceMode.ALL_RESULTS;
- // This seems like a good idea to have the server timeout so it
doesn't keep processing unnecessarily.
- // Set in combination with a CancellationToken.
- statement.QueryTimeout = QueryTimeoutSeconds;
- statement.CanReadArrowResult = true;
+ base.SetStatementProperties(statement);
// Set CloudFetch capabilities
statement.CanDownloadResult = useCloudFetch;
statement.CanDecompressLZ4Result = canDecompressLz4;
statement.MaxBytesPerFile = maxBytesPerFile;
-
-#pragma warning disable CS0618 // Type or member is obsolete
- statement.ConfOverlay = SparkConnection.timestampConfig;
-#pragma warning restore CS0618 // Type or member is obsolete
- statement.UseArrowNativeTypes = new TSparkArrowTypes
- {
- TimestampAsArrow = true,
- DecimalAsArrow = true,
-
- // set to false so they return as string
- // otherwise, they return as ARRAY_TYPE but you can't determine
- // the object type of the items in the array
- ComplexTypesAsArrow = false,
- IntervalTypesAsArrow = false,
- };
}
public override void SetOption(string key, string value)
@@ -122,6 +105,16 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
/// </summary>
public bool UseCloudFetch => useCloudFetch;
+ /// <summary>
+ /// Gets the maximum bytes per file for CloudFetch.
+ /// </summary>
+ public long MaxBytesPerFile => maxBytesPerFile;
+
+ /// <summary>
+ /// Gets whether LZ4 decompression is enabled.
+ /// </summary>
+ public bool CanDecompressLz4 => canDecompressLz4;
+
/// <summary>
/// Sets whether the client can decompress LZ4 compressed results.
/// </summary>
@@ -131,11 +124,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
this.canDecompressLz4 = canDecompressLz4;
}
- /// <summary>
- /// Gets whether LZ4 decompression is enabled.
- /// </summary>
- public bool CanDecompressLz4 => canDecompressLz4;
-
/// <summary>
/// Sets the maximum bytes per file for CloudFetch.
/// </summary>
@@ -145,21 +133,15 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
this.maxBytesPerFile = maxBytesPerFile;
}
- /// <summary>
- /// Gets the maximum bytes per file for CloudFetch.
- /// </summary>
- public long MaxBytesPerFile => maxBytesPerFile;
-
/// <summary>
/// Provides the constant string key values to the <see
cref="AdbcStatement.SetOption(string, string)" /> method.
/// </summary>
public sealed class Options : ApacheParameters
{
- // Lz4 compression option
- public const string CanDecompressLz4 =
"adbc.spark.lz4_compression.enabled";
// CloudFetch options
- public const string UseCloudFetch =
"adbc.spark.cloudfetch.enabled";
- public const string MaxBytesPerFile =
"adbc.spark.cloudfetch.max_bytes_per_file";
+ public const string UseCloudFetch =
"adbc.databricks.cloudfetch.enabled";
+ public const string CanDecompressLz4 =
"adbc.databricks.cloudfetch.lz4.enabled";
+ public const string MaxBytesPerFile =
"adbc.databricks.cloudfetch.max_bytes_per_file";
}
}
}
diff --git a/csharp/src/Drivers/Apache/Spark/Lz4Utilities.cs
b/csharp/src/Drivers/Databricks/Lz4Utilities.cs
similarity index 97%
rename from csharp/src/Drivers/Apache/Spark/Lz4Utilities.cs
rename to csharp/src/Drivers/Databricks/Lz4Utilities.cs
index 42cf21878..890f0b77b 100644
--- a/csharp/src/Drivers/Apache/Spark/Lz4Utilities.cs
+++ b/csharp/src/Drivers/Databricks/Lz4Utilities.cs
@@ -16,11 +16,10 @@
*/
using System;
-using System.Buffers;
using System.IO;
using K4os.Compression.LZ4.Streams;
-namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Drivers.Databricks
{
/// <summary>
/// Utility class for LZ4 compression/decompression operations.
diff --git a/csharp/src/Drivers/Databricks/readme.md
b/csharp/src/Drivers/Databricks/readme.md
new file mode 100644
index 000000000..b794255b0
--- /dev/null
+++ b/csharp/src/Drivers/Databricks/readme.md
@@ -0,0 +1,56 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+# Databricks
+
+The Databricks ADBC driver is built on top of the Spark ADBC driver and
inherits many of its [properties](../Apache/Spark/readme.md).
+
+The Databricks ADBC driver supports token-based authentication using the
+[Databricks personal access
token](https://docs.databricks.com/en/dev-tools/auth/pat.html).
+Basic (username and password) authentication is not supported at this time.
+
+## Data Types
+
+The following table depicts how the Databricks ADBC driver converts a
Databricks type to an Arrow type and a .NET type:
+
+
+| Databricks Type | Arrow Type | C# Type |
+| :--- | :---: | :---: |
+| ARRAY* | String | string |
+| BIGINT | Int64 | long |
+| BINARY | Binary | byte[] |
+| BOOLEAN | Boolean | bool |
+| CHAR | String | string |
+| DATE | Date32 | DateTime |
+| DECIMAL | Decimal128 | SqlDecimal |
+| DOUBLE | Double | double |
+| FLOAT | Float | float |
+| INT | Int32 | int |
+| INTERVAL_DAY_TIME+ | String | string |
+| INTERVAL_YEAR_MONTH+ | String | string |
+| MAP* | String | string |
+| NULL | Null | null |
+| SMALLINT | Int16 | short |
+| STRING | String | string |
+| STRUCT* | String | string |
+| TIMESTAMP | Timestamp | DateTimeOffset |
+| TINYINT | Int8 | sbyte |
+| UNION | String | string |
+| USER_DEFINED | String | string |
+| VARCHAR | String | string |
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/TestBase.cs
b/csharp/test/Apache.Arrow.Adbc.Tests/TestBase.cs
index 3782ad09e..919a4cc28 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/TestBase.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/TestBase.cs
@@ -176,7 +176,7 @@ namespace Apache.Arrow.Adbc.Tests
}
/// <summary>
- /// Gets a the Spark ADBC driver with settings from the <see
cref="SparkTestConfiguration"/>.
+ /// Gets an ADBC driver with settings from the <see
cref="Tests.TestConfiguration"/>.
/// </summary>
/// <param name="testConfiguration"><see
cref="Tests.TestConfiguration"/></param>
/// <param name="connectionOptions"></param>
diff --git
a/csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
b/csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
index af779a699..8b56e652c 100644
--- a/csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
+++ b/csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
@@ -41,15 +41,9 @@
<None Update="Spark\Resources\sparkconfig-http.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
- <None Update="Spark\Resources\sparkconfig-databricks.json">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
<None Update="Impala\Resources\impalaconfig.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
- <None Update="Spark\Resources\SparkData-Databricks.sql">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
<None Update="Spark\Resources\SparkData.sql">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
diff --git a/csharp/test/Drivers/Apache/AssemblyInfo.cs
b/csharp/test/Drivers/Apache/AssemblyInfo.cs
new file mode 100644
index 000000000..7cf47f159
--- /dev/null
+++ b/csharp/test/Drivers/Apache/AssemblyInfo.cs
@@ -0,0 +1,35 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// In SDK-style projects such as this one, several assembly attributes that
were historically
+// defined in this file are now automatically added during build and populated
with
+// values defined in project properties. For details of which attributes are
included
+// and how to customise this process see:
https://aka.ms/assembly-info-properties
+
+
+// Setting ComVisible to false makes the types in this assembly not visible to
COM
+// components. If you need to access a type in this assembly from COM, set
the ComVisible
+// attribute to true on that type.
+
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed
to COM.
+
+[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests.Drivers.Databricks,
PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
diff --git a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
b/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
index 33e70193a..49007f075 100644
--- a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
+++ b/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
@@ -15,11 +15,6 @@
* limitations under the License.
*/
-using System;
-using System.Globalization;
-using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
-using Xunit;
using Xunit.Abstractions;
namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
diff --git a/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
b/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
index 467317c40..ee32af9f2 100644
--- a/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
+++ b/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
@@ -20,7 +20,6 @@ using System.Buffers.Text;
using System.Collections.Generic;
using System.Data.SqlTypes;
using System.Diagnostics;
-using System.Globalization;
using System.Text;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Xunit;
diff --git a/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
b/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
index b481080bc..92e0f833d 100644
--- a/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
+++ b/csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs
@@ -52,11 +52,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
// Combined - conflicting
yield return new object?[] { $"none,scalar",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
yield return new object?[] { $" nOnE, scAlAr ",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
- yield return new object?[] { $", none, scalar, ",
DataTypeConversion.None | DataTypeConversion.Scalar ,
typeof(ArgumentOutOfRangeException) };
- yield return new object?[] { $"scalar,none",
DataTypeConversion.None | DataTypeConversion.Scalar ,
typeof(ArgumentOutOfRangeException) };
+ yield return new object?[] { $", none, scalar, ",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
+ yield return new object?[] { $"scalar,none",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
// Invalid options
yield return new object?[] { $"xxx", DataTypeConversion.Empty,
typeof(ArgumentOutOfRangeException) };
- yield return new object?[] { $"none,scalar,xxx",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
+ yield return new object?[] { $"none,scalar,xxx",
DataTypeConversion.None | DataTypeConversion.Scalar,
typeof(ArgumentOutOfRangeException) };
}
}
}
diff --git a/csharp/test/Drivers/Apache/Hive2/StatementTests.cs
b/csharp/test/Drivers/Apache/Hive2/StatementTests.cs
index c3e90a432..d2c06fff1 100644
--- a/csharp/test/Drivers/Apache/Hive2/StatementTests.cs
+++ b/csharp/test/Drivers/Apache/Hive2/StatementTests.cs
@@ -15,7 +15,6 @@
* limitations under the License.
*/
-using System.Collections.Generic;
using System.Threading.Tasks;
using Xunit;
using Xunit.Abstractions;
diff --git a/csharp/test/Drivers/Apache/Spark/ClientTests.cs
b/csharp/test/Drivers/Apache/Spark/ClientTests.cs
index a685932c8..8e6b7a108 100644
--- a/csharp/test/Drivers/Apache/Spark/ClientTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/ClientTests.cs
@@ -16,7 +16,6 @@
*/
using System.Collections.Generic;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Xunit.Abstractions;
namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
@@ -31,7 +30,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
protected override IReadOnlyList<int> GetUpdateExpectedResults()
{
int affectedRows = ValidateAffectedRows ? 1 : -1;
- return GetUpdateExpectedResults(affectedRows,
TestEnvironment.ServerType == SparkServerType.Databricks);
+ return GetUpdateExpectedResults(affectedRows, false);
}
internal static IReadOnlyList<int> GetUpdateExpectedResults(int
affectedRows, bool isDatabricks)
diff --git a/csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
b/csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
index e8ab8eedf..f05394da8 100644
--- a/csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
@@ -16,9 +16,7 @@
*/
using System;
-using System.Globalization;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Xunit;
using Xunit.Abstractions;
@@ -30,38 +28,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
: base(output, new SparkTestEnvironment.Factory())
{ }
- [SkippableTheory]
- [MemberData(nameof(TimestampBasicData), "TIMESTAMP_LTZ")]
- [MemberData(nameof(TimestampExtendedData), "TIMESTAMP_LTZ")]
- public async Task TestTimestampDataDatabricks(DateTimeOffset value,
string columnType)
- {
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
- await base.TestTimestampData(value, columnType);
- }
-
- /// <summary>
- /// Validates if driver can send and receive specific no timezone
Timstamp values correctly
- /// </summary>
- [SkippableTheory]
- [MemberData(nameof(TimestampBasicData), "TIMESTAMP_NTZ")]
- [MemberData(nameof(TimestampExtendedData), "TIMESTAMP_NTZ")]
- public async Task TestTimestampNoTimezoneDataDatabricks(DateTimeOffset
value, string columnType)
- {
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
- string columnName = "TIMESTAMPTYPE";
- using TemporaryTable table = await
NewTemporaryTableAsync(Statement, string.Format("{0} {1}", columnName,
columnType));
-
- string formattedValue = $"{value.ToString(DateFormat,
CultureInfo.InvariantCulture)}";
- DateTimeOffset truncatedValue =
DateTimeOffset.ParseExact(formattedValue, DateFormat,
CultureInfo.InvariantCulture);
-
- await ValidateInsertSelectDeleteSingleValueAsync(
- table.TableName,
- columnName,
- // Remove timezone offset
- new DateTimeOffset(truncatedValue.DateTime, TimeSpan.Zero),
- QuoteValue(formattedValue));
- }
-
/// <summary>
/// Tests INTERVAL data types (YEAR-MONTH and DAY-SECOND).
/// </summary>
diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs
b/csharp/test/Drivers/Apache/Spark/DriverTests.cs
index ec49f38a0..563decc9e 100644
--- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs
@@ -112,7 +112,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
protected override IReadOnlyList<int> GetUpdateExpectedResults()
{
int affectedRows = ValidateAffectedRows ? 1 : -1;
- return ClientTests.GetUpdateExpectedResults(affectedRows,
TestEnvironment.ServerType == SparkServerType.Databricks);
+ return ClientTests.GetUpdateExpectedResults(affectedRows, false);
}
public static IEnumerable<object[]> CatalogNamePatternData()
diff --git a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
b/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
index 417885e0b..3fa47e305 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
@@ -17,8 +17,6 @@
using System;
using System.Collections.Generic;
-using System.Globalization;
-using System.Net;
using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
@@ -76,7 +74,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
NewConnection(testConfiguration);
}
- catch(AggregateException aex)
+ catch (AggregateException aex)
{
if (exceptionType != null)
{
@@ -204,7 +202,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
cn.GetObjects(AdbcConnection.GetObjectsDepth.Tables,
testConfiguration.Metadata.Catalog, testConfiguration.Metadata.Schema,
testConfiguration.Metadata.Schema, null, null);
};
- AddAction("getObjectsAll", getObjectsAll, new List<Type?>() {
null, typeof(TimeoutException), null, null, null } );
+ AddAction("getObjectsAll", getObjectsAll, new List<Type?>() {
null, typeof(TimeoutException), null, null, null });
AddAction("getObjectsCatalogs", getObjectsCatalogs);
AddAction("getObjectsDbSchemas", getObjectsDbSchemas);
AddAction("getObjectsTables", getObjectsTables);
@@ -307,16 +305,9 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[SparkParameters.AuthType] = $"{SparkAuthTypeConstants.Token}",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword" },
typeof(ArgumentException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user" }, typeof(ArgumentException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Password] = "myPassword" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= "-1" }, typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= IPEndPoint.MinPort.ToString(CultureInfo.InvariantCulture) },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= (IPEndPoint.MaxPort + 1).ToString(CultureInfo.InvariantCulture) },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "httpxxz://hostname.com" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "http-//hostname.com" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "httpxxz://hostname.com:1234567890" },
typeof(ArgumentException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.ConnectTimeoutMilliseconds] = ((long)int.MaxValue +
1).ToString() }, typeof(ArgumentOutOfRangeException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.ConnectTimeoutMilliseconds] = "non-numeric" },
typeof(ArgumentOutOfRangeException)));
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.ConnectTimeoutMilliseconds] = "" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[SparkParameters.HostName] = "valid.server.com", [AdbcOptions.Uri] =
"http://valid.hostname.com" }, typeof(ArgumentOutOfRangeException)));
}
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
index e53c13191..a512a3ca5 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
@@ -19,7 +19,6 @@ using System;
using System.Collections.Generic;
using System.Data.SqlTypes;
using System.Text;
-using System.Text.Json.Serialization;
using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
@@ -39,11 +38,9 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public override string TestConfigVariable => "SPARK_TEST_CONFIG_FILE";
- public override string SqlDataResourceLocation => ServerType ==
SparkServerType.Databricks
- ? "Spark/Resources/SparkData-Databricks.sql"
- : "Spark/Resources/SparkData.sql";
+ public override string SqlDataResourceLocation =>
"Spark/Resources/SparkData.sql";
- public override int ExpectedColumnCount => ServerType ==
SparkServerType.Databricks ? 19 : 17;
+ public override int ExpectedColumnCount => 17;
public override AdbcDriver CreateNewDriver() => new SparkDriver();
@@ -153,13 +150,13 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public override string VendorVersion =>
((HiveServer2Connection)Connection).VendorVersion;
- public override bool SupportsDelete => ServerType ==
SparkServerType.Databricks;
+ public override bool SupportsDelete => false;
- public override bool SupportsUpdate => ServerType ==
SparkServerType.Databricks;
+ public override bool SupportsUpdate => false;
- public override bool SupportCatalogName => ServerType ==
SparkServerType.Databricks;
+ public override bool SupportCatalogName => false;
- public override bool ValidateAffectedRows => ServerType ==
SparkServerType.Databricks;
+ public override bool ValidateAffectedRows => false;
public override string GetInsertStatement(string tableName, string
columnName, string? value) =>
string.Format("INSERT INTO {0} ({1}) SELECT {2};", tableName,
columnName, value ?? "NULL");
@@ -167,7 +164,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public override SampleDataBuilder GetSampleDataBuilder()
{
SampleDataBuilder sampleDataBuilder = new();
- bool dataTypeIsFloat = ServerType == SparkServerType.Databricks ||
DataTypeConversion.HasFlag(DataTypeConversion.Scalar);
+ bool dataTypeIsFloat =
DataTypeConversion.HasFlag(DataTypeConversion.Scalar);
Type floatNetType = dataTypeIsFloat ? typeof(float) :
typeof(double);
Type floatArrowType = dataTypeIsFloat ? typeof(FloatType) :
typeof(DoubleType);
object floatValue;
diff --git a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
b/csharp/test/Drivers/Apache/Spark/StatementTests.cs
index b313d1d62..e9c28c76d 100644
--- a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/StatementTests.cs
@@ -17,9 +17,6 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit;
using Xunit.Abstractions;
@@ -40,50 +37,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
base.StatementTimeoutTest(statementWithExceptions);
}
- [SkippableTheory]
- [InlineData(true, "CloudFetch enabled")]
- [InlineData(false, "CloudFetch disabled")]
- public async Task LZ4DecompressionCapabilityTest(bool useCloudFetch,
string configName)
- {
- OutputHelper?.WriteLine($"Testing with LZ4 decompression
capability enabled ({configName})");
-
- // Create a connection using the test configuration
- using AdbcConnection connection = NewConnection();
- using var statement = connection.CreateStatement();
-
- // Set options for LZ4 decompression (enabled by default) and
CloudFetch as specified
- statement.SetOption(SparkStatement.Options.UseCloudFetch,
useCloudFetch.ToString().ToLower());
- OutputHelper?.WriteLine($"CloudFetch is {(useCloudFetch ?
"enabled" : "disabled")}");
- OutputHelper?.WriteLine("LZ4 decompression capability is enabled
by default");
-
- // Execute a query that should return data
- statement.SqlQuery = "SELECT id, CAST(id AS STRING) as id_string,
id * 2 as id_doubled FROM RANGE(100)";
- QueryResult result = statement.ExecuteQuery();
-
- // Verify we have a valid stream
- Assert.NotNull(result.Stream);
-
- // Read all batches
- int totalRows = 0;
- int batchCount = 0;
-
- while (result.Stream != null)
- {
- using var batch = await
result.Stream.ReadNextRecordBatchAsync();
- if (batch == null)
- break;
-
- batchCount++;
- totalRows += batch.Length;
- OutputHelper?.WriteLine($"Batch {batchCount}: Read
{batch.Length} rows");
- }
-
- // Verify we got all rows
- Assert.Equal(100, totalRows);
- OutputHelper?.WriteLine($"Successfully read {totalRows} rows in
{batchCount} batches with {configName}");
- OutputHelper?.WriteLine("NOTE: Whether actual LZ4 compression was
used is determined by the server");
- }
-
internal class LongRunningStatementTimeoutTestData :
ShortRunningStatementTimeoutTestData
{
public LongRunningStatementTimeoutTestData()
@@ -96,27 +49,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
}
}
- [SkippableFact]
- public async Task CanGetPrimaryKeysDatabricks()
- {
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
- await base.CanGetPrimaryKeys(TestConfiguration.Metadata.Catalog,
TestConfiguration.Metadata.Schema);
- }
-
- [SkippableFact]
- public async Task CanGetCrossReferenceFromParentTableDatabricks()
- {
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
- await
base.CanGetCrossReferenceFromParentTable(TestConfiguration.Metadata.Catalog,
TestConfiguration.Metadata.Schema);
- }
-
- [SkippableFact]
- public async Task CanGetCrossReferenceFromChildTableDatabricks()
- {
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
- await
base.CanGetCrossReferenceFromChildTable(TestConfiguration.Metadata.Catalog,
TestConfiguration.Metadata.Schema);
- }
-
protected override void PrepareCreateTableWithPrimaryKeys(out string
sqlUpdate, out string tableNameParent, out string fullTableNameParent, out
IReadOnlyList<string> primaryKeys)
{
CreateNewTableName(out tableNameParent, out fullTableNameParent);
diff --git a/csharp/test/Drivers/Apache/Spark/StringValueTests.cs
b/csharp/test/Drivers/Apache/Spark/StringValueTests.cs
index 3ac5306c0..ba22efbf6 100644
--- a/csharp/test/Drivers/Apache/Spark/StringValueTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/StringValueTests.cs
@@ -16,7 +16,6 @@
*/
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Xunit;
using Xunit.Abstractions;
@@ -25,21 +24,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public class StringValueTests(ITestOutputHelper output)
: Common.StringValueTests<SparkTestConfiguration,
SparkTestEnvironment>(output, new SparkTestEnvironment.Factory())
{
- [SkippableTheory]
- [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.", SparkServerType.Databricks)]
- internal async Task TestStringDataDatabricks(string? value,
SparkServerType serverType)
- {
- Skip.If(TestEnvironment.ServerType != serverType);
- await TestStringData(value);
- }
-
- [SkippableTheory]
- [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.", SparkServerType.Databricks)]
- internal async Task TestVarcharDataDatabricks(string? value,
SparkServerType serverType)
- {
- Skip.If(TestEnvironment.ServerType != serverType);
- await TestVarcharData(value);
- }
[SkippableTheory]
[InlineData(null)]
@@ -50,18 +34,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
await base.TestCharData(value);
}
-
- [SkippableTheory]
- [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.", SparkServerType.Databricks)]
- internal async Task TestCharDataDatabricks(string? value,
SparkServerType serverType)
- {
- Skip.If(TestEnvironment.ServerType != serverType);
- await base.TestCharData(value);
- }
-
protected override async Task TestVarcharExceptionData(string value,
string[] expectedTexts, string? expectedSqlState)
{
- Skip.If(TestEnvironment.ServerType == SparkServerType.Databricks);
await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);
}
@@ -69,15 +43,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[InlineData("String whose length is too long for VARCHAR(10).", new
string[] { "Exceeds", "length limitation: 10" }, null)]
public async Task TestVarcharExceptionDataSpark(string value, string[]
expectedTexts, string? expectedSqlState)
{
- Skip.If(TestEnvironment.ServerType == SparkServerType.Databricks,
$"Server type: {TestEnvironment.ServerType}");
- await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);
- }
-
- [SkippableTheory]
- [InlineData("String whose length is too long for VARCHAR(10).", new
string[] { "DELTA_EXCEED_CHAR_VARCHAR_LIMIT",
"DeltaInvariantViolationException" }, "22001")]
- public async Task TestVarcharExceptionDataDatabricks(string value,
string[] expectedTexts, string? expectedSqlState)
- {
- Skip.IfNot(TestEnvironment.ServerType ==
SparkServerType.Databricks, $"Server type: {TestEnvironment.ServerType}");
await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);
}
}
diff --git
a/csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
b/csharp/test/Drivers/Databricks/Apache.Arrow.Adbc.Tests.Drivers.Databricks.csproj
similarity index 56%
copy from
csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
copy to
csharp/test/Drivers/Databricks/Apache.Arrow.Adbc.Tests.Drivers.Databricks.csproj
index af779a699..4f40ca668 100644
--- a/csharp/test/Drivers/Apache/Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj
+++
b/csharp/test/Drivers/Databricks/Apache.Arrow.Adbc.Tests.Drivers.Databricks.csproj
@@ -18,8 +18,9 @@
</ItemGroup>
<ItemGroup>
- <ProjectReference
Include="..\..\..\src\Drivers\Apache\Apache.Arrow.Adbc.Drivers.Apache.csproj" />
+ <ProjectReference
Include="..\..\..\src\Drivers\Databricks\Apache.Arrow.Adbc.Drivers.Databricks.csproj"
/>
<ProjectReference
Include="..\..\Apache.Arrow.Adbc.Tests\Apache.Arrow.Adbc.Tests.csproj" />
+ <ProjectReference
Include="..\Apache\Apache.Arrow.Adbc.Tests.Drivers.Apache.csproj" />
</ItemGroup>
<ItemGroup>
@@ -29,30 +30,11 @@
</ItemGroup>
<ItemGroup>
- <None Update="Hive2\Resources\hiveconfig-http.json">
+ <None Update="Resources\databricks.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
- <None Update="Hive2\Resources\HiveData.sql">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
- <None Update="Impala\Resources\ImpalaData.sql">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
- <None Update="Spark\Resources\sparkconfig-http.json">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
- <None Update="Spark\Resources\sparkconfig-databricks.json">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
- <None Update="Impala\Resources\impalaconfig.json">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
- <None Update="Spark\Resources\SparkData-Databricks.sql">
- <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
- </None>
- <None Update="Spark\Resources\SparkData.sql">
+ <None Update="Resources\Databricks.sql">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
-
</Project>
diff --git a/csharp/test/Drivers/Apache/Spark/ClientTests.cs
b/csharp/test/Drivers/Databricks/ClientTests.cs
similarity index 82%
copy from csharp/test/Drivers/Apache/Spark/ClientTests.cs
copy to csharp/test/Drivers/Databricks/ClientTests.cs
index a685932c8..d76ac025a 100644
--- a/csharp/test/Drivers/Apache/Spark/ClientTests.cs
+++ b/csharp/test/Drivers/Databricks/ClientTests.cs
@@ -16,22 +16,22 @@
*/
using System.Collections.Generic;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class ClientTests : Common.ClientTests<SparkTestConfiguration,
SparkTestEnvironment>
+ public class ClientTests : ClientTests<DatabricksTestConfiguration,
DatabricksTestEnvironment>
{
public ClientTests(ITestOutputHelper? outputHelper)
- : base(outputHelper, new SparkTestEnvironment.Factory())
+ : base(outputHelper, new DatabricksTestEnvironment.Factory())
{
}
protected override IReadOnlyList<int> GetUpdateExpectedResults()
{
int affectedRows = ValidateAffectedRows ? 1 : -1;
- return GetUpdateExpectedResults(affectedRows,
TestEnvironment.ServerType == SparkServerType.Databricks);
+ return GetUpdateExpectedResults(affectedRows, true);
}
internal static IReadOnlyList<int> GetUpdateExpectedResults(int
affectedRows, bool isDatabricks)
diff --git a/csharp/test/Drivers/Apache/Spark/CloudFetchE2ETest.cs
b/csharp/test/Drivers/Databricks/CloudFetchE2ETest.cs
similarity index 75%
rename from csharp/test/Drivers/Apache/Spark/CloudFetchE2ETest.cs
rename to csharp/test/Drivers/Databricks/CloudFetchE2ETest.cs
index 0325c3e98..e57a55ec2 100644
--- a/csharp/test/Drivers/Apache/Spark/CloudFetchE2ETest.cs
+++ b/csharp/test/Drivers/Databricks/CloudFetchE2ETest.cs
@@ -16,28 +16,22 @@
*/
using System;
-using System.Collections.Generic;
-using System.Reflection;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark.CloudFetch;
-using Apache.Arrow.Types;
+using Apache.Arrow.Adbc.Drivers.Databricks;
using Xunit;
using Xunit.Abstractions;
-using Apache.Arrow.Adbc.Client;
-using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
/// <summary>
- /// End-to-end tests for the CloudFetch feature in the Spark ADBC driver.
+ /// End-to-end tests for the CloudFetch feature in the Databricks ADBC
driver.
/// </summary>
- public class CloudFetchE2ETest : TestBase<SparkTestConfiguration,
SparkTestEnvironment>
+ public class CloudFetchE2ETest : TestBase<DatabricksTestConfiguration,
DatabricksTestEnvironment>
{
public CloudFetchE2ETest(ITestOutputHelper? outputHelper)
- : base(outputHelper, new SparkTestEnvironment.Factory())
+ : base(outputHelper, new DatabricksTestEnvironment.Factory())
{
- // Skip the test if the SPARK_TEST_CONFIG_FILE environment
variable is not set
+ // Skip the test if the DATABRICKS_TEST_CONFIG_FILE environment
variable is not set
Skip.IfNot(Utils.CanExecuteTestConfig(TestConfigVariable));
}
@@ -60,10 +54,9 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
// Create a statement with CloudFetch enabled
var statement = Connection.CreateStatement();
- statement.SetOption(SparkStatement.Options.UseCloudFetch, "true");
- statement.SetOption(SparkStatement.Options.CanDecompressLz4,
"true");
- statement.SetOption(SparkStatement.Options.MaxBytesPerFile,
"10485760"); // 10MB
-
+ statement.SetOption(DatabricksStatement.Options.UseCloudFetch,
"true");
+ statement.SetOption(DatabricksStatement.Options.CanDecompressLz4,
"true");
+ statement.SetOption(DatabricksStatement.Options.MaxBytesPerFile,
"10485760"); // 10MB
// Execute a query that generates a large result set using range
function
statement.SqlQuery = query;
@@ -71,7 +64,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
// Execute the query and get the result
var result = await statement.ExecuteQueryAsync();
-
if (result.Stream == null)
{
throw new InvalidOperationException("Result stream is null");
diff --git a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
b/csharp/test/Drivers/Databricks/ComplexTypesValueTests.cs
similarity index 59%
copy from csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
copy to csharp/test/Drivers/Databricks/ComplexTypesValueTests.cs
index 33e70193a..591b780fb 100644
--- a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
+++ b/csharp/test/Drivers/Databricks/ComplexTypesValueTests.cs
@@ -15,24 +15,16 @@
* limitations under the License.
*/
-using System;
-using System.Globalization;
-using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
-using Xunit;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class DateTimeValueTests :
Common.DateTimeValueTests<ApacheTestConfiguration, HiveServer2TestEnvironment>
+ public class ComplexTypesValueTests :
ComplexTypesValueTests<DatabricksTestConfiguration, DatabricksTestEnvironment>
{
- public DateTimeValueTests(ITestOutputHelper output)
- : base(output, new HiveServer2TestEnvironment.Factory())
- { }
-
- protected override string GetFormattedTimestampValue(string value)
+ public ComplexTypesValueTests(ITestOutputHelper output)
+ : base(output, new DatabricksTestEnvironment.Factory())
{
- return "TO_TIMESTAMP(" + QuoteValue(value) + ")";
}
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
b/csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
similarity index 63%
copy from csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
copy to csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
index 417885e0b..a5667de53 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkConnectionTest.cs
+++ b/csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
@@ -26,14 +26,14 @@ using Thrift.Transport;
using Xunit;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
/// <summary>
- /// Class for testing the Spark ADBC connection tests.
+ /// Class for testing the Databricks ADBC connection tests.
/// </summary>
- public class SparkConnectionTest : TestBase<SparkTestConfiguration,
SparkTestEnvironment>
+ public class DatabricksConnectionTest :
TestBase<DatabricksTestConfiguration, DatabricksTestEnvironment>
{
- public SparkConnectionTest(ITestOutputHelper? outputHelper) :
base(outputHelper, new SparkTestEnvironment.Factory())
+ public DatabricksConnectionTest(ITestOutputHelper? outputHelper) :
base(outputHelper, new DatabricksTestEnvironment.Factory())
{
Skip.IfNot(Utils.CanExecuteTestConfig(TestConfigVariable));
}
@@ -65,7 +65,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[InlineData(null, null, null)]
public void ConnectionTimeoutTest(int? connectTimeoutMilliseconds,
Type? exceptionType, Type? alternateExceptionType)
{
- SparkTestConfiguration testConfiguration =
(SparkTestConfiguration)TestConfiguration.Clone();
+ DatabricksTestConfiguration testConfiguration =
(DatabricksTestConfiguration)TestConfiguration.Clone();
if (connectTimeoutMilliseconds.HasValue)
testConfiguration.ConnectTimeoutMilliseconds =
connectTimeoutMilliseconds.Value.ToString();
@@ -76,7 +76,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
NewConnection(testConfiguration);
}
- catch(AggregateException aex)
+ catch (AggregateException aex)
{
if (exceptionType != null)
{
@@ -105,14 +105,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
}
/// <summary>
- /// Tests the various metadata calls on a SparkConnection
+ /// Tests the various metadata calls on a DatabricksConnection
/// </summary>
/// <param name="metadataWithException"></param>
[SkippableTheory]
[ClassData(typeof(MetadataTimeoutTestData))]
internal void MetadataTimeoutTest(MetadataWithExceptions
metadataWithException)
{
- SparkTestConfiguration testConfiguration =
(SparkTestConfiguration)TestConfiguration.Clone();
+ DatabricksTestConfiguration testConfiguration =
(DatabricksTestConfiguration)TestConfiguration.Clone();
if (metadataWithException.QueryTimeoutSeconds.HasValue)
testConfiguration.QueryTimeoutSeconds =
metadataWithException.QueryTimeoutSeconds.Value.ToString();
@@ -138,7 +138,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
/// </summary>
internal class MetadataWithExceptions
{
- public MetadataWithExceptions(int? queryTimeoutSeconds, string
actionName, Action<SparkTestConfiguration> action, Type? exceptionType, Type?
alternateExceptionType)
+ public MetadataWithExceptions(int? queryTimeoutSeconds, string
actionName, Action<DatabricksTestConfiguration> action, Type? exceptionType,
Type? alternateExceptionType)
{
QueryTimeoutSeconds = queryTimeoutSeconds;
ActionName = actionName;
@@ -168,7 +168,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
/// <summary>
/// The metadata action to perform.
/// </summary>
- public Action<SparkTestConfiguration> MetadataAction { get; }
+ public Action<DatabricksTestConfiguration> MetadataAction { get; }
}
/// <summary>
@@ -178,38 +178,38 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
{
public MetadataTimeoutTestData()
{
- SparkConnectionTest sparkConnectionTest = new
SparkConnectionTest(null);
+ DatabricksConnectionTest sparkConnectionTest = new
DatabricksConnectionTest(null);
- Action<SparkTestConfiguration> getObjectsAll =
(testConfiguration) =>
+ Action<DatabricksTestConfiguration> getObjectsAll =
(testConfiguration) =>
{
AdbcConnection cn =
sparkConnectionTest.NewConnection(testConfiguration);
cn.GetObjects(AdbcConnection.GetObjectsDepth.All,
testConfiguration.Metadata.Catalog, testConfiguration.Metadata.Schema,
testConfiguration.Metadata.Table, null, null);
};
- Action<SparkTestConfiguration> getObjectsCatalogs =
(testConfiguration) =>
+ Action<DatabricksTestConfiguration> getObjectsCatalogs =
(testConfiguration) =>
{
AdbcConnection cn =
sparkConnectionTest.NewConnection(testConfiguration);
cn.GetObjects(AdbcConnection.GetObjectsDepth.Catalogs,
testConfiguration.Metadata.Catalog, testConfiguration.Metadata.Schema,
testConfiguration.Metadata.Schema, null, null);
};
- Action<SparkTestConfiguration> getObjectsDbSchemas =
(testConfiguration) =>
+ Action<DatabricksTestConfiguration> getObjectsDbSchemas =
(testConfiguration) =>
{
AdbcConnection cn =
sparkConnectionTest.NewConnection(testConfiguration);
cn.GetObjects(AdbcConnection.GetObjectsDepth.DbSchemas,
testConfiguration.Metadata.Catalog, testConfiguration.Metadata.Schema,
testConfiguration.Metadata.Schema, null, null);
};
- Action<SparkTestConfiguration> getObjectsTables =
(testConfiguration) =>
+ Action<DatabricksTestConfiguration> getObjectsTables =
(testConfiguration) =>
{
AdbcConnection cn =
sparkConnectionTest.NewConnection(testConfiguration);
cn.GetObjects(AdbcConnection.GetObjectsDepth.Tables,
testConfiguration.Metadata.Catalog, testConfiguration.Metadata.Schema,
testConfiguration.Metadata.Schema, null, null);
};
- AddAction("getObjectsAll", getObjectsAll, new List<Type?>() {
null, typeof(TimeoutException), null, null, null } );
+ AddAction("getObjectsAll", getObjectsAll, new List<Type?>() {
null, typeof(TimeoutException), null, null, null });
AddAction("getObjectsCatalogs", getObjectsCatalogs);
AddAction("getObjectsDbSchemas", getObjectsDbSchemas);
AddAction("getObjectsTables", getObjectsTables);
- Action<SparkTestConfiguration> getTableTypes =
(testConfiguration) =>
+ Action<DatabricksTestConfiguration> getTableTypes =
(testConfiguration) =>
{
AdbcConnection cn =
sparkConnectionTest.NewConnection(testConfiguration);
cn.GetTableTypes();
@@ -217,7 +217,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
AddAction("getTableTypes", getTableTypes);
- Action<SparkTestConfiguration> getTableSchema =
(testConfiguration) =>
+ Action<DatabricksTestConfiguration> getTableSchema =
(testConfiguration) =>
{
AdbcConnection cn =
sparkConnectionTest.NewConnection(testConfiguration);
cn.GetTableSchema(testConfiguration.Metadata.Catalog,
testConfiguration.Metadata.Schema, testConfiguration.Metadata.Table);
@@ -232,7 +232,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
/// <param name="name">The friendly name of the action.</param>
/// <param name="action">The action to perform.</param>
/// <param name="alternateExceptions">Optional list of alternate
exceptions that are possible. Must have 5 items if present.</param>
- private void AddAction(string name, Action<SparkTestConfiguration>
action, List<Type?>? alternateExceptions = null)
+ private void AddAction(string name,
Action<DatabricksTestConfiguration> action, List<Type?>? alternateExceptions =
null)
{
List<Type?> expectedExceptions = new List<Type?>()
{
@@ -259,7 +259,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
/// [3] QueryTimeout = default
/// [4] QueryTimeout = 300
/// </remarks>
- private void AddAction(string name, Action<SparkTestConfiguration>
action, List<Type?> expectedExceptions, List<Type?>? alternateExceptions)
+ private void AddAction(string name,
Action<DatabricksTestConfiguration> action, List<Type?> expectedExceptions,
List<Type?>? alternateExceptions)
{
Assert.True(expectedExceptions.Count == 5);
@@ -293,30 +293,15 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public InvalidConnectionParametersTestData()
{
Add(new([], typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] = " " },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] = "xxx" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Standard }, typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = " " },
typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] =
"invalid!server.com" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] =
"http://valid.server.com" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com"
}, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[SparkParameters.AuthType] = $"unknown_auth_type" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[SparkParameters.AuthType] = $"{SparkAuthTypeConstants.Basic}" },
typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[SparkParameters.AuthType] = $"{SparkAuthTypeConstants.Token}" },
typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[SparkParameters.AuthType] = $"{SparkAuthTypeConstants.Basic}",
[SparkParameters.Token] = "abcdef" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[SparkParameters.AuthType] = $"{SparkAuthTypeConstants.Token}",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword" },
typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Password] = "myPassword" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= "-1" }, typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= IPEndPoint.MinPort.ToString(CultureInfo.InvariantCulture) },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= (IPEndPoint.MaxPort + 1).ToString(CultureInfo.InvariantCulture) },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "httpxxz://hostname.com" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "http-//hostname.com" }, typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "httpxxz://hostname.com:1234567890" },
typeof(ArgumentException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.ConnectTimeoutMilliseconds] = ((long)int.MaxValue +
1).ToString() }, typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.ConnectTimeoutMilliseconds] = "non-numeric" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[SparkParameters.ConnectTimeoutMilliseconds] = "" },
typeof(ArgumentOutOfRangeException)));
- Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Databricks, [SparkParameters.Token] = "abcdef",
[SparkParameters.HostName] = "valid.server.com", [AdbcOptions.Uri] =
"http://valid.hostname.com" }, typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { [SparkParameters.Type] = " " },
typeof(ArgumentException)));
+ Add(new(new() { [SparkParameters.Type] = "xxx" },
typeof(ArgumentException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= "-1" }, typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= IPEndPoint.MinPort.ToString(CultureInfo.InvariantCulture) },
typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.HostName] =
"valid.server.com", [SparkParameters.Token] = "abcdef", [SparkParameters.Port]
= (IPEndPoint.MaxPort + 1).ToString(CultureInfo.InvariantCulture) },
typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "httpxxz://hostname.com" },
typeof(ArgumentOutOfRangeException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "http-//hostname.com" }, typeof(ArgumentException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.Token] = "abcdef",
[AdbcOptions.Uri] = "httpxxz://hostname.com:1234567890" },
typeof(ArgumentException)));
+ Add(new(new() { /*[SparkParameters.Type] =
SparkServerTypeConstants.Databricks,*/ [SparkParameters.Token] = "abcdef",
[SparkParameters.HostName] = "valid.server.com", [AdbcOptions.Uri] =
"http://valid.hostname.com" }, typeof(ArgumentOutOfRangeException)));
}
}
}
diff --git a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
b/csharp/test/Drivers/Databricks/DatabricksTestConfiguration.cs
similarity index 56%
copy from csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
copy to csharp/test/Drivers/Databricks/DatabricksTestConfiguration.cs
index 33e70193a..fb221560b 100644
--- a/csharp/test/Drivers/Apache/Hive2/DateTimeValueTests.cs
+++ b/csharp/test/Drivers/Databricks/DatabricksTestConfiguration.cs
@@ -15,24 +15,12 @@
* limitations under the License.
*/
-using System;
-using System.Globalization;
-using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
-using Xunit;
-using Xunit.Abstractions;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class DateTimeValueTests :
Common.DateTimeValueTests<ApacheTestConfiguration, HiveServer2TestEnvironment>
+ public class DatabricksTestConfiguration : SparkTestConfiguration
{
- public DateTimeValueTests(ITestOutputHelper output)
- : base(output, new HiveServer2TestEnvironment.Factory())
- { }
- protected override string GetFormattedTimestampValue(string value)
- {
- return "TO_TIMESTAMP(" + QuoteValue(value) + ")";
- }
}
}
diff --git a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
b/csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
similarity index 90%
copy from csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
copy to csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
index e53c13191..a84817ed2 100644
--- a/csharp/test/Drivers/Apache/Spark/SparkTestEnvironment.cs
+++ b/csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
@@ -19,33 +19,32 @@ using System;
using System.Collections.Generic;
using System.Data.SqlTypes;
using System.Text;
-using System.Text.Json.Serialization;
using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Drivers.Databricks;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache;
using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Apache.Arrow.Types;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class SparkTestEnvironment :
CommonTestEnvironment<SparkTestConfiguration>
+ public class DatabricksTestEnvironment :
CommonTestEnvironment<DatabricksTestConfiguration>
{
- public class Factory : Factory<SparkTestEnvironment>
+ public class Factory : Factory<DatabricksTestEnvironment>
{
- public override SparkTestEnvironment Create(Func<AdbcConnection>
getConnection) => new(getConnection);
+ public override DatabricksTestEnvironment
Create(Func<AdbcConnection> getConnection) => new(getConnection);
}
- private SparkTestEnvironment(Func<AdbcConnection> getConnection) :
base(getConnection) { }
+ private DatabricksTestEnvironment(Func<AdbcConnection> getConnection)
: base(getConnection) { }
- public override string TestConfigVariable => "SPARK_TEST_CONFIG_FILE";
+ public override string TestConfigVariable =>
"DATABRICKS_TEST_CONFIG_FILE";
- public override string SqlDataResourceLocation => ServerType ==
SparkServerType.Databricks
- ? "Spark/Resources/SparkData-Databricks.sql"
- : "Spark/Resources/SparkData.sql";
+ public override string SqlDataResourceLocation =>
"Resources/Databricks.sql";
- public override int ExpectedColumnCount => ServerType ==
SparkServerType.Databricks ? 19 : 17;
+ public override int ExpectedColumnCount => 19;
- public override AdbcDriver CreateNewDriver() => new SparkDriver();
+ public override AdbcDriver CreateNewDriver() => new DatabricksDriver();
public override string GetCreateTemporaryTableStatement(string
tableName, string columns)
{
@@ -54,7 +53,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public override string Delimiter => "`";
- public override Dictionary<string, string>
GetDriverParameters(SparkTestConfiguration testConfiguration)
+ public override Dictionary<string, string>
GetDriverParameters(DatabricksTestConfiguration testConfiguration)
{
Dictionary<string, string> parameters =
new(StringComparer.OrdinalIgnoreCase);
@@ -149,17 +148,15 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
return parameters;
}
- internal SparkServerType ServerType =>
((SparkConnection)Connection).ServerType;
+ public override string VendorVersion =>
((DatabricksConnection)Connection).VendorVersion;
- public override string VendorVersion =>
((HiveServer2Connection)Connection).VendorVersion;
+ public override bool SupportsDelete => true;
- public override bool SupportsDelete => ServerType ==
SparkServerType.Databricks;
+ public override bool SupportsUpdate => true;
- public override bool SupportsUpdate => ServerType ==
SparkServerType.Databricks;
+ public override bool SupportCatalogName => true;
- public override bool SupportCatalogName => ServerType ==
SparkServerType.Databricks;
-
- public override bool ValidateAffectedRows => ServerType ==
SparkServerType.Databricks;
+ public override bool ValidateAffectedRows => true;
public override string GetInsertStatement(string tableName, string
columnName, string? value) =>
string.Format("INSERT INTO {0} ({1}) SELECT {2};", tableName,
columnName, value ?? "NULL");
@@ -167,7 +164,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
public override SampleDataBuilder GetSampleDataBuilder()
{
SampleDataBuilder sampleDataBuilder = new();
- bool dataTypeIsFloat = ServerType == SparkServerType.Databricks ||
DataTypeConversion.HasFlag(DataTypeConversion.Scalar);
+ bool dataTypeIsFloat = true; // ServerType ==
SparkServerType.Databricks ||
DataTypeConversion.HasFlag(DataTypeConversion.Scalar);
Type floatNetType = dataTypeIsFloat ? typeof(float) :
typeof(double);
Type floatArrowType = dataTypeIsFloat ? typeof(FloatType) :
typeof(DoubleType);
object floatValue;
diff --git a/csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
b/csharp/test/Drivers/Databricks/DateTimeValueTests.cs
similarity index 92%
copy from csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
copy to csharp/test/Drivers/Databricks/DateTimeValueTests.cs
index e8ab8eedf..56d72c586 100644
--- a/csharp/test/Drivers/Apache/Spark/DateTimeValueTests.cs
+++ b/csharp/test/Drivers/Databricks/DateTimeValueTests.cs
@@ -18,16 +18,16 @@
using System;
using System.Globalization;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class DateTimeValueTests :
Common.DateTimeValueTests<SparkTestConfiguration, SparkTestEnvironment>
+ public class DateTimeValueTests :
DateTimeValueTests<DatabricksTestConfiguration, DatabricksTestEnvironment>
{
public DateTimeValueTests(ITestOutputHelper output)
- : base(output, new SparkTestEnvironment.Factory())
+ : base(output, new DatabricksTestEnvironment.Factory())
{ }
[SkippableTheory]
@@ -35,7 +35,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[MemberData(nameof(TimestampExtendedData), "TIMESTAMP_LTZ")]
public async Task TestTimestampDataDatabricks(DateTimeOffset value,
string columnType)
{
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
await base.TestTimestampData(value, columnType);
}
@@ -47,7 +46,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[MemberData(nameof(TimestampExtendedData), "TIMESTAMP_NTZ")]
public async Task TestTimestampNoTimezoneDataDatabricks(DateTimeOffset
value, string columnType)
{
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
string columnName = "TIMESTAMPTYPE";
using TemporaryTable table = await
NewTemporaryTableAsync(Statement, string.Format("{0} {1}", columnName,
columnType));
diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs
b/csharp/test/Drivers/Databricks/DriverTests.cs
similarity index 95%
copy from csharp/test/Drivers/Apache/Spark/DriverTests.cs
copy to csharp/test/Drivers/Databricks/DriverTests.cs
index ec49f38a0..cbf180ce8 100644
--- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs
+++ b/csharp/test/Drivers/Databricks/DriverTests.cs
@@ -18,15 +18,16 @@
using System;
using System.Collections.Generic;
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class DriverTests : Common.DriverTests<SparkTestConfiguration,
SparkTestEnvironment>
+ public class DriverTests : DriverTests<DatabricksTestConfiguration,
DatabricksTestEnvironment>
{
public DriverTests(ITestOutputHelper? outputHelper)
- : base(outputHelper, new SparkTestEnvironment.Factory())
+ : base(outputHelper, new DatabricksTestEnvironment.Factory())
{
}
@@ -112,7 +113,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
protected override IReadOnlyList<int> GetUpdateExpectedResults()
{
int affectedRows = ValidateAffectedRows ? 1 : -1;
- return ClientTests.GetUpdateExpectedResults(affectedRows,
TestEnvironment.ServerType == SparkServerType.Databricks);
+ return ClientTests.GetUpdateExpectedResults(affectedRows, true);
}
public static IEnumerable<object[]> CatalogNamePatternData()
diff --git a/csharp/test/Drivers/Databricks/NumericValueTests.cs
b/csharp/test/Drivers/Databricks/NumericValueTests.cs
new file mode 100644
index 000000000..db2711abc
--- /dev/null
+++ b/csharp/test/Drivers/Databricks/NumericValueTests.cs
@@ -0,0 +1,65 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System.Threading.Tasks;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
+{
+ public class NumericValueTests :
NumericValueTests<DatabricksTestConfiguration, DatabricksTestEnvironment>
+ {
+ public NumericValueTests(ITestOutputHelper output)
+ : base(output, new DatabricksTestEnvironment.Factory())
+ {
+ }
+
+ [SkippableTheory]
+ [InlineData(0)]
+ [InlineData(0.2)]
+ [InlineData(15e-03)]
+ [InlineData(1.234E+2)]
+ [InlineData(double.NegativeInfinity)]
+ [InlineData(double.PositiveInfinity)]
+ [InlineData(double.NaN)]
+ [InlineData(double.MinValue)]
+ [InlineData(double.MaxValue)]
+ public override async Task TestDoubleValuesInsertSelectDelete(double
value)
+ {
+ await base.TestDoubleValuesInsertSelectDelete(value);
+ }
+
+ [SkippableTheory]
+ [InlineData(0)]
+ [InlineData(25)]
+ [InlineData(float.NegativeInfinity)]
+ [InlineData(float.PositiveInfinity)]
+ [InlineData(float.NaN)]
+ // TODO: Solve server issue when non-integer float value is used in
where clause.
+ //[InlineData(25.1)]
+ //[InlineData(0.2)]
+ //[InlineData(15e-03)]
+ //[InlineData(1.234E+2)]
+ //[InlineData(float.MinValue)]
+ //[InlineData(float.MaxValue)]
+ public override async Task TestFloatValuesInsertSelectDelete(float
value)
+ {
+ await base.TestFloatValuesInsertSelectDelete(value);
+ }
+ }
+}
diff --git
a/csharp/test/Drivers/Apache/Spark/Resources/SparkData-Databricks.sql
b/csharp/test/Drivers/Databricks/Resources/Databricks.sql
similarity index 100%
rename from csharp/test/Drivers/Apache/Spark/Resources/SparkData-Databricks.sql
rename to csharp/test/Drivers/Databricks/Resources/Databricks.sql
diff --git
a/csharp/test/Drivers/Apache/Spark/Resources/sparkconfig-databricks.json
b/csharp/test/Drivers/Databricks/Resources/databricks.json
similarity index 100%
rename from
csharp/test/Drivers/Apache/Spark/Resources/sparkconfig-databricks.json
rename to csharp/test/Drivers/Databricks/Resources/databricks.json
diff --git a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
b/csharp/test/Drivers/Databricks/StatementTests.cs
similarity index 88%
copy from csharp/test/Drivers/Apache/Spark/StatementTests.cs
copy to csharp/test/Drivers/Databricks/StatementTests.cs
index b313d1d62..8f06c63c8 100644
--- a/csharp/test/Drivers/Apache/Spark/StatementTests.cs
+++ b/csharp/test/Drivers/Databricks/StatementTests.cs
@@ -17,29 +17,21 @@
using System;
using System.Collections.Generic;
-using System.Linq;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Drivers.Databricks;
using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
- public class StatementTests :
Common.StatementTests<SparkTestConfiguration, SparkTestEnvironment>
+ public class StatementTests : StatementTests<DatabricksTestConfiguration,
DatabricksTestEnvironment>
{
public StatementTests(ITestOutputHelper? outputHelper)
- : base(outputHelper, new SparkTestEnvironment.Factory())
+ : base(outputHelper, new DatabricksTestEnvironment.Factory())
{
}
- [SkippableTheory]
- [ClassData(typeof(LongRunningStatementTimeoutTestData))]
- internal override void StatementTimeoutTest(StatementWithExceptions
statementWithExceptions)
- {
- base.StatementTimeoutTest(statementWithExceptions);
- }
-
[SkippableTheory]
[InlineData(true, "CloudFetch enabled")]
[InlineData(false, "CloudFetch disabled")]
@@ -52,7 +44,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
using var statement = connection.CreateStatement();
// Set options for LZ4 decompression (enabled by default) and
CloudFetch as specified
- statement.SetOption(SparkStatement.Options.UseCloudFetch,
useCloudFetch.ToString().ToLower());
+ statement.SetOption(DatabricksStatement.Options.UseCloudFetch,
useCloudFetch.ToString().ToLower());
OutputHelper?.WriteLine($"CloudFetch is {(useCloudFetch ?
"enabled" : "disabled")}");
OutputHelper?.WriteLine("LZ4 decompression capability is enabled
by default");
@@ -84,6 +76,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
OutputHelper?.WriteLine("NOTE: Whether actual LZ4 compression was
used is determined by the server");
}
+
+ [SkippableTheory]
+ [ClassData(typeof(LongRunningStatementTimeoutTestData))]
+ internal override void StatementTimeoutTest(StatementWithExceptions
statementWithExceptions)
+ {
+ base.StatementTimeoutTest(statementWithExceptions);
+ }
+
internal class LongRunningStatementTimeoutTestData :
ShortRunningStatementTimeoutTestData
{
public LongRunningStatementTimeoutTestData()
@@ -99,21 +99,18 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[SkippableFact]
public async Task CanGetPrimaryKeysDatabricks()
{
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
await base.CanGetPrimaryKeys(TestConfiguration.Metadata.Catalog,
TestConfiguration.Metadata.Schema);
}
[SkippableFact]
public async Task CanGetCrossReferenceFromParentTableDatabricks()
{
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
await
base.CanGetCrossReferenceFromParentTable(TestConfiguration.Metadata.Catalog,
TestConfiguration.Metadata.Schema);
}
[SkippableFact]
public async Task CanGetCrossReferenceFromChildTableDatabricks()
{
- Skip.If(TestEnvironment.ServerType != SparkServerType.Databricks);
await
base.CanGetCrossReferenceFromChildTable(TestConfiguration.Metadata.Catalog,
TestConfiguration.Metadata.Schema);
}
diff --git a/csharp/test/Drivers/Apache/Spark/StringValueTests.cs
b/csharp/test/Drivers/Databricks/StringValueTests.cs
similarity index 64%
copy from csharp/test/Drivers/Apache/Spark/StringValueTests.cs
copy to csharp/test/Drivers/Databricks/StringValueTests.cs
index 3ac5306c0..52890a518 100644
--- a/csharp/test/Drivers/Apache/Spark/StringValueTests.cs
+++ b/csharp/test/Drivers/Databricks/StringValueTests.cs
@@ -16,28 +16,26 @@
*/
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Spark;
+using Apache.Arrow.Adbc.Tests.Drivers.Apache.Common;
using Xunit;
using Xunit.Abstractions;
-namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
+namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
public class StringValueTests(ITestOutputHelper output)
- : Common.StringValueTests<SparkTestConfiguration,
SparkTestEnvironment>(output, new SparkTestEnvironment.Factory())
+ : StringValueTests<DatabricksTestConfiguration,
DatabricksTestEnvironment>(output, new DatabricksTestEnvironment.Factory())
{
[SkippableTheory]
- [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.", SparkServerType.Databricks)]
- internal async Task TestStringDataDatabricks(string? value,
SparkServerType serverType)
+ [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.")]
+ internal async Task TestStringDataDatabricks(string? value)
{
- Skip.If(TestEnvironment.ServerType != serverType);
await TestStringData(value);
}
[SkippableTheory]
- [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.", SparkServerType.Databricks)]
- internal async Task TestVarcharDataDatabricks(string? value,
SparkServerType serverType)
+ [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.")]
+ internal async Task TestVarcharDataDatabricks(string? value)
{
- Skip.If(TestEnvironment.ServerType != serverType);
await TestVarcharData(value);
}
@@ -52,24 +50,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
}
[SkippableTheory]
- [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.", SparkServerType.Databricks)]
- internal async Task TestCharDataDatabricks(string? value,
SparkServerType serverType)
+ [InlineData("String contains formatting characters tab\t, newline\n,
carriage return\r.")]
+ internal async Task TestCharDataDatabricks(string? value)
{
- Skip.If(TestEnvironment.ServerType != serverType);
await base.TestCharData(value);
}
protected override async Task TestVarcharExceptionData(string value,
string[] expectedTexts, string? expectedSqlState)
{
- Skip.If(TestEnvironment.ServerType == SparkServerType.Databricks);
- await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);
- }
-
- [SkippableTheory]
- [InlineData("String whose length is too long for VARCHAR(10).", new
string[] { "Exceeds", "length limitation: 10" }, null)]
- public async Task TestVarcharExceptionDataSpark(string value, string[]
expectedTexts, string? expectedSqlState)
- {
- Skip.If(TestEnvironment.ServerType == SparkServerType.Databricks,
$"Server type: {TestEnvironment.ServerType}");
await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);
}
@@ -77,7 +65,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
[InlineData("String whose length is too long for VARCHAR(10).", new
string[] { "DELTA_EXCEED_CHAR_VARCHAR_LIMIT",
"DeltaInvariantViolationException" }, "22001")]
public async Task TestVarcharExceptionDataDatabricks(string value,
string[] expectedTexts, string? expectedSqlState)
{
- Skip.IfNot(TestEnvironment.ServerType ==
SparkServerType.Databricks, $"Server type: {TestEnvironment.ServerType}");
await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);
}
}