This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 861f00938 feat(csharp): improve handling of StructArrays (#2587)
861f00938 is described below
commit 861f00938be1d2df3428b5f5fc1411fe1edaffbe
Author: davidhcoe <[email protected]>
AuthorDate: Mon Mar 10 15:07:02 2025 -0400
feat(csharp): improve handling of StructArrays (#2587)
- improves the handling of structs to return objects or JsonString
(defaults to JsonString to not break existing callers)
- additional testing for each return type
- updates to the ADO.NET wrapper to support both struct types
- fixes https://github.com/apache/arrow-adbc/issues/2586
---------
Co-authored-by: David Coe <>
---
.../Extensions/IArrowArrayExtensions.cs | 121 ++++++++++++------
csharp/src/Client/AdbcDataReader.cs | 8 +-
csharp/src/Client/SchemaConverter.cs | 21 ++-
csharp/src/Client/StructBehavior.cs | 3 +
csharp/src/Client/readme.md | 2 +-
csharp/src/Drivers/BigQuery/BigQueryStatement.cs | 50 +++++++-
csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs | 67 +++++++++-
.../Apache.Arrow.Adbc.Tests/SampleDataBuilder.cs | 8 ++
csharp/test/Drivers/BigQuery/BigQueryData.cs | 141 +++++++++++++--------
.../Drivers/BigQuery/BigQueryTestConfiguration.cs | 9 ++
csharp/test/Drivers/BigQuery/ClientTests.cs | 43 ++++++-
11 files changed, 359 insertions(+), 114 deletions(-)
diff --git a/csharp/src/Apache.Arrow.Adbc/Extensions/IArrowArrayExtensions.cs
b/csharp/src/Apache.Arrow.Adbc/Extensions/IArrowArrayExtensions.cs
index 1ce24e35d..b49904518 100644
--- a/csharp/src/Apache.Arrow.Adbc/Extensions/IArrowArrayExtensions.cs
+++ b/csharp/src/Apache.Arrow.Adbc/Extensions/IArrowArrayExtensions.cs
@@ -25,10 +25,16 @@ using Apache.Arrow.Types;
namespace Apache.Arrow.Adbc.Extensions
{
+ public enum StructResultType
+ {
+ JsonString,
+ Object
+ }
+
public static class IArrowArrayExtensions
{
/// <summary>
- /// Helper extension to get a value from the <see cref="IArrowArray"/>
at the specified index.
+ /// Overloaded. Helper extension to get a value from the <see
cref="IArrowArray"/> at the specified index.
/// </summary>
/// <param name="arrowArray">
/// The Arrow array.
@@ -37,10 +43,30 @@ namespace Apache.Arrow.Adbc.Extensions
/// The index in the array to get the value from.
/// </param>
public static object? ValueAt(this IArrowArray arrowArray, int index)
+ {
+ return ValueAt(arrowArray, index, StructResultType.JsonString);
+ }
+
+ /// <summary>
+ /// Overloaded. Helper extension to get a value from the <see
cref="IArrowArray"/> at the specified index.
+ /// </summary>
+ /// <param name="arrowArray">
+ /// The Arrow array.
+ /// </param>
+ /// <param name="index">
+ /// The index in the array to get the value from.
+ /// </param>
+ /// <param name="resultType">
+ /// The result type to use when the value is a struct (JSON string or object).
+ /// </param>
+ public static object? ValueAt(this IArrowArray arrowArray, int index,
StructResultType resultType = StructResultType.JsonString)
{
if (arrowArray == null) throw new
ArgumentNullException(nameof(arrowArray));
if (index < 0) throw new
ArgumentOutOfRangeException(nameof(index));
+ if (arrowArray.IsNull(index))
+ return null;
+
switch (arrowArray.Data.DataType.TypeId)
{
case ArrowTypeId.Null:
@@ -127,39 +153,47 @@ namespace Apache.Arrow.Adbc.Extensions
throw new NotSupportedException($"Unsupported
interval unit: {((IntervalType)arrowArray.Data.DataType).Unit}");
}
case ArrowTypeId.Binary:
- if (!arrowArray.IsNull(index))
- {
- return
((BinaryArray)arrowArray).GetBytes(index).ToArray();
- }
- else
- {
- return null;
- }
+ return ((BinaryArray)arrowArray).GetBytes(index).ToArray();
case ArrowTypeId.List:
return ((ListArray)arrowArray).GetSlicedValues(index);
case ArrowTypeId.Struct:
- return SerializeToJson(((StructArray)arrowArray), index);
+ StructArray structArray = (StructArray)arrowArray;
+ return resultType == StructResultType.JsonString ?
SerializeToJson(structArray, index) : ParseStructArray(structArray, index);
- // not covered:
- // -- map array
- // -- dictionary array
- // -- fixed size binary
- // -- union array
+ // not covered:
+ // -- map array
+ // -- dictionary array
+ // -- fixed size binary
+ // -- union array
}
return null;
}
/// <summary>
- /// Helper extension to get a value from the <see cref="IArrowArray"/>
at the specified index.
+ /// Overloaded. Helper extension to get a value converter for the <see
cref="IArrowType"/>.
/// </summary>
- /// <param name="arrowArray">
- /// The Arrow array.
- /// </param>
- /// <param name="index">
- /// The index in the array to get the value from.
+ /// <param name="arrayType">
+ /// The Arrow array type.
/// </param>
public static Func<IArrowArray, int, object?> GetValueConverter(this
IArrowType arrayType)
+ {
+ return GetValueConverter(arrayType, StructResultType.JsonString);
+ }
+
+ /// <summary>
+ /// Overloaded. Helper extension to get a value converter for the <see
cref="IArrowType"/>.
+ /// </summary>
+ /// <param name="arrayType">
+ /// The Arrow array type.
+ /// </param>
+ /// <param name="sourceType">
+ /// The incoming <see cref="SourceStringType"/>.
+ /// </param>
+ /// <param name="resultType">
+ /// The return type of an item in a StructArray.
+ /// </param>
+ public static Func<IArrowArray, int, object?> GetValueConverter(this
IArrowType arrayType, StructResultType resultType)
{
if (arrayType == null) throw new
ArgumentNullException(nameof(arrayType));
@@ -198,7 +232,9 @@ namespace Apache.Arrow.Adbc.Extensions
case ArrowTypeId.Int64:
return (array, index) =>
((Int64Array)array).GetValue(index);
case ArrowTypeId.String:
- return (array, index) =>
((StringArray)array).GetString(index);
+ return (array, index) => array.Data.DataType.TypeId ==
ArrowTypeId.Decimal256 ?
+ ((Decimal256Array)array).GetString(index) :
+ ((StringArray)array).GetString(index);
#if NET6_0_OR_GREATER
case ArrowTypeId.Time32:
return (array, index) =>
((Time32Array)array).GetTime(index);
@@ -256,7 +292,9 @@ namespace Apache.Arrow.Adbc.Extensions
case ArrowTypeId.List:
return (array, index) =>
((ListArray)array).GetSlicedValues(index);
case ArrowTypeId.Struct:
- return (array, index) =>
SerializeToJson((StructArray)array, index);
+ return resultType == StructResultType.JsonString ?
+ (array, index) => SerializeToJson((StructArray)array,
index) :
+ (array, index) => ParseStructArray((StructArray)array,
index);
// not covered:
// -- map array
@@ -273,42 +311,45 @@ namespace Apache.Arrow.Adbc.Extensions
/// </summary>
private static string SerializeToJson(StructArray structArray, int
index)
{
- Dictionary<String, object?>? jsonDictionary =
ParseStructArray(structArray, index);
+ Dictionary<string, object?>? obj = ParseStructArray(structArray,
index);
- return JsonSerializer.Serialize(jsonDictionary);
+ return JsonSerializer.Serialize(obj);
}
/// <summary>
- /// Converts a StructArray to a Dictionary<String, object?>.
+ /// Converts an item in the StructArray at the index position to a
Dictionary&lt;string, object?&gt;.
/// </summary>
- private static Dictionary<String, object?>?
ParseStructArray(StructArray structArray, int index)
+ private static Dictionary<string, object?>?
ParseStructArray(StructArray structArray, int index)
{
if (structArray.IsNull(index))
return null;
- Dictionary<String, object?> jsonDictionary = new
Dictionary<String, object?>();
+ Dictionary<string, object?> jsonDictionary = new
Dictionary<string, object?>();
+
StructType structType = (StructType)structArray.Data.DataType;
for (int i = 0; i < structArray.Data.Children.Length; i++)
{
string name = structType.Fields[i].Name;
- object? value = ValueAt(structArray.Fields[i], index);
+
+ // keep the results as StructArray internally
+ object? value = ValueAt(structArray.Fields[i], index,
StructResultType.Object);
if (value is StructArray structArray1)
{
- List<Dictionary<string, object?>?> children = new
List<Dictionary<string, object?>?>();
-
- for (int j = 0; j < structArray1.Length; j++)
+ if (structArray1.Length == 0)
{
- children.Add(ParseStructArray(structArray1, j));
- }
-
- if (children.Count > 0)
- {
- jsonDictionary.Add(name, children);
+ jsonDictionary.Add(name, null);
}
else
{
- jsonDictionary.Add(name,
ParseStructArray(structArray1, index));
+ List<Dictionary<string, object?>?> children = new
List<Dictionary<string, object?>?>();
+
+ for (int j = 0; j < structArray1.Length; j++)
+ {
+ children.Add(ParseStructArray(structArray1, j));
+ }
+
+ jsonDictionary.Add(name, children);
}
}
else if (value is IArrowArray arrowArray)
@@ -319,7 +360,7 @@ namespace Apache.Arrow.Adbc.Extensions
{
for (int j = 0; j < arrowArray.Length; j++)
{
- values.Add(ValueAt(arrowArray, j));
+ values.Add(ValueAt(arrowArray, j,
StructResultType.Object));
}
jsonDictionary.Add(name, values);
diff --git a/csharp/src/Client/AdbcDataReader.cs
b/csharp/src/Client/AdbcDataReader.cs
index 17c665669..03d4f564a 100644
--- a/csharp/src/Client/AdbcDataReader.cs
+++ b/csharp/src/Client/AdbcDataReader.cs
@@ -86,10 +86,12 @@ namespace Apache.Arrow.Adbc.Client
this.DecimalBehavior = decimalBehavior;
this.StructBehavior = structBehavior;
+ StructResultType structResultType = this.StructBehavior ==
StructBehavior.JsonString ? StructResultType.JsonString :
StructResultType.Object;
+
this.converters = new Func<IArrowArray, int,
object?>[this.schema.FieldsList.Count];
for (int i = 0; i < this.converters.Length; i++)
{
- this.converters[i] =
this.schema.FieldsList[i].DataType.GetValueConverter();
+ this.converters[i] =
this.schema.FieldsList[i].DataType.GetValueConverter(structResultType);
}
}
@@ -372,7 +374,9 @@ namespace Apache.Arrow.Adbc.Client
}
else
{
- dbColumns.Add(new AdbcColumn(f.Name, t, f.DataType,
f.IsNullable));
+ IArrowType arrowType =
SchemaConverter.GetArrowTypeBasedOnRequestedBehavior(f.DataType,
this.StructBehavior);
+
+ dbColumns.Add(new AdbcColumn(f.Name, t, arrowType,
f.IsNullable));
}
}
diff --git a/csharp/src/Client/SchemaConverter.cs
b/csharp/src/Client/SchemaConverter.cs
index dae2dbf3c..2603c78ca 100644
--- a/csharp/src/Client/SchemaConverter.cs
+++ b/csharp/src/Client/SchemaConverter.cs
@@ -16,6 +16,7 @@
*/
using System;
+using System.Collections.Generic;
using System.Data;
using System.Data.Common;
using System.Data.SqlTypes;
@@ -60,7 +61,7 @@ namespace Apache.Arrow.Adbc.Client
row[SchemaTableColumn.ColumnName] = f.Name;
row[SchemaTableColumn.ColumnOrdinal] = columnOrdinal;
row[SchemaTableColumn.AllowDBNull] = f.IsNullable;
- row[SchemaTableColumn.ProviderType] = f.DataType;
+ row[SchemaTableColumn.ProviderType] =
SchemaConverter.GetArrowTypeBasedOnRequestedBehavior(f.DataType,
structBehavior);
Type t = ConvertArrowType(f, decimalBehavior, structBehavior);
row[SchemaTableColumn.DataType] = t;
@@ -193,10 +194,7 @@ namespace Apache.Arrow.Adbc.Client
return typeof(string);
case ArrowTypeId.Struct:
- if (structBehavior == StructBehavior.JsonString)
- return typeof(string);
- else
- goto default;
+ return structBehavior == StructBehavior.JsonString ?
typeof(string) : typeof(Dictionary<string, object?>);
case ArrowTypeId.Timestamp:
return typeof(DateTimeOffset);
@@ -271,5 +269,18 @@ namespace Apache.Arrow.Adbc.Client
throw new InvalidCastException($"Cannot determine the array type
for {dataType.Name}");
}
+
+ /// <summary>
+ /// Get the IArrowType based on the input IArrowType and the desired
<see cref="StructBehavior"/>.
+ /// If it's a StructType and the desired behavior is a JsonString then
this returns StringType.
+ /// Otherwise, it returns the input IArrowType.
+ /// </summary>
+ /// <param name="defaultType">The default IArrowType to return.</param>
+ /// <param name="structBehavior">Desired behavior if the IArrowType is
a StructType.</param>
+ /// <returns></returns>
+ public static IArrowType
GetArrowTypeBasedOnRequestedBehavior(IArrowType defaultType, StructBehavior
structBehavior)
+ {
+ return defaultType.TypeId == ArrowTypeId.Struct && structBehavior
== StructBehavior.JsonString ? StringType.Default : defaultType;
+ }
}
}
diff --git a/csharp/src/Client/StructBehavior.cs
b/csharp/src/Client/StructBehavior.cs
index 9911a1aec..68e865eda 100644
--- a/csharp/src/Client/StructBehavior.cs
+++ b/csharp/src/Client/StructBehavior.cs
@@ -17,6 +17,9 @@
namespace Apache.Arrow.Adbc.Client
{
+ /// <summary>
+ /// Controls the behavior of how StructArrays should be handled in the
results.
+ /// </summary>
public enum StructBehavior
{
/// <summary>
diff --git a/csharp/src/Client/readme.md b/csharp/src/Client/readme.md
index 59f647a37..51076001e 100644
--- a/csharp/src/Client/readme.md
+++ b/csharp/src/Client/readme.md
@@ -80,5 +80,5 @@ These properties are:
- __AdbcConnectionTimeout__ - This specifies the connection timeout value. The
value needs to be in the form (driver.property.name, integer, unit) where the
unit is one of `s` or `ms`, For example,
`AdbcConnectionTimeout=(adbc.snowflake.sql.client_option.client_timeout,30,s)`
would set the connection timeout to 30 seconds.
- __AdbcCommandTimeout__ - This specifies the command timeout value. This
follows the same pattern as `AdbcConnectionTimeout` and sets the
`AdbcCommandTimeoutProperty` and `CommandTimeout` values on the `AdbcCommand`
object.
-- __StructBehavior__ - This specifies the StructBehavior when working with
Arrow Struct arrays. The valid values are `JsonString` (the default) or
`Strict` (treat the struct as a native type).
+- __StructBehavior__ - This specifies the StructBehavior when working with
Arrow Struct arrays. The valid values are `JsonString` (the default) or
`Strict` (treat the struct as a native type). If using `JsonString`, the return
ArrowType will be StringType and the result a string value. If using `Strict`,
the return ArrowType will be StructType and the result a `Dictionary<string,
object?>`.
- __DecimalBehavior__ - This specifies the DecimalBehavior when parsing
decimal values from Arrow libraries. The valid values are `UseSqlDecimal` or
`OverflowDecimalAsString` where values like Decimal256 are treated as strings.
diff --git a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
index a4df83043..ddf34104f 100644
--- a/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
+++ b/csharp/src/Drivers/BigQuery/BigQueryStatement.cs
@@ -112,7 +112,13 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
ReadSession rrs = readClient.CreateReadSession("projects/" +
results.TableReference.ProjectId, rs, maxStreamCount);
long totalRows = results.TotalRows == null ? -1L :
(long)results.TotalRows.Value;
- IArrowArrayStream stream = new
MultiArrowReader(TranslateSchema(results.Schema), rrs.Streams.Select(s =>
ReadChunk(readClient, s.Name)));
+
+ var readers = rrs.Streams
+ .Select(s => ReadChunk(readClient, s.Name))
+ .Where(chunk => chunk != null)
+ .Cast<IArrowReader>();
+
+ IArrowArrayStream stream = new
MultiArrowReader(TranslateSchema(results.Schema), readers);
return new QueryResult(totalRows, stream);
}
@@ -175,8 +181,7 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
case "DATE":
return GetType(field, Date32Type.Default);
case "RECORD" or "STRUCT":
- // its a json string
- return GetType(field, StringType.Default);
+ return GetType(field, BuildStructType(field));
// treat these values as strings
case "GEOGRAPHY" or "JSON":
@@ -200,6 +205,19 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
}
}
+ private StructType BuildStructType(TableFieldSchema field)
+ {
+ List<Field> arrowFields = new List<Field>();
+
+ foreach (TableFieldSchema subfield in field.Fields)
+ {
+ Field arrowField = TranslateField(subfield);
+ arrowFields.Add(arrowField);
+ }
+
+ return new StructType(arrowFields.AsReadOnly());
+ }
+
private IArrowType GetType(TableFieldSchema field, IArrowType type)
{
if (field.Mode == "REPEATED")
@@ -208,7 +226,7 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
return type;
}
- static IArrowReader ReadChunk(BigQueryReadClient readClient, string
streamName)
+ static IArrowReader? ReadChunk(BigQueryReadClient readClient, string
streamName)
{
// Ideally we wouldn't need to indirect through a stream, but the
necessary APIs in Arrow
// are internal. (TODO: consider changing Arrow).
@@ -217,7 +235,14 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
ReadRowsStream stream = new ReadRowsStream(enumerator);
- return new ArrowStreamReader(stream);
+ if (stream.HasRows)
+ {
+ return new ArrowStreamReader(stream);
+ }
+ else
+ {
+ return null;
+ }
}
private QueryOptions ValidateOptions()
@@ -349,15 +374,28 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
ReadOnlyMemory<byte> currentBuffer;
bool first;
int position;
+ bool hasRows;
public ReadRowsStream(IAsyncEnumerator<ReadRowsResponse> response)
{
if (!response.MoveNextAsync().Result) { }
- this.currentBuffer =
response.Current.ArrowSchema.SerializedSchema.Memory;
+
+ if (response.Current != null)
+ {
+ this.currentBuffer =
response.Current.ArrowSchema.SerializedSchema.Memory;
+ this.hasRows = true;
+ }
+ else
+ {
+ this.hasRows = false;
+ }
+
this.response = response;
this.first = true;
}
+ public bool HasRows => this.hasRows;
+
public override bool CanRead => true;
public override bool CanSeek => false;
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
b/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
index 3deac6283..0289e4ae2 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/ClientTests.cs
@@ -171,6 +171,14 @@ namespace Apache.Arrow.Adbc.Tests
preQueryCommand.ExecuteNonQuery();
}
+ if (!string.IsNullOrEmpty(sample.StructBehavior))
+ {
+ if (Enum.TryParse(sample.StructBehavior, out
StructBehavior behavior))
+ {
+ adbcConnection.StructBehavior = behavior;
+ }
+ }
+
using AdbcCommand dbCommand = adbcConnection.CreateCommand();
dbCommand.CommandText = sample.Query;
@@ -268,7 +276,25 @@ namespace Apache.Arrow.Adbc.Tests
}
else
{
- Assert.True(ctv.ExpectedValue.Equals(value),
Utils.FormatMessage($"Expected value [{ctv.ExpectedValue}] does not match
actual value [{value}] for {ctv.Name} for query [{query}]", environmentName));
+ bool areEqual = false;
+
+ if (value is Dictionary<string, object?>)
+ {
+ if (value == null && ctv.ExpectedValue == null)
+ {
+ areEqual = true;
+ }
+ else
+ {
+ areEqual = AreDictionariesEqual(value as
Dictionary<string, object?>, ctv.ExpectedValue as Dictionary<string, object?>);
+ }
+ }
+ else
+ {
+ areEqual = ctv.ExpectedValue.Equals(value);
+ }
+
+ Assert.True(areEqual, Utils.FormatMessage($"Expected
value [{ctv.ExpectedValue}] does not match actual value [{value}] for
{ctv.Name} for query [{query}]", environmentName));
}
}
else
@@ -304,5 +330,44 @@ namespace Apache.Arrow.Adbc.Tests
Assert.True(ctv.ExpectedValue == null,
Utils.FormatMessage($"The value for {ctv.Name} is null and but it's expected
value is not null for query [{query}]", environmentName));
}
}
+
+ static bool AreDictionariesEqual(Dictionary<string, object?>? dict1,
Dictionary<string, object?>? dict2)
+ {
+ if (dict1 == null && dict2 == null)
+ {
+ return true;
+ }
+ else if (dict1 != null && dict2 == null)
+ {
+ return false;
+ }
+ else if (dict1 == null && dict2 != null)
+ {
+ return false;
+ }
+
+ if (dict1!.Count != dict2!.Count)
+ return false;
+
+ foreach (var key in dict1.Keys)
+ {
+ if (!dict2.TryGetValue(key, out object? value2))
+ return false;
+
+ object? value1 = dict1[key];
+
+ if (value1 is Dictionary<string, object?> nextObj1 && value2
is Dictionary<string, object?> nextObj2)
+ {
+ if (!AreDictionariesEqual(nextObj1, nextObj2))
+ return false;
+ }
+ else if (!object.Equals(value1, value2))
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
}
}
diff --git a/csharp/test/Apache.Arrow.Adbc.Tests/SampleDataBuilder.cs
b/csharp/test/Apache.Arrow.Adbc.Tests/SampleDataBuilder.cs
index 08375ed42..e929fb518 100644
--- a/csharp/test/Apache.Arrow.Adbc.Tests/SampleDataBuilder.cs
+++ b/csharp/test/Apache.Arrow.Adbc.Tests/SampleDataBuilder.cs
@@ -57,6 +57,14 @@ namespace Apache.Arrow.Adbc.Tests
/// </summary>
public List<string> PostQueryCommands { get; set; } = new
List<string>();
+ /// <summary>
+ /// Optional value to indicate how structs should be treated in the
sample data.
+ /// </summary>
+ /// <example>
+ /// JsonString
+ /// Strict
+ /// </example>
+ public string? StructBehavior { get; set; }
/// <summary>
/// The expected values.
diff --git a/csharp/test/Drivers/BigQuery/BigQueryData.cs
b/csharp/test/Drivers/BigQuery/BigQueryData.cs
index aa58ca022..ed0df8db5 100644
--- a/csharp/test/Drivers/BigQuery/BigQueryData.cs
+++ b/csharp/test/Drivers/BigQuery/BigQueryData.cs
@@ -18,7 +18,9 @@
using System;
using System.Collections.Generic;
using System.Data.SqlTypes;
+using System.Dynamic;
using System.Text;
+using System.Text.Json;
using Apache.Arrow.Types;
namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
@@ -42,6 +44,35 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
SampleDataBuilder sampleDataBuilder = new SampleDataBuilder();
+ Dictionary<string, object?> person = new Dictionary<string,
object?>();
+ person["name"] = "John Doe";
+ person["age"] = 30L;
+
+ // StructBehavior = "Strict"
+ sampleDataBuilder.Samples.Add(
+ new SampleData()
+ {
+ Query = "SELECT " +
+ "STRUCT('John Doe' as name, 30 as age) as person",
+ StructBehavior = "Strict",
+ ExpectedValues = new List<ColumnNetTypeArrowTypeValue>()
+ {
+ new ColumnNetTypeArrowTypeValue("person",
typeof(Dictionary<string, object?>), typeof(StructType), person),
+ }
+ });
+
+ // StructBehavior = "JsonString" (the default)
+ sampleDataBuilder.Samples.Add(
+ new SampleData()
+ {
+ Query = "SELECT " +
+ "STRUCT('John Doe' as name, 30 as age) as person",
+ StructBehavior = "JsonString",
+ ExpectedValues = new List<ColumnNetTypeArrowTypeValue>()
+ {
+ new ColumnNetTypeArrowTypeValue("person",
typeof(string), typeof(StringType), JsonSerializer.Serialize(person)),
+ }
+ });
// standard values
sampleDataBuilder.Samples.Add(
@@ -63,6 +94,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
"ARRAY[1, 2, 3] as numbers, " +
"STRUCT('John Doe' as name, 30 as age) as person,"
+
"PARSE_JSON('{\"name\":\"Jane Doe\",\"age\":29}')
as json",
+ StructBehavior = "Strict",
ExpectedValues = new List<ColumnNetTypeArrowTypeValue>()
{
new ColumnNetTypeArrowTypeValue("id",
typeof(long), typeof(Int64Type), 1L),
@@ -82,7 +114,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
new
ColumnNetTypeArrowTypeValue("timestamp", typeof(DateTimeOffset),
typeof(TimestampType), new DateTimeOffset(new DateTime(2023, 9, 8, 12, 34, 56),
TimeSpan.Zero)),
new ColumnNetTypeArrowTypeValue("point",
typeof(string), typeof(StringType), "POINT(1 2)"),
new ColumnNetTypeArrowTypeValue("numbers",
typeof(Int64Array), typeof(ListType), numbersArray),
- new ColumnNetTypeArrowTypeValue("person",
typeof(string), typeof(StringType), "{\"name\":\"John Doe\",\"age\":30}"),
+
+ new ColumnNetTypeArrowTypeValue("person",
typeof(Dictionary<string, object?>), typeof(StructType), person),
new ColumnNetTypeArrowTypeValue("json",
typeof(string), typeof(StringType), "{\"age\":29,\"name\":\"Jane Doe\"}")
}
});
@@ -125,6 +158,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
"ST_GEOGPOINT(NULL, NULL) as point, " +
"CAST(NULL as ARRAY<INT64>) as numbers, " +
"STRUCT(CAST(NULL as STRING) as name, CAST(NULL as
INT64) as age) as person",
+ StructBehavior = "JsonString",
ExpectedValues = new List<ColumnNetTypeArrowTypeValue>()
{
new ColumnNetTypeArrowTypeValue("id",
typeof(long), typeof(Int64Type), null),
@@ -148,62 +182,61 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
}
});
-
// complex struct
sampleDataBuilder.Samples.Add(
- new SampleData()
- {
- Query = "SELECT " +
- "STRUCT(" +
- "\"Iron Man\" as name," +
- "\"Avengers\" as team," +
- "[\"Genius\", \"Billionaire\", \"Playboy\",
\"Philanthropist\"] as powers," +
- "[" +
- " STRUCT(" +
- " \"Captain America\" as name, " +
- " \"Avengers\" as team, " +
- " [\"Super Soldier Serum\", \"Vibranium Shield\"]
as powers, " +
- " [" +
- " STRUCT(" +
- " \"Thanos\" as name, " +
- " \"Black Order\" as team, " +
- " [\"Infinity Gauntlet\", \"Super Strength\",
\"Teleportation\"] as powers, " +
- " [" +
- " STRUCT(" +
- " \"Loki\" as name, " +
- " \"Asgard\" as team, " +
- " [\"Magic\", \"Shapeshifting\",
\"Trickery\"] as powers " +
- " )" +
- " ] as allies" +
- " )" +
- " ] as enemies" +
- " )," +
- " STRUCT(" +
- " \"Spider-Man\" as name, " +
- " \"Avengers\" as team, " +
- " [\"Spider-Sense\", \"Web-Shooting\",
\"Wall-Crawling\"] as powers, " +
- " [" +
- " STRUCT(" +
- " \"Green Goblin\" as name, " +
- " \"Sinister Six\" as team, " +
- " [\"Glider\", \"Pumpkin Bombs\", \"Super
Strength\"] as powers, " +
- " [" +
- " STRUCT(" +
- " \"Doctor Octopus\" as name, " +
- " \"Sinister Six\" as team, " +
- " [\"Mechanical Arms\", \"Genius\",
\"Madness\"] as powers " +
- " )" +
- " ] as allies" +
- " )" +
- " ] as enemies" +
- " )" +
- " ] as friends" +
- ") as iron_man",
- ExpectedValues = new List<ColumnNetTypeArrowTypeValue>()
- {
+ new SampleData()
+ {
+ Query = "SELECT " +
+ "STRUCT(" +
+ "\"Iron Man\" as name," +
+ "\"Avengers\" as team," +
+ "[\"Genius\", \"Billionaire\", \"Playboy\",
\"Philanthropist\"] as powers," +
+ "[" +
+ " STRUCT(" +
+ " \"Captain America\" as name, " +
+ " \"Avengers\" as team, " +
+ " [\"Super Soldier Serum\", \"Vibranium
Shield\"] as powers, " +
+ " [" +
+ " STRUCT(" +
+ " \"Thanos\" as name, " +
+ " \"Black Order\" as team, " +
+ " [\"Infinity Gauntlet\", \"Super
Strength\", \"Teleportation\"] as powers, " +
+ " [" +
+ " STRUCT(" +
+ " \"Loki\" as name, " +
+ " \"Asgard\" as team, " +
+ " [\"Magic\", \"Shapeshifting\",
\"Trickery\"] as powers " +
+ " )" +
+ " ] as allies" +
+ " )" +
+ " ] as enemies" +
+ " )," +
+ " STRUCT(" +
+ " \"Spider-Man\" as name, " +
+ " \"Avengers\" as team, " +
+ " [\"Spider-Sense\", \"Web-Shooting\",
\"Wall-Crawling\"] as powers, " +
+ " [" +
+ " STRUCT(" +
+ " \"Green Goblin\" as name, " +
+ " \"Sinister Six\" as team, " +
+ " [\"Glider\", \"Pumpkin Bombs\",
\"Super Strength\"] as powers, " +
+ " [" +
+ " STRUCT(" +
+ " \"Doctor Octopus\" as name, " +
+ " \"Sinister Six\" as team, " +
+ " [\"Mechanical Arms\", \"Genius\",
\"Madness\"] as powers " +
+ " )" +
+ " ] as allies" +
+ " )" +
+ " ] as enemies" +
+ " )" +
+ " ] as friends" +
+ ") as iron_man",
+ ExpectedValues = new List<ColumnNetTypeArrowTypeValue>()
+ {
new ColumnNetTypeArrowTypeValue("iron_man",
typeof(string), typeof(StringType), "{\"name\":\"Iron
Man\",\"team\":\"Avengers\",\"powers\":[\"Genius\",\"Billionaire\",\"Playboy\",\"Philanthropist\"],\"friends\":[{\"name\":\"Captain
America\",\"team\":\"Avengers\",\"powers\":[\"Super Soldier
Serum\",\"Vibranium
Shield\"],\"enemies\":[{\"name\":\"Thanos\",\"team\":\"Black
Order\",\"powers\":[\"Infinity Gauntlet\",\"Super
Strength\",\"Teleportation\"],\"allies\":[{\"na [...]
- }
- });
+ }
+ });
return sampleDataBuilder;
}
diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
index f736979bc..9be82bd5c 100644
--- a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
+++ b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
@@ -96,5 +96,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
[JsonPropertyName("maxStreamCount")]
public int? MaxStreamCount { get; set; }
+
+ /// <summary>
+ /// How structs should be handled by the ADO.NET client for this
environment.
+ /// </summary>
+ /// <remarks>
+ /// JsonString or Strict
+ /// </remarks>
+ [JsonPropertyName("structBehavior")]
+ public string? StructBehavior { get; set; }
}
}
diff --git a/csharp/test/Drivers/BigQuery/ClientTests.cs
b/csharp/test/Drivers/BigQuery/ClientTests.cs
index 9405c6277..04b1fe86f 100644
--- a/csharp/test/Drivers/BigQuery/ClientTests.cs
+++ b/csharp/test/Drivers/BigQuery/ClientTests.cs
@@ -17,6 +17,7 @@
using System;
using System.Collections.Generic;
+using System.Data.Common;
using Apache.Arrow.Adbc.Drivers.BigQuery;
using Apache.Arrow.Adbc.Tests.Xunit;
using Xunit;
@@ -212,11 +213,43 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery
{
environment.IncludeTableConstraints = includeTableConstraints;
- return new Adbc.Client.AdbcConnection(
- new BigQueryDriver(),
- BigQueryTestingUtils.GetBigQueryParameters(environment),
- new Dictionary<string, string>()
- );
+ if (string.IsNullOrEmpty(environment.StructBehavior))
+ {
+ Dictionary<string, string> connectionParameters =
BigQueryTestingUtils.GetBigQueryParameters(environment);
+
+ return new Adbc.Client.AdbcConnection(
+ new BigQueryDriver(),
+ connectionParameters,
+ new Dictionary<string, string>()
+ );
+ }
+ else
+ {
+ return GetAdbcConnectionUsingConnectionString(environment,
includeTableConstraints);
+ }
+ }
+
+ private Adbc.Client.AdbcConnection
GetAdbcConnectionUsingConnectionString(
+ BigQueryTestEnvironment environment,
+ bool includeTableConstraints = true
+ )
+ {
+ Dictionary<string, string> connectionParameters =
BigQueryTestingUtils.GetBigQueryParameters(environment);
+
+ if (!string.IsNullOrEmpty(environment.StructBehavior))
+ connectionParameters.Add("StructBehavior",
environment.StructBehavior!);
+
+ DbConnectionStringBuilder builder = new
DbConnectionStringBuilder(true);
+
+ foreach (string key in connectionParameters.Keys)
+ {
+ builder[key] = connectionParameters[key];
+ }
+
+ return new Adbc.Client.AdbcConnection(builder.ConnectionString)
+ {
+ AdbcDriver = new BigQueryDriver()
+ };
}
}
}