CurtHagenlocher commented on code in PR #2540: URL: https://github.com/apache/arrow-adbc/pull/2540#discussion_r1970329654
########## csharp/src/Drivers/Apache/Hive2/README.md: ########## @@ -0,0 +1,98 @@ +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + +# Hive Driver + +## Database and Connection Properties + +Properties should be passed in the call to `HiveServer2Driver.Open`, +but can also be passed in the call to `AdbcDatabase.Connect`. + +| Property | Description | Default | +| :--- | :--- | :--- | +| `adbc.hive.type` | (Required) Indicates the Hive transport type. `http` | | Review Comment: Should this be e.g. `adbc.hive.transport_type`? The current name makes it look like it might be a server type and not a transport type. ########## csharp/src/Drivers/Apache/Hive2/SqlTypeNameParser.cs: ########## @@ -336,8 +336,9 @@ internal class SqlCharTypeParser : SqlTypeNameParser<SqlCharVarcharParserResult> public override string BaseTypeName => "CHAR"; + // Allow no precision definition to be optional Review Comment: nit: remove either "no" or "to be optional". (I think removing "no" makes more sense.) ########## csharp/src/Drivers/Apache/Hive2/SqlTypeNameParser.cs: ########## @@ -349,7 +350,7 @@ protected override SqlCharVarcharParserResult GenerateResult(string input, Match int precision = int.TryParse(precisionGroup.Value, out int candidatePrecision) ? candidatePrecision - : throw new ArgumentException($"Unable to parse length: '{precisionGroup.Value}'", nameof(input)); + : -1; // precision not found Review Comment: Why isn't this something like `SqlVarcharTypeParser.VarcharColumnSizeDefault`? Do we need two different "magic constants", and if so, why? ########## csharp/src/Drivers/Apache/Hive2/HiveServer2ConnectionFactory.cs: ########## @@ -0,0 +1,38 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +using System; +using System.Collections.Generic; + +namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2 +{ + internal class HiveServer2ConnectionFactory + { + public static HiveServer2Connection NewConnection(IReadOnlyDictionary<string, string> properties) + { + bool _ = properties.TryGetValue(HiveServer2Parameters.Type, out string? type); + bool __ = HiveServer2TypeParser.TryParse(type, out HiveServer2Type typeValue); Review Comment: This is what I mean about unidiomatic. I'd prefer following the more standard .NET pattern and doing ``` if (!properties.TryGetValue(HiveServer2Parameters.Type, out string? type) || !HiveServer2TypeParser.TryParse(type, out HiveServer2Type typeValue)) { throw new ... } ``` ########## csharp/src/Drivers/Apache/Hive2/README.md: ########## @@ -0,0 +1,98 @@ +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + +# Hive Driver + +## Database and Connection Properties + +Properties should be passed in the call to `HiveServer2Driver.Open`, +but can also be passed in the call to `AdbcDatabase.Connect`. + +| Property | Description | Default | +| :--- | :--- | :--- | +| `adbc.hive.type` | (Required) Indicates the Hive transport type. `http` | | +| `adbc.hive.auth_type` | An indicator of the intended type of authentication. Allowed values: `none`, `username_only` and `basic`. This property is optional. The authentication type can be inferred from `username`, and `password`. | | +| `adbc.hive.host` | Host name for the data source. Do not include scheme or port number. Example: `hiveserver.region.cloudapp.azure.com` | | +| `adbc.hive.port` | The port number the data source listens on for a new connections. | `443` | +| `adbc.hive.path` | The URI path on the data source server. Example: `/hive2` | | +| `uri` | The full URI that includes scheme, host, port and path. If set, this property takes precedence over `adbc.hive.host`, `adbc.hive.port` and `adbc.hive.path`. | | Review Comment: Would it be better to produce an error if both are specified? ########## csharp/src/Drivers/Apache/Hive2/HiveServer2ConnectionFactory.cs: ########## @@ -0,0 +1,38 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +using System; +using System.Collections.Generic; + +namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2 +{ + internal class HiveServer2ConnectionFactory + { + public static HiveServer2Connection NewConnection(IReadOnlyDictionary<string, string> properties) + { + bool _ = properties.TryGetValue(HiveServer2Parameters.Type, out string? type); + bool __ = HiveServer2TypeParser.TryParse(type, out HiveServer2Type typeValue); + return typeValue switch + { + HiveServer2Type.Http => new HiveServer2HttpConnection(properties), + HiveServer2Type.Empty => throw new ArgumentException($"Required property '{HiveServer2Parameters.Type}' is missing. Supported types: {HiveServer2TypeParser.SupportedList}", nameof(properties)), + _ => throw new ArgumentOutOfRangeException(nameof(properties), $"Unsupported or unknown value '{type}' given for property '{HiveServer2Parameters.Type}'. Supported types: {HiveServer2TypeParser.SupportedList}"), + }; + } + Review Comment: nit: extra blank line ########## csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs: ########## @@ -80,6 +82,12 @@ public HiveServer2Reader( _dataTypeConversion = dataTypeConversion; } + public bool EnableBatchSizeStopCondition Review Comment: Consider making this a property with a default value of `true` which the `HiveServer2HttpConnection` class overrides and sets to `false`. This is the same pattern as `ColumnMapIndexOffset`. Having this be a settable property is not needed and potentially confusing. ########## csharp/src/Drivers/Apache/Hive2/HiveServer2AuthType.cs: ########## @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2 +{ + internal enum HiveServer2AuthType + { + Invalid = 0, Review Comment: This strikes me as somewhat unidiomatic in that it leads to consuming code which ignores the boolean result. For me, at least, a "Try" method that ignores the result value is a big red flag. ########## csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs: ########## @@ -0,0 +1,342 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Net; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Net.Security; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using Apache.Arrow.Ipc; +using Apache.Hive.Service.Rpc.Thrift; +using Thrift; +using Thrift.Protocol; +using Thrift.Transport; + +namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2 +{ + internal class HiveServer2HttpConnection : HiveServer2Connection + { + private const string ProductVersionDefault = "1.0.0"; + private const string DriverName = "ADBC Hive Driver"; + private const string ArrowVersion = "1.0.0"; + private const string BasicAuthenticationScheme = "Basic"; + private readonly Lazy<string> _productVersion; + private static readonly string s_userAgent = $"{DriverName.Replace(" ", "")}/{ProductVersionDefault}"; + + protected override string GetProductVersionDefault() => ProductVersionDefault; + + protected override string ProductVersion => _productVersion.Value; + + public HiveServer2HttpConnection(IReadOnlyDictionary<string, string> properties) : base(properties) + { + ValidateProperties(); + _productVersion = new Lazy<string>(() => GetProductVersion(), LazyThreadSafetyMode.PublicationOnly); + } + + private void ValidateProperties() + { + ValidateAuthentication(); + ValidateConnection(); + ValidateOptions(); + } + + private void ValidateAuthentication() + { + // Validate authentication parameters + Properties.TryGetValue(AdbcOptions.Username, out string? username); + Properties.TryGetValue(AdbcOptions.Password, out string? password); + Properties.TryGetValue(HiveServer2Parameters.AuthType, out string? authType); + bool isValidAuthType = HiveServer2AuthTypeParser.TryParse(authType, out HiveServer2AuthType authTypeValue); + switch (authTypeValue) + { + case HiveServer2AuthType.Basic: + if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password)) + throw new ArgumentException( + $"Parameter '{HiveServer2Parameters.AuthType}' is set to '{HiveServer2AuthTypeConstants.Basic}' but parameters '{AdbcOptions.Username}' or '{AdbcOptions.Password}' are not set. Please provide a values for these parameters.", + nameof(Properties)); + break; + case HiveServer2AuthType.UsernameOnly: + if (string.IsNullOrWhiteSpace(username)) + throw new ArgumentException( + $"Parameter '{HiveServer2Parameters.AuthType}' is set to '{HiveServer2AuthTypeConstants.UsernameOnly}' but parameter '{AdbcOptions.Username}' is not set. Please provide a values for this parameter.", + nameof(Properties)); + break; + case HiveServer2AuthType.None: + break; + case HiveServer2AuthType.Empty: + if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password)) + throw new ArgumentException( + $"Parameters must include valid authentiation settings. Please provide '{AdbcOptions.Username}' and '{AdbcOptions.Password}'.", Review Comment: typo: `authentication` ########## csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs: ########## @@ -0,0 +1,342 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Net; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Net.Security; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using Apache.Arrow.Ipc; +using Apache.Hive.Service.Rpc.Thrift; +using Thrift; +using Thrift.Protocol; +using Thrift.Transport; + +namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2 +{ + internal class HiveServer2HttpConnection : HiveServer2Connection + { + private const string ProductVersionDefault = "1.0.0"; + private const string DriverName = "ADBC Hive Driver"; + private const string ArrowVersion = "1.0.0"; + private const string BasicAuthenticationScheme = "Basic"; + private readonly Lazy<string> _productVersion; + private static readonly string s_userAgent = $"{DriverName.Replace(" ", "")}/{ProductVersionDefault}"; + + protected override string GetProductVersionDefault() => ProductVersionDefault; + + protected override string ProductVersion => _productVersion.Value; + + public HiveServer2HttpConnection(IReadOnlyDictionary<string, string> properties) : base(properties) + { + ValidateProperties(); + _productVersion = new Lazy<string>(() => GetProductVersion(), LazyThreadSafetyMode.PublicationOnly); + } + + private void ValidateProperties() + { + ValidateAuthentication(); + ValidateConnection(); + ValidateOptions(); + } + + private void ValidateAuthentication() + { + // Validate authentication parameters + Properties.TryGetValue(AdbcOptions.Username, out string? username); + Properties.TryGetValue(AdbcOptions.Password, out string? password); + Properties.TryGetValue(HiveServer2Parameters.AuthType, out string? authType); + bool isValidAuthType = HiveServer2AuthTypeParser.TryParse(authType, out HiveServer2AuthType authTypeValue); + switch (authTypeValue) + { + case HiveServer2AuthType.Basic: + if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password)) + throw new ArgumentException( + $"Parameter '{HiveServer2Parameters.AuthType}' is set to '{HiveServer2AuthTypeConstants.Basic}' but parameters '{AdbcOptions.Username}' or '{AdbcOptions.Password}' are not set. Please provide a values for these parameters.", + nameof(Properties)); + break; + case HiveServer2AuthType.UsernameOnly: + if (string.IsNullOrWhiteSpace(username)) + throw new ArgumentException( + $"Parameter '{HiveServer2Parameters.AuthType}' is set to '{HiveServer2AuthTypeConstants.UsernameOnly}' but parameter '{AdbcOptions.Username}' is not set. Please provide a values for this parameter.", + nameof(Properties)); + break; + case HiveServer2AuthType.None: + break; + case HiveServer2AuthType.Empty: + if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password)) + throw new ArgumentException( + $"Parameters must include valid authentiation settings. Please provide '{AdbcOptions.Username}' and '{AdbcOptions.Password}'.", + nameof(Properties)); + break; + default: + throw new ArgumentOutOfRangeException(HiveServer2Parameters.AuthType, authType, $"Unsupported {HiveServer2Parameters.AuthType} value."); + } + } + + private void ValidateConnection() + { + // HostName or Uri is required parameter + Properties.TryGetValue(AdbcOptions.Uri, out string? uri); + Properties.TryGetValue(HiveServer2Parameters.HostName, out string? hostName); + if ((Uri.CheckHostName(hostName) == UriHostNameType.Unknown) + && (string.IsNullOrEmpty(uri) || !Uri.TryCreate(uri, UriKind.Absolute, out Uri? _))) + { + throw new ArgumentException( + $"Required parameter '{HiveServer2Parameters.HostName}' or '{AdbcOptions.Uri}' is missing or invalid. Please provide a valid hostname or URI for the data source.", + nameof(Properties)); + } + + // Validate port range + Properties.TryGetValue(HiveServer2Parameters.Port, out string? port); + if (int.TryParse(port, out int portNumber) && (portNumber <= IPEndPoint.MinPort || portNumber > IPEndPoint.MaxPort)) + throw new ArgumentOutOfRangeException( + nameof(Properties), + port, + $"Parameter '{HiveServer2Parameters.Port}' value is not in the valid range of 1 .. {IPEndPoint.MaxPort}."); + + // Ensure the parameters will produce a valid address + Properties.TryGetValue(HiveServer2Parameters.Path, out string? path); + _ = new HttpClient() + { + BaseAddress = GetBaseAddress(uri, hostName, path, port) + }; + } + + private void ValidateOptions() + { + Properties.TryGetValue(HiveServer2Parameters.DataTypeConv, out string? dataTypeConv); + DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv); + Properties.TryGetValue(HiveServer2Parameters.TLSOptions, out string? tlsOptions); + TlsOptions = TlsOptionsParser.Parse(tlsOptions); + Properties.TryGetValue(HiveServer2Parameters.ConnectTimeoutMilliseconds, out string? connectTimeoutMs); + if (connectTimeoutMs != null) + { + ConnectTimeoutMilliseconds = int.TryParse(connectTimeoutMs, NumberStyles.Integer, CultureInfo.InvariantCulture, out int connectTimeoutMsValue) && (connectTimeoutMsValue >= 0) + ? connectTimeoutMsValue + : throw new ArgumentOutOfRangeException(HiveServer2Parameters.ConnectTimeoutMilliseconds, connectTimeoutMs, $"must be a value of 0 (infinite) or between 1 .. {int.MaxValue}. default is 30000 milliseconds."); + } + } + + public override AdbcStatement CreateStatement() + { + return new HiveServer2Statement(this); + } + + internal override IArrowArrayStream NewReader<T>(T statement, Schema schema) + { + var reader = new HiveServer2Reader(statement, schema, dataTypeConversion: statement.Connection.DataTypeConversion); + reader.EnableBatchSizeStopCondition = false; + return reader; + } + + protected override TTransport CreateTransport() + { + // Assumption: parameters have already been validated. + Properties.TryGetValue(HiveServer2Parameters.HostName, out string? hostName); + Properties.TryGetValue(HiveServer2Parameters.Path, out string? path); + Properties.TryGetValue(HiveServer2Parameters.Port, out string? port); + Properties.TryGetValue(HiveServer2Parameters.AuthType, out string? authType); + bool isValidAuthType = HiveServer2AuthTypeParser.TryParse(authType, out HiveServer2AuthType authTypeValue); + Properties.TryGetValue(AdbcOptions.Username, out string? username); + Properties.TryGetValue(AdbcOptions.Password, out string? password); + Properties.TryGetValue(AdbcOptions.Uri, out string? uri); + + Uri baseAddress = GetBaseAddress(uri, hostName, path, port); + AuthenticationHeaderValue? authenticationHeaderValue = GetAuthenticationHeaderValue(authTypeValue, username, password); + + HttpClientHandler httpClientHandler = NewHttpClientHandler(); + httpClientHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate; + HttpClient httpClient = new(httpClientHandler); + httpClient.BaseAddress = baseAddress; + httpClient.DefaultRequestHeaders.Authorization = authenticationHeaderValue; + httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(s_userAgent); + httpClient.DefaultRequestHeaders.AcceptEncoding.Clear(); + httpClient.DefaultRequestHeaders.AcceptEncoding.Add(new StringWithQualityHeaderValue("identity")); + httpClient.DefaultRequestHeaders.ExpectContinue = false; + + TConfiguration config = new(); + ThriftHttpTransport transport = new(httpClient, config) + { + // This value can only be set before the first call/request. So if a new value for query timeout + // is set, we won't be able to update the value. Setting to ~infinite and relying on cancellation token + // to ensure cancelled correctly. + ConnectTimeout = int.MaxValue, + }; + return transport; + } + + private HttpClientHandler NewHttpClientHandler() + { + HttpClientHandler httpClientHandler = new(); + if (TlsOptions != HiveServer2TlsOption.Empty) + { + httpClientHandler.ServerCertificateCustomValidationCallback = (request, certificate, chain, policyErrors) => + { + Review Comment: nit: remove blank line -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
