kevinjqliu commented on code in PR #766: URL: https://github.com/apache/iceberg-python/pull/766#discussion_r1740136965
########## mkdocs/docs/configuration.md: ########## @@ -268,19 +268,15 @@ catalog: catalog: default: uri: thrift://localhost:9083 + hive.hive2-compatible: true + hive.kerberos-authorization: true s3.endpoint: http://localhost:9000 - s3.access-key-id: admin - s3.secret-access-key: password ``` -When using Hive 2.x, make sure to set the compatibility flag: - -```yaml -catalog: - default: -... - hive.hive2-compatible: true -``` Review Comment: nit: leave this as is, since this is only required for hive2. and maybe add similar block for `hive.kerberos-authorization` since that's also optional ########## pyiceberg/catalog/hive.py: ########## @@ -138,17 +141,34 @@ class _HiveClient: _client: Client _ugi: Optional[List[str]] - def __init__(self, uri: str, ugi: Optional[str] = None): - url_parts = urlparse(uri) - transport = TSocket.TSocket(url_parts.hostname, url_parts.port) - self._transport = TTransport.TBufferedTransport(transport) - protocol = TBinaryProtocol.TBinaryProtocol(transport) + def __init__(self, uri: str, ugi: Optional[str] = None, kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT): + self._uri = uri + self._kerberos_auth = kerberos_auth + self._ugi = ugi.split(":") if ugi else None + + self._init_thrift_client() + + def _init_thrift_client(self): + url_parts = urlparse(self._uri) + + socket = TSocket.TSocket(url_parts.hostname, url_parts.port) + + if not self._kerberos_auth: + self._transport = TTransport.TBufferedTransport(socket) + else: + self._transport = TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service="hive") + + protocol = TBinaryProtocol.TBinaryProtocol(self._transport) self._client = Client(protocol) - self._ugi = ugi.split(":") if ugi else None def __enter__(self) -> Client: - self._transport.open() + if not self._kerberos_auth: + self._transport.open() + else: + self._init_thrift_client() Review Comment: can this be called multiple times? The init function already calls this ########## pyiceberg/catalog/hive.py: ########## @@ -138,17 +141,34 @@ class _HiveClient: _client: Client _ugi: Optional[List[str]] - def __init__(self, uri: str, ugi: Optional[str] = None): - url_parts = urlparse(uri) - transport = TSocket.TSocket(url_parts.hostname, url_parts.port) - self._transport = TTransport.TBufferedTransport(transport) - protocol = TBinaryProtocol.TBinaryProtocol(transport) + def __init__(self, uri: str, ugi: Optional[str] = None, kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT): + self._uri = uri + self._kerberos_auth = kerberos_auth + self._ugi = ugi.split(":") if ugi else None + + self._init_thrift_client() + + def _init_thrift_client(self): + url_parts = urlparse(self._uri) + + socket = TSocket.TSocket(url_parts.hostname, url_parts.port) + + if not self._kerberos_auth: + self._transport = TTransport.TBufferedTransport(socket) + else: + self._transport = TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service="hive") Review Comment: is "hive" a static value here or should it be configurable? ########## pyiceberg/catalog/hive.py: ########## @@ -121,6 +121,9 @@ HIVE2_COMPATIBLE = "hive.hive2-compatible" HIVE2_COMPATIBLE_DEFAULT = False +HIVE_KERBEROS_AUTH = "hive.kerberos-authorization" Review Comment: nit: `authentication` is different than `authorization` ```suggestion HIVE_KERBEROS_AUTH = "hive.kerberos-authentication" ``` Kerberos is authentication https://docs.cloudera.com/cdw-runtime/1.5.1/securing-hive/topics/hive_remote_data_access.html -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org