rchowell commented on code in PR #1920:
URL: https://github.com/apache/iceberg-python/pull/1920#discussion_r2052639531
##########
pyiceberg/catalog/glue.py:
##########
@@ -303,32 +303,46 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs:
Any) -> None:
class GlueCatalog(MetastoreCatalog):
- def __init__(self, name: str, **properties: Any):
- super().__init__(name, **properties)
+ glue: GlueClient
- retry_mode_prop_value = get_first_property_value(properties,
GLUE_RETRY_MODE)
+ def __init__(self, name: str, client: Optional[GlueClient] = None,
**properties: Any):
+ """Glue Catalog.
- session = boto3.Session(
- profile_name=properties.get(GLUE_PROFILE_NAME),
- region_name=get_first_property_value(properties, GLUE_REGION,
AWS_REGION),
- botocore_session=properties.get(BOTOCORE_SESSION),
- aws_access_key_id=get_first_property_value(properties,
GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
- aws_secret_access_key=get_first_property_value(properties,
GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
- aws_session_token=get_first_property_value(properties,
GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
- )
- self.glue: GlueClient = session.client(
- "glue",
- endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT),
- config=Config(
- retries={
- "max_attempts": properties.get(GLUE_MAX_RETRIES,
MAX_RETRIES),
- "mode": retry_mode_prop_value if retry_mode_prop_value in
EXISTING_RETRY_MODES else STANDARD_RETRY_MODE,
- }
- ),
- )
+ You either need to provide a boto3 glue client, or one will be
constructed from the properties.
+
+ Args:
+ name: Name to identify the catalog.
+ client: An optional boto3 glue client.
+ properties: Properties for glue client construction and
configuration.
+ """
+ super().__init__(name, **properties)
+
+ if client:
+ self.glue = client
+ else:
+ retry_mode_prop_value = get_first_property_value(properties,
GLUE_RETRY_MODE)
+
+ session = boto3.Session(
+ profile_name=properties.get(GLUE_PROFILE_NAME),
+ region_name=get_first_property_value(properties, GLUE_REGION,
AWS_REGION),
+ botocore_session=properties.get(BOTOCORE_SESSION),
+ aws_access_key_id=get_first_property_value(properties,
GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+ aws_secret_access_key=get_first_property_value(properties,
GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+ aws_session_token=get_first_property_value(properties,
GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
+ )
+ self.glue: GlueClient = session.client(
+ "glue",
+ endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT),
+ config=Config(
+ retries={
+ "max_attempts": properties.get(GLUE_MAX_RETRIES,
MAX_RETRIES),
+ "mode": retry_mode_prop_value if retry_mode_prop_value
in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE,
+ }
+ ),
+ )
- if glue_catalog_id := properties.get(GLUE_ID):
- _register_glue_catalog_id_with_glue_client(self.glue,
glue_catalog_id)
+ if glue_catalog_id := properties.get(GLUE_ID):
+ _register_glue_catalog_id_with_glue_client(self.glue,
glue_catalog_id)
Review Comment:
@kevinjqliu good question, I left this out intentionally so that we do not
make any modifications to the customer's input client / impede on their
existing events. Since the catalog_id parameter is optional, it doesn't make a
functional difference afaik. Something to consider is using the [unique_id
arg](https://github.com/boto/botocore/blob/aaa6690e45c8dabcde3a8d2d1aa34b5fd399fba7/botocore/hooks.py#L89)
when registering an event. Let me know what you think, and I can follow-up 👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]