JDarDagran commented on code in PR #35794:
URL: https://github.com/apache/airflow/pull/35794#discussion_r1402686751


##########
airflow/providers/amazon/aws/hooks/redshift_sql.py:
##########
@@ -174,3 +175,63 @@ def get_conn(self) -> RedshiftConnection:
         conn_kwargs_dejson = self.conn.extra_dejson
         conn_kwargs: dict = {**conn_params, **conn_kwargs_dejson}
         return redshift_connector.connect(**conn_kwargs)
+
+    def get_openlineage_database_info(self, connection) -> DatabaseInfo:
+        """Returns Redshift specific information for OpenLineage."""
+        from airflow.providers.openlineage.sqlparser import DatabaseInfo
+
+        authority = self._get_openlineage_redshift_authority_part(connection)
+
+        return DatabaseInfo(
+            scheme="redshift",
+            authority=authority,
+            database=connection.schema,
+            information_schema_table_name="SVV_REDSHIFT_COLUMNS",
+            information_schema_columns=[
+                "schema_name",
+                "table_name",
+                "column_name",
+                "ordinal_position",
+                "data_type",
+                "database_name",
+            ],
+            is_information_schema_cross_db=True,
+            use_flat_cross_db_query=True,
+        )
+
+    def _get_openlineage_redshift_authority_part(self, connection) -> str:
+        from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+
+        port = connection.port or 5439
+
+        cluster_identifier = None
+
+        if connection.extra_dejson.get("iam", False):
+            cluster_identifier = connection.extra_dejson.get("cluster_identifier")
+            region_name = AwsBaseHook(aws_conn_id=self.aws_conn_id).region_name

Review Comment:
   The precedence is not very well documented in `redshift-connector`.
   Now I think these are the cases:
   
   1. IAM = True:
       a. `cluster_identifier` from `connect` argument, rest from default AWS 
profile.
       b. `cluster_identifier` but also `access_key_id`, 
`secret_access_key`, `session_token`, `region` from `connect` arguments
       c. `cluster_identifier` from `connect` argument but without credentials 
passed explicitly, not sure if `region` has precedence over value from AWS 
default profile
   2. IAM = False: attempt to retrieve region name from hostname
   
   For 1.c. I think `get_iam_token` does not include `region` as an argument, so it 
defaults to the AWS default profile's value.
   
   Feedback from anyone closer to this connector implementation would be really 
helpful :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to