o-nikolas commented on code in PR #66736:
URL: https://github.com/apache/airflow/pull/66736#discussion_r3245064194
##########
providers/amazon/tests/system/amazon/aws/example_emr_eks.py:
##########
@@ -177,8 +176,71 @@ def update_trust_policy_execution_role(cluster_name,
cluster_namespace, role_nam
if build.returncode != 0:
raise RuntimeError(err)
- # Wait for IAM changes to propagate to avoid authentication failures
- time.sleep(int(wait_time))
+
+class TrustPolicyNotPropagatedError(Exception):
+ """Raised when the IAM trust policy has not yet propagated."""
+
+
+@task
+def wait_for_trust_policy_propagation(cluster_name, role_name):
+ """Validate that the IAM trust policy has propagated by checking the role's
+ trust policy contains the expected OIDC provider.
+
+ Uses exponential backoff retries (up to 5 minutes) instead of a fixed
sleep,
+ which avoids both wasting time when propagation is fast and failing when
it's slow.
+ """
+ log = logging.getLogger(__name__)
+
+ # Determine the expected OIDC provider ARN from the EKS cluster
+ eks_client = boto3.client("eks")
+ oidc_issuer_url =
eks_client.describe_cluster(name=cluster_name)["cluster"]["identity"]["oidc"]["issuer"]
+ oidc_issuer_endpoint = oidc_issuer_url.replace("https://", "")
+ account_id = boto3.client("sts").get_caller_identity()["Account"]
+ expected_oidc_provider_arn =
f"arn:aws:iam::{account_id}:oidc-provider/{oidc_issuer_endpoint}"
+
+ @retry(
+ retry=retry_if_exception_type(TrustPolicyNotPropagatedError),
Review Comment:
We could probably just catch RuntimeError or another built-in exception. A
custom exception class seems like a bit of overkill. Most of what can raise
below is ClientErrors and index errors.
##########
providers/amazon/tests/system/amazon/aws/example_emr_eks.py:
##########
@@ -177,8 +176,71 @@ def update_trust_policy_execution_role(cluster_name,
cluster_namespace, role_nam
if build.returncode != 0:
raise RuntimeError(err)
- # Wait for IAM changes to propagate to avoid authentication failures
- time.sleep(int(wait_time))
+
+class TrustPolicyNotPropagatedError(Exception):
+ """Raised when the IAM trust policy has not yet propagated."""
+
+
+@task
+def wait_for_trust_policy_propagation(cluster_name, role_name):
+ """Validate that the IAM trust policy has propagated by checking the role's
+ trust policy contains the expected OIDC provider.
+
+ Uses exponential backoff retries (up to 5 minutes) instead of a fixed
sleep,
+ which avoids both wasting time when propagation is fast and failing when
it's slow.
+ """
+ log = logging.getLogger(__name__)
+
+ # Determine the expected OIDC provider ARN from the EKS cluster
+ eks_client = boto3.client("eks")
+ oidc_issuer_url =
eks_client.describe_cluster(name=cluster_name)["cluster"]["identity"]["oidc"]["issuer"]
+ oidc_issuer_endpoint = oidc_issuer_url.replace("https://", "")
+ account_id = boto3.client("sts").get_caller_identity()["Account"]
+ expected_oidc_provider_arn =
f"arn:aws:iam::{account_id}:oidc-provider/{oidc_issuer_endpoint}"
+
+ @retry(
+ retry=retry_if_exception_type(TrustPolicyNotPropagatedError),
+ wait=wait_exponential(multiplier=1, min=5, max=30),
+ stop=stop_after_delay(300),
+ reraise=True,
+ )
+ def _validate_trust_policy():
+ iam_client = boto3.client("iam")
+
+ # Step 1: Verify the trust policy document contains the expected OIDC
provider
Review Comment:
If there's just one step, you can remove the "Step 1:" prefix from the comment.
##########
providers/amazon/tests/system/amazon/aws/example_emr_eks.py:
##########
@@ -110,7 +109,7 @@ def run_eksctl_commands(cluster_name, ns):
# See
https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-cluster-access.html
file =
"https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname
-s)_amd64.tar.gz"
commands = f"""
- curl --silent --location "{file}" | tar xz -C /tmp &&
+ curl --silent --location --retry 3 --retry-delay 5 "{file}" | tar xz
-C /tmp &&
Review Comment:
Doesn't this separate the `--location` flag from the `"{file}"` value? Also,
why are we adding `--retry` and `--retry-delay` here? Isn't the problem solved
by your new wait task below?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]