This is an automated email from the ASF dual-hosted git repository.
onikolas pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 7f9c310fa0a Fix SageMakerNotebookOperator
domain_id/project_id/domain_region params (#62962)
7f9c310fa0a is described below
commit 7f9c310fa0ae9f49aabdc00f9c4c6ad79eaaacbc
Author: Nikita Arbuzov <[email protected]>
AuthorDate: Tue Mar 17 15:32:53 2026 -0400
Fix SageMakerNotebookOperator domain_id/project_id/domain_region params
(#62962)
- Method _get_sagemaker_studio_config in the hook did not pass the domain,
project, and DataZone (DZ) region identifiers to the SDK client config: the
domain_identifier, project_identifier, and datazone_domain_region execution
overrides were missing.
- Add template_fields to SageMakerNotebookOperator. Without
template_fields, passing XCom references (e.g. task_instance.xcom_pull(...))
for domain_id, project_id, or domain_region caused a ParamValidationError
because the hook was instantiated before Airflow resolved the XCom values to
actual strings. Adding these to template_fields ensures they are rendered
before execute() is called.
- Updated example_sagemaker_unified_studio to reflect changes.
---
providers/amazon/README.rst | 4 +-
providers/amazon/docs/index.rst | 9 ++---
.../docs/operators/sagemakerunifiedstudio.rst | 8 ++++
providers/amazon/pyproject.toml | 5 +--
.../amazon/aws/hooks/sagemaker_unified_studio.py | 11 +++++-
.../aws/operators/sagemaker_unified_studio.py | 10 +++++
.../amazon/aws/example_sagemaker_unified_studio.py | 46 +++++++++++++++++++---
uv.lock | 19 +--------
8 files changed, 76 insertions(+), 36 deletions(-)
diff --git a/providers/amazon/README.rst b/providers/amazon/README.rst
index 6fd0afb4e73..bb5dec7ad99 100644
--- a/providers/amazon/README.rst
+++ b/providers/amazon/README.rst
@@ -66,9 +66,7 @@ PIP package Version required
``asgiref`` ``>=2.3.0``
``PyAthena`` ``>=3.10.0``
``jmespath`` ``>=0.7.0``
-``sagemaker-studio`` ``>=1.0.9``
-``pydynamodb`` ``>=0.7.5; python_version >=
"3.13"``
-``sqlean.py`` ``>=3.47.0; python_version >=
"3.13"``
+``sagemaker-studio`` ``>=1.0.25,<1.1.0``
``marshmallow`` ``>=3``
==========================================
======================================
diff --git a/providers/amazon/docs/index.rst b/providers/amazon/docs/index.rst
index f737b2c2186..f0736f90539 100644
--- a/providers/amazon/docs/index.rst
+++ b/providers/amazon/docs/index.rst
@@ -107,9 +107,9 @@ Requirements
The minimum Apache Airflow version supported by this provider distribution is
``2.11.0``.
-==========================================
=====================================
+========================================== ===================
PIP package Version required
-==========================================
=====================================
+========================================== ===================
``apache-airflow`` ``>=2.11.0``
``apache-airflow-providers-common-compat`` ``>=1.13.0``
``apache-airflow-providers-common-sql`` ``>=1.32.0``
@@ -123,10 +123,9 @@ PIP package Version
required
``asgiref`` ``>=2.3.0``
``PyAthena`` ``>=3.10.0``
``jmespath`` ``>=0.7.0``
-``sagemaker-studio`` ``>=1.0.9``
-``pydynamodb`` ``>=0.8.1; python_version >=
"3.13"``
+``sagemaker-studio`` ``>=1.0.25,<1.1.0``
``marshmallow`` ``>=3``
-==========================================
=====================================
+========================================== ===================
Cross provider package dependencies
-----------------------------------
diff --git a/providers/amazon/docs/operators/sagemakerunifiedstudio.rst
b/providers/amazon/docs/operators/sagemakerunifiedstudio.rst
index 6418ca03274..b37aec57804 100644
--- a/providers/amazon/docs/operators/sagemakerunifiedstudio.rst
+++ b/providers/amazon/docs/operators/sagemakerunifiedstudio.rst
@@ -54,6 +54,14 @@ To create an Amazon SageMaker Unified Studio workflow to
orchestrate your notebo
:end-before: [END howto_operator_sagemaker_unified_studio_notebook]
+The following example passes domain ID, project ID, and domain region as operator
parameters.
+
+.. exampleinclude::
/../../amazon/tests/system/amazon/aws/example_sagemaker_unified_studio.py
+ :language: python
+ :dedent: 4
+ :start-after: [START
howto_operator_sagemaker_unified_studio_notebook_explicit_params]
+ :end-before: [END
howto_operator_sagemaker_unified_studio_notebook_explicit_params]
+
Reference
---------
diff --git a/providers/amazon/pyproject.toml b/providers/amazon/pyproject.toml
index 8f654c65b75..b478343e297 100644
--- a/providers/amazon/pyproject.toml
+++ b/providers/amazon/pyproject.toml
@@ -76,9 +76,8 @@ dependencies = [
"PyAthena>=3.10.0",
"jmespath>=0.7.0",
# Do not change sagemaker-studio versions without approval from AWS.
- "sagemaker-studio>=1.0.9",
- # Sagemaker studio in Python 3.13 requires version >=1.1.0 and Pydynamodb
>=0.7.5
- "pydynamodb>=0.8.1; python_version >= '3.13'",
+ # Capping sagemaker-studio under 1.1.0 temporarily until the
NotebookOperator params are released in newer versions
+ "sagemaker-studio>=1.0.25,<1.1.0",
"marshmallow>=3",
]
diff --git
a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py
b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py
index ac06fcbacf2..b78c5669ffb 100644
---
a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py
+++
b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py
@@ -98,11 +98,11 @@ class SageMakerNotebookHook(BaseHook):
**kwargs,
):
super().__init__(*args, **kwargs)
- self._sagemaker_studio =
SageMakerStudioAPI(self._get_sagemaker_studio_config())
self.execution_name = execution_name
self.domain_id = domain_id
self.project_id = project_id
self.domain_region = domain_region
+ self._sagemaker_studio =
SageMakerStudioAPI(self._get_sagemaker_studio_config())
self.input_config = input_config or {}
self.output_config = output_config or {"output_formats": ["NOTEBOOK"]}
self.compute = compute
@@ -113,7 +113,14 @@ class SageMakerNotebookHook(BaseHook):
def _get_sagemaker_studio_config(self):
config = ClientConfig()
- config.overrides["execution"] = {"local": is_local_runner()}
+ if self.domain_region:
+ config.region = self.domain_region
+ config.overrides["execution"] = {
+ "local": is_local_runner(),
+ "domain_identifier": self.domain_id,
+ "project_identifier": self.project_id,
+ "datazone_domain_region": self.domain_region,
+ }
return config
def _format_start_execution_input_config(self):
diff --git
a/providers/amazon/src/airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py
b/providers/amazon/src/airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py
index 70310873670..c91f3e757aa 100644
---
a/providers/amazon/src/airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py
+++
b/providers/amazon/src/airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py
@@ -101,6 +101,16 @@ class SageMakerNotebookOperator(BaseOperator):
"""
operator_extra_links = (SageMakerUnifiedStudioLink(),)
+ template_fields = (
+ "domain_id",
+ "project_id",
+ "domain_region",
+ "input_config",
+ "output_config",
+ "compute",
+ "termination_condition",
+ "tags",
+ )
def __init__(
self,
diff --git
a/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio.py
b/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio.py
index a46970dcdb3..5019de24b2a 100644
---
a/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio.py
+++
b/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio.py
@@ -41,9 +41,15 @@ Prerequisites: The account which runs this test must
manually have the following
3. A project within the SageMaker Unified Studio Domain
4. A notebook (test_notebook.ipynb) placed in the project's s3 path
-This test will emulate a DAG run in the shared MWAA environment inside a
SageMaker Unified Studio Project.
-The setup tasks will set up the project and configure the test runner to
emulate an MWAA instance.
-Then, the SageMakerNotebookOperator will run a test notebook. This should spin
up a SageMaker training job, run the notebook, and exit successfully.
+The test runs in two stages:
+1. run-notebook-explicit: passes domain_id, domain_region, and project_id
directly as operator
+ parameters. No environment variables are required. Requires
sagemaker-studio>=1.0.25.
+2. setup_mwaa_environment + run-notebook: sets MWAA-style environment
variables and runs the
+ notebook using the legacy env-var-based resolution path.
+
+The ordering is intentional: run-notebook-explicit runs BEFORE the env vars
are set, so on
+older SDK versions (<1.0.25) that cannot resolve the region from explicit
params, the test
+will fail at stage 1 rather than accidentally passing via the env vars.
"""
DAG_ID = "example_sagemaker_unified_studio"
@@ -103,6 +109,7 @@ with DAG(
DAG_ID,
schedule="@once",
start_date=datetime(2021, 1, 1),
+ tags=["example"],
catchup=False,
) as dag:
test_context = sys_test_context_task()
@@ -124,9 +131,35 @@ with DAG(
setup_mwaa_environment =
mock_mwaa_environment(mock_mwaa_environment_params)
- # [START howto_operator_sagemaker_unified_studio_notebook]
notebook_path = "test_notebook.ipynb" # This should be the path to your
.ipynb, .sqlnb, or .vetl file in your project.
+ # [START howto_operator_sagemaker_unified_studio_notebook_explicit_params]
+ # Run notebook with domain_id/project_id/domain_region passed explicitly
as operator parameters.
+ # No environment variables needed — the SDK resolves the S3 path and
region from these params.
+ # Requires sagemaker-studio>=1.0.25.
+ # NOTE: this task runs BEFORE env vars are set intentionally, to prove
that explicit params
+ # work without any MWAA-style environment variables present.
+ run_notebook_explicit_params = SageMakerNotebookOperator(
+ task_id="run-notebook-explicit",
+ domain_id=domain_id,
+ project_id=project_id,
+ domain_region=region_name,
+ input_config={"input_path": notebook_path, "input_params": {}},
+ output_config={"output_formats": ["NOTEBOOK"]}, # optional
+ compute={
+ "instance_type": "ml.m5.large",
+ "volume_size_in_gb": 30,
+ }, # optional
+ termination_condition={"max_runtime_in_seconds": 600}, # optional
+ tags={}, # optional
+ wait_for_completion=True, # optional
+ waiter_delay=5, # optional
+ deferrable=False, # optional
+ )
+ # [END howto_operator_sagemaker_unified_studio_notebook_explicit_params]
+
+ # [START howto_operator_sagemaker_unified_studio_notebook]
+ # Run notebook using the legacy env-var-based resolution path (MWAA-style).
run_notebook = SageMakerNotebookOperator(
task_id="run-notebook",
input_config={"input_path": notebook_path, "input_params": {}},
@@ -159,8 +192,11 @@ with DAG(
chain(
# TEST SETUP
test_context,
+ # TEST BODY: explicit params first (no env vars set yet)
+ run_notebook_explicit_params,
+ # TEST SETUP: set MWAA-style env vars
setup_mwaa_environment,
- # TEST BODY
+ # TEST BODY: legacy env-var-based resolution
run_notebook,
)
diff --git a/uv.lock b/uv.lock
index 254e349f702..2d162bd3776 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2538,7 +2538,6 @@ dependencies = [
{ name = "jsonpath-ng" },
{ name = "marshmallow" },
{ name = "pyathena" },
- { name = "pydynamodb", marker = "python_full_version >= '3.13'" },
{ name = "redshift-connector", version = "2.1.7", source = { registry =
"https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
{ name = "redshift-connector", version = "2.1.11", source = { registry =
"https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
{ name = "sagemaker-studio" },
@@ -2676,11 +2675,10 @@ requires-dist = [
{ name = "pandas", marker = "python_full_version >= '3.13' and extra ==
'pandas'", specifier = ">=2.2.3" },
{ name = "pandas", marker = "python_full_version < '3.13' and extra ==
'pandas'", specifier = ">=2.1.2" },
{ name = "pyathena", specifier = ">=3.10.0" },
- { name = "pydynamodb", marker = "python_full_version >= '3.13'", specifier
= ">=0.8.1" },
{ name = "python3-saml", marker = "python_full_version < '3.13' and extra
== 'python3-saml'", specifier = ">=1.16.0" },
{ name = "redshift-connector", specifier = ">=2.1.3" },
{ name = "s3fs", marker = "extra == 's3fs'", specifier = ">=2023.10.0" },
- { name = "sagemaker-studio", specifier = ">=1.0.9" },
+ { name = "sagemaker-studio", specifier = ">=1.0.25,<1.1.0" },
{ name = "sqlalchemy", marker = "extra == 'sqlalchemy'", specifier =
">=1.4.54" },
{ name = "watchtower", specifier = ">=3.3.1,<4" },
{ name = "xmlsec", marker = "python_full_version < '3.13' and extra ==
'python3-saml'", specifier = ">=1.3.14" },
@@ -17489,21 +17487,6 @@ dependencies = [
]
sdist = { url =
"https://files.pythonhosted.org/packages/e9/45/2f64d8c14b487274e825d793dc634f9f1040537aa6cb93d1ea8fc4498d65/pydruid-0.6.9.tar.gz",
hash =
"sha256:63c41b33ab47fbb71cc25d3f3316cad78f18bfe947fa108862dd841d1f44fe49", size
= 135508, upload-time = "2024-05-16T19:40:32.469Z" }
-[[package]]
-name = "pydynamodb"
-version = "0.8.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "boto3", marker = "python_full_version >= '3.13'" },
- { name = "botocore", marker = "python_full_version >= '3.13'" },
- { name = "pyparsing", marker = "python_full_version >= '3.13'" },
- { name = "tenacity", marker = "python_full_version >= '3.13'" },
-]
-sdist = { url =
"https://files.pythonhosted.org/packages/c1/1a/e362260a767e59419d45476bb1cfeaf978e894f6f89dd794be47a4898695/pydynamodb-0.8.1.tar.gz",
hash =
"sha256:b073f99217695dcf304fcd8933e74ab97d13590af8d0d96109df6f689a3de465", size
= 69769, upload-time = "2026-01-21T16:45:21.437Z" }
-wheels = [
- { url =
"https://files.pythonhosted.org/packages/b2/46/1970a44620582f5eac6dbcd76845258d42b2ff9ec8bc4e6cdd89dcf02260/pydynamodb-0.8.1-py3-none-any.whl",
hash =
"sha256:a9679650d0559ed1cae600dda5517f0f8e7e795446e8ef56f71451a82bd68472", size
= 62520, upload-time = "2026-01-21T16:45:20.446Z" },
-]
-
[[package]]
name = "pyenchant"
version = "3.3.0"