Re: [PR] Add DMS Serverless Operators [airflow]

2025-01-08 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1907101253


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
+def _get_rds_instance_endpoint(instance_name: str):
+print("Retrieving RDS instance endpoint.")
+rds_client = boto3.client("rds")
+
+response = 
rds_client.describe_db_instances(DBInstanceIdentifier=instance_name)
+rds_instance_endpoint = response["DBInstances"][0]["Endpoint"]
+return rds_instance_endpoint
+
+
+@task
+def create_security_group(security_group_name: str, vpc_id: str):
+client = boto3.client("ec2")
+security_group = client.create_security_group(
+GroupName=security_group_name,
+Description="Created for DMS system test",
+VpcId=vpc_id,
+)
+client.get_waiter("security_group_exists").wait(
+GroupIds=[security_group["GroupId"]],
+)
+client.authorize_security_group_ingress(
+GroupId=security_group["GroupId"],
+IpPermissions=[SG_IP_PERMISSION],
+)
+
+return security_group["GroupId"]
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+ 

Re: [PR] Add DMS Serverless Operators [airflow]

2025-01-07 Thread via GitHub


eladkal commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1906265567


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
+def _get_rds_instance_endpoint(instance_name: str):
+print("Retrieving RDS instance endpoint.")
+rds_client = boto3.client("rds")
+
+response = 
rds_client.describe_db_instances(DBInstanceIdentifier=instance_name)
+rds_instance_endpoint = response["DBInstances"][0]["Endpoint"]
+return rds_instance_endpoint
+
+
+@task
+def create_security_group(security_group_name: str, vpc_id: str):
+client = boto3.client("ec2")
+security_group = client.create_security_group(
+GroupName=security_group_name,
+Description="Created for DMS system test",
+VpcId=vpc_id,
+)
+client.get_waiter("security_group_exists").wait(
+GroupIds=[security_group["GroupId"]],
+)
+client.authorize_security_group_ingress(
+GroupId=security_group["GroupId"],
+IpPermissions=[SG_IP_PERMISSION],
+)
+
+return security_group["GroupId"]
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+ 

Re: [PR] Add DMS Serverless Operators [airflow]

2025-01-07 Thread via GitHub


eladkal commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1906265567


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
+def _get_rds_instance_endpoint(instance_name: str):
+print("Retrieving RDS instance endpoint.")
+rds_client = boto3.client("rds")
+
+response = 
rds_client.describe_db_instances(DBInstanceIdentifier=instance_name)
+rds_instance_endpoint = response["DBInstances"][0]["Endpoint"]
+return rds_instance_endpoint
+
+
+@task
+def create_security_group(security_group_name: str, vpc_id: str):
+client = boto3.client("ec2")
+security_group = client.create_security_group(
+GroupName=security_group_name,
+Description="Created for DMS system test",
+VpcId=vpc_id,
+)
+client.get_waiter("security_group_exists").wait(
+GroupIds=[security_group["GroupId"]],
+)
+client.authorize_security_group_ingress(
+GroupId=security_group["GroupId"],
+IpPermissions=[SG_IP_PERMISSION],
+)
+
+return security_group["GroupId"]
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+ 

Re: [PR] Add DMS Serverless Operators [airflow]

2025-01-07 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1905943430


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
+def _get_rds_instance_endpoint(instance_name: str):
+print("Retrieving RDS instance endpoint.")
+rds_client = boto3.client("rds")
+
+response = 
rds_client.describe_db_instances(DBInstanceIdentifier=instance_name)
+rds_instance_endpoint = response["DBInstances"][0]["Endpoint"]
+return rds_instance_endpoint
+
+
+@task
+def create_security_group(security_group_name: str, vpc_id: str):
+client = boto3.client("ec2")
+security_group = client.create_security_group(
+GroupName=security_group_name,
+Description="Created for DMS system test",
+VpcId=vpc_id,
+)
+client.get_waiter("security_group_exists").wait(
+GroupIds=[security_group["GroupId"]],
+)
+client.authorize_security_group_ingress(
+GroupId=security_group["GroupId"],
+IpPermissions=[SG_IP_PERMISSION],
+)
+
+return security_group["GroupId"]
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+ 

Re: [PR] Add DMS Serverless Operators [airflow]

2025-01-07 Thread via GitHub


ferruzzi commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1905880538


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
+def _get_rds_instance_endpoint(instance_name: str):
+print("Retrieving RDS instance endpoint.")
+rds_client = boto3.client("rds")
+
+response = 
rds_client.describe_db_instances(DBInstanceIdentifier=instance_name)
+rds_instance_endpoint = response["DBInstances"][0]["Endpoint"]
+return rds_instance_endpoint
+
+
+@task
+def create_security_group(security_group_name: str, vpc_id: str):
+client = boto3.client("ec2")
+security_group = client.create_security_group(
+GroupName=security_group_name,
+Description="Created for DMS system test",
+VpcId=vpc_id,
+)
+client.get_waiter("security_group_exists").wait(
+GroupIds=[security_group["GroupId"]],
+)
+client.authorize_security_group_ingress(
+GroupId=security_group["GroupId"],
+IpPermissions=[SG_IP_PERMISSION],
+)
+
+return security_group["GroupId"]
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+

Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-20 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1893936281


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
+def _get_rds_instance_endpoint(instance_name: str):
+print("Retrieving RDS instance endpoint.")
+rds_client = boto3.client("rds")
+
+response = 
rds_client.describe_db_instances(DBInstanceIdentifier=instance_name)
+rds_instance_endpoint = response["DBInstances"][0]["Endpoint"]
+return rds_instance_endpoint
+
+
+@task
+def create_security_group(security_group_name: str, vpc_id: str):
+client = boto3.client("ec2")
+security_group = client.create_security_group(
+GroupName=security_group_name,
+Description="Created for DMS system test",
+VpcId=vpc_id,
+)
+client.get_waiter("security_group_exists").wait(
+GroupIds=[security_group["GroupId"]],
+)
+client.authorize_security_group_ingress(
+GroupId=security_group["GroupId"],
+IpPermissions=[SG_IP_PERMISSION],
+)
+
+return security_group["GroupId"]
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+ 

Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-19 Thread via GitHub


ferruzzi commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1893312617


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,481 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+DmsStopReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
+
+"""
+This example demonstrates how to use the DMS operators to create a serverless 
replication task to replicate data
+from a PostgreSQL database to Amazon S3.
+
+The IAM role used for the replication must have the permissions defined in the 
[Amazon S3 
target](https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Target.S3.html#CHAP_Target.S3.Prerequisites)
+documentation.
+"""
+
+DAG_ID = "example_dms_serverless"
+ROLE_ARN_KEY = "ROLE_ARN"
+
+sys_test_context_task = 
SystemTestContextBuilder().add_variable(ROLE_ARN_KEY).build()
+
+# Config values for setting up the "Source" database.
+CA_CERT_ID = "rds-ca-rsa2048-g1"
+RDS_ENGINE = "postgres"
+RDS_PROTOCOL = "postgresql"
+RDS_USERNAME = "username"
+# NEVER store your production password in plaintext in a DAG like this.
+# Use Airflow Secrets or a secret manager for this in production.
+RDS_PASSWORD = "rds_password"
+TABLE_HEADERS = ["apache_project", "release_year"]
+SAMPLE_DATA = [
+("Airflow", "2015"),
+("OpenOffice", "2012"),
+("Subversion", "2000"),
+("NiFi", "2006"),
+]
+SG_IP_PERMISSION = {
+"FromPort": 5432,
+"IpProtocol": "All",
+"IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+}
+
+
def _get_rds_instance_endpoint(instance_name: str):
    """Look up the connection endpoint of an RDS instance.

    Queries the RDS API for *instance_name* and returns the ``Endpoint``
    mapping (host address and port) of the matching DB instance.
    """
    print("Retrieving RDS instance endpoint.")
    client = boto3.client("rds")
    describe_response = client.describe_db_instances(DBInstanceIdentifier=instance_name)
    # The instance identifier is unique, so the first entry is the instance.
    return describe_response["DBInstances"][0]["Endpoint"]
+
+
@task
def create_security_group(security_group_name: str, vpc_id: str):
    """Create an EC2 security group in *vpc_id* and attach the ingress rule.

    Waits until the new group is visible, authorizes ``SG_IP_PERMISSION``
    as an ingress rule, and returns the group's ID.
    """
    ec2 = boto3.client("ec2")
    created = ec2.create_security_group(
        GroupName=security_group_name,
        Description="Created for DMS system test",
        VpcId=vpc_id,
    )
    group_id = created["GroupId"]
    # Group creation is eventually consistent; block until it exists.
    ec2.get_waiter("security_group_exists").wait(GroupIds=[group_id])
    ec2.authorize_security_group_ingress(
        GroupId=group_id,
        IpPermissions=[SG_IP_PERMISSION],
    )
    return group_id
+
+
+@task
+def create_sample_table(instance_name: str, db_name: str, table_name: str):
+print("Creating sample table.")
+
+rds_endpoint = _get_rds_instance_endpoint(instance_name)
+hostname = rds_endpoint["Address"]
+port = rds_endpoint["Port"]
+rds_url = 
f"{RDS_PROTOCOL}://{RDS_USERNAME}:{RDS_PASSWORD}@{hostname}:{port}/{db_name}"
+engine = create_engine(rds_url)
+
+table = Table(
+table_name,
+MetaData(engine),
+Column(TABLE_HEADERS[0], String, primary_key=True),
+Column(TABLE_HEADERS[1], String),
+)
+
+with engine.connect() as connection:
+# Create the Table.
+

Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-17 Thread via GitHub


potiuk commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2549756515

   Fix merged in #45013


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-17 Thread via GitHub


jscheffl commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2549629413

   Okay, I thought I make a "quick fix" and add aiobotocore as dependency... 
but there is a bit of history and even as pre-commit check NOT to add this.
   So loading/testing it seems need to be selective.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-17 Thread via GitHub


jscheffl commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2549612452

   It seems the merge of this PR broke tests on main/canary builds:
   https://github.com/apache/airflow/actions/runs/12380031516/job/34555973915
   
   Somebody having an idea how to fix? Shall we revert?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-17 Thread via GitHub


eladkal merged PR #43988:
URL: https://github.com/apache/airflow/pull/43988


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-17 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1888373500


##
.pre-commit-config.yaml:
##
@@ -18,7 +18,7 @@
 default_stages: [pre-commit, pre-push]
 default_language_version:
   python: python3
-  node: 22.2.0
+  node: system #22.2.0

Review Comment:
   No, I need to revert that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-14 Thread via GitHub


eladkal commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1885531660


##
.pre-commit-config.yaml:
##
@@ -18,7 +18,7 @@
 default_stages: [pre-commit, pre-push]
 default_language_version:
   python: python3
-  node: 22.2.0
+  node: system #22.2.0

Review Comment:
   Is this change relevant to the PR?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-11 Thread via GitHub


eladkal commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2535838777

   still seeing error
   ```
   providers/tests/system/amazon/aws/example_dms_serverless.py:472: error: 
Module
   "tests_common.test_utils.system_tests" has no attribute "watcher" 
   [attr-defined]
   from tests_common.test_utils.system_tests import watcher
   ^
   Found 1 error in 1 file (checked 3356 source files)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-11 Thread via GitHub


ellisms commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2535673295

   The import error should be fixed now.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-10 Thread via GitHub


eladkal commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-253385

   Test fail :(
   
   ```
   
tests/always/test_example_dags.py::test_should_be_importable[providers/tests/system/amazon/aws/example_dms_serverless.py]
 - AssertionError: 
import_errors={'/opt/airflow/providers/tests/system/amazon/aws/example_dms_serverless.py':
 'Traceback (most recent call last):\n  File "", 
line 228, in _call_with_frames_removed\n  File 
"/opt/airflow/providers/tests/system/amazon/aws/example_dms_serverless.py", 
line 472, in \nfrom tests.system.utils.watcher import 
watcher\nModuleNotFoundError: No module named \'tests.system.utils\'\n'}
   assert 1 == 0
+  where 1 = 
len({'/opt/airflow/providers/tests/system/amazon/aws/example_dms_serverless.py':
 'Traceback (most recent call last):\n  Fi...e>\nfrom 
tests.system.utils.watcher import watcher\nModuleNotFoundError: No module named 
\'tests.system.utils\'\n'})
+where 
{'/opt/airflow/providers/tests/system/amazon/aws/example_dms_serverless.py': 
'Traceback (most recent call last):\n  Fi...e>\nfrom 
tests.system.utils.watcher import watcher\nModuleNotFoundError: No module named 
\'tests.system.utils\'\n'} = .import_errors
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-12-10 Thread via GitHub


ellisms commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2533362614

   Added the missing example.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-28 Thread via GitHub


eladkal commented on PR #43988:
URL: https://github.com/apache/airflow/pull/43988#issuecomment-2505968905

   The current error in the CI is a valid one:
   `FAILED 
tests/always/test_project_structure.py::TestAmazonProviderProjectStructure::test_missing_examples
 - Failed: Not all classes are covered with example dags. Update 
self.MISSING_EXAMPLES_FOR_CLASSES if you want to skip this error`
   
   You'll need to add example dags or exclude if there is a reason


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-28 Thread via GitHub


eladkal commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1862040989


##
docs/apache-airflow-providers-amazon/operators/dms.rst:
##
@@ -114,6 +114,69 @@ To delete a replication task you can use
 :start-after: [START howto_operator_dms_delete_task]
 :end-before: [END howto_operator_dms_delete_task]
 
+
+Create a serverless replication config
+==
+
+To create a serverless replication config use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsCreateReplicationConfigOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_dms_create_replication_config]
+:end-before: [END howto_operator_dms_create_replication_config]
+
+Describe a serverless replication config
+
+
+To describe a serverless replication config use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsDescribeReplicationConfigsOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_dms_describe_replication_config]
+:end-before: [END howto_operator_dms_describe_replication_config]
+
+Start a serverless replication
+==
+
+To start a serverless replication use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsStartReplicationOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py

Review Comment:
   ```suggestion
   .. exampleinclude:: 
/../../providers/tests/system/amazon/aws/example_dms_serverless.py
   ```



##
docs/apache-airflow-providers-amazon/operators/dms.rst:
##
@@ -114,6 +114,69 @@ To delete a replication task you can use
 :start-after: [START howto_operator_dms_delete_task]
 :end-before: [END howto_operator_dms_delete_task]
 
+
+Create a serverless replication config
+==
+
+To create a serverless replication config use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsCreateReplicationConfigOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_dms_create_replication_config]
+:end-before: [END howto_operator_dms_create_replication_config]
+
+Describe a serverless replication config
+
+
+To describe a serverless replication config use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsDescribeReplicationConfigsOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_dms_describe_replication_config]
+:end-before: [END howto_operator_dms_describe_replication_config]
+
+Start a serverless replication
+==
+
+To start a serverless replication use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsStartReplicationOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py
+:language: python
+:dedent: 4
+:start-after: [START howto_operator_dms_serverless_start_replication]
+:end-before: [END howto_operator_dms_serverless_start_replication]
+
+Get the status of a serverless replication
+==
+
+To get the status of a serverless replication use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsDescribeReplicationsOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py

Review Comment:
   ```suggestion
   .. exampleinclude:: 
/../../providers/tests/system/amazon/aws/example_dms_serverless.py
   ```



##
docs/apache-airflow-providers-amazon/operators/dms.rst:
##
@@ -114,6 +114,69 @@ To delete a replication task you can use
 :start-after: [START howto_operator_dms_delete_task]
 :end-before: [END howto_operator_dms_delete_task]
 
+
+Create a serverless replication config
+==
+
+To create a serverless replication config use
+:class:`~airflow.providers.amazon.aws.operators.dms.DmsCreateReplicationConfigOperator`.
+
+.. exampleinclude:: 
/../../tests/system/providers/amazon/aws/example_dms_serverless.py

Review Comment:
   ```suggestion
   .. exampleinclude:: 
/../../providers/tests/system/amazon/aws/example_dms_serverless.py
   ```



##
providers/src/airflow/providers/amazon/aws/operators/dms.py:
##
@@ -17,12 +17,23 @@
 # under the License.
 from __future__ import annotations
 
+from datetime import datetime
 from collections.abc import Sequence

Review Comment:
   ```suggestion
   from collections.abc import Sequence
    from datetime import datetime
    ```

Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-27 Thread via GitHub


eladkal commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1861153997


##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,473 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from tests.system.providers.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
+from tests.system.providers.amazon.aws.utils.ec2 import get_default_vpc_id

Review Comment:
   ```suggestion
   from providers.tests.system.amazon.aws.utils.ec2 import get_default_vpc_id
   ```



##
providers/tests/system/amazon/aws/example_dms_serverless.py:
##
@@ -0,0 +1,473 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Note:  DMS requires you to configure specific IAM roles/permissions.  For more 
information, see
+https://docs.aws.amazon.com/dms/latest/userguide/security-iam.html#CHAP_Security.APIRole
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import boto3
+from sqlalchemy import Column, MetaData, String, Table, create_engine
+
+from airflow.decorators import task
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.amazon.aws.operators.dms import (
+DmsCreateReplicationConfigOperator,
+DmsDeleteReplicationConfigOperator,
+DmsDescribeReplicationConfigsOperator,
+DmsDescribeReplicationsOperator,
+DmsStartReplicationOperator,
+)
+from airflow.providers.amazon.aws.operators.rds import (
+RdsCreateDbInstanceOperator,
+RdsDeleteDbInstanceOperator,
+)
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, 
S3DeleteBucketOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+from tests.system.providers.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder

Review Comment:
   ```suggestion
   from providers.tests.system.amazon.aws.utils import ENV_ID_KEY, 
SystemTestContextBuilder
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-22 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1854780498


##
providers/src/airflow/providers/amazon/aws/hooks/dms.py:
##
@@ -219,3 +224,158 @@ def wait_for_task_status(self, replication_task_arn: str, 
status: DmsTaskWaiterS
 ],
 WithoutSettings=True,
 )
+

Review Comment:
   I'm inclined to leave it as-is since its already tested. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


o-nikolas commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842705282


##
providers/src/airflow/providers/amazon/aws/operators/dms.py:
##
@@ -277,3 +288,551 @@ def execute(self, context: Context):
 """Stop AWS DMS replication task from Airflow."""
 
self.hook.stop_replication_task(replication_task_arn=self.replication_task_arn)
 self.log.info("DMS replication task(%s) is stopping.", 
self.replication_task_arn)
+
+
class DmsDescribeReplicationConfigsOperator(AwsBaseOperator[DmsHook]):
    """
    Describes AWS DMS Serverless replication configurations.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:DmsDescribeReplicationConfigsOperator`

    :param filter: Filters block for filtering results.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
        If this is ``None`` or empty then the default boto3 behaviour is used. If
        running Airflow in a distributed manner and aws_conn_id is None or
        empty, then default boto3 configuration would be used (and must be
        maintained on each worker node).
    """

    aws_hook_class = DmsHook
    template_fields: Sequence[str] = aws_template_fields("filter")
    template_fields_renderers = {"filter": "json"}

    def __init__(
        self,
        *,
        filter: list[dict] | None = None,
        aws_conn_id: str | None = "aws_default",
        **kwargs,
    ):
        super().__init__(aws_conn_id=aws_conn_id, **kwargs)
        # NOTE: ``filter`` shadows the builtin, but it is part of the public
        # operator interface and must keep its name.
        self.filter = filter

    def execute(self, context: Context) -> list:
        """
        Describe AWS DMS replication configurations.

        :return: List of replication configurations
        """
        return self.hook.describe_replication_configs(filters=self.filter)
+
+
class DmsCreateReplicationConfigOperator(AwsBaseOperator[DmsHook]):
    """
    Creates an AWS DMS Serverless replication configuration.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:DmsCreateReplicationConfigOperator`

    :param replication_config_id: Unique identifier used to create a ReplicationConfigArn.
    :param source_endpoint_arn: ARN of the source endpoint
    :param target_endpoint_arn: ARN of the target endpoint
    :param compute_config: Parameters for provisioning an DMS Serverless replication.
    :param replication_type: type of DMS Serverless replication
    :param table_mappings: JSON table mappings
    :param additional_config_kwargs: Additional configuration parameters for
        DMS Serverless replication. Passed directly to the API
    :param aws_conn_id: The Airflow connection used for AWS credentials.
        If this is ``None`` or empty then the default boto3 behaviour is used. If
        running Airflow in a distributed manner and aws_conn_id is None or
        empty, then default boto3 configuration would be used (and must be
        maintained on each worker node).
    """

    aws_hook_class = DmsHook
    template_fields: Sequence[str] = aws_template_fields(
        "replication_config_id",
        "source_endpoint_arn",
        "target_endpoint_arn",
        "compute_config",
        "replication_type",
        "table_mappings",
    )

    # Renderer keys must match ``template_fields`` entries; the previous
    # "tableMappings" key never matched "table_mappings", so the JSON
    # renderer was silently ignored in the UI.
    template_fields_renderers = {"compute_config": "json", "table_mappings": "json"}

    def __init__(
        self,
        *,
        replication_config_id: str,
        source_endpoint_arn: str,
        target_endpoint_arn: str,
        compute_config: dict[str, Any],
        replication_type: str,
        table_mappings: str,
        additional_config_kwargs: dict | None = None,
        aws_conn_id: str | None = "aws_default",
        **kwargs,
    ):
        super().__init__(
            aws_conn_id=aws_conn_id,
            **kwargs,
        )

        self.replication_config_id = replication_config_id
        self.source_endpoint_arn = source_endpoint_arn
        self.target_endpoint_arn = target_endpoint_arn
        self.compute_config = compute_config
        self.replication_type = replication_type
        self.table_mappings = table_mappings
        self.additional_config_kwargs = additional_config_kwargs or {}

    def execute(self, context: Context) -> str:
        """
        Create the replication configuration via the DMS hook.

        :return: Response of the ``CreateReplicationConfig`` call.
        """
        resp = self.hook.create_replication_config(
            replication_config_id=self.replication_config_id,
            source_endpoint_arn=self.source_endpoint_arn,
            target_endpoint_arn=self.target_endpoint_arn,
            compute_config=self.compute_config,
            replication_type=self.replication_type,
            table_mappings=self.table_mappings,
            additional_config_kwargs=self.additional_config_kwargs,
        )

        self.log.info("DMS replication config(%s) has been created.", self.replication_config_id)
        return resp

Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


vincbeck commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842706155


##
providers/src/airflow/providers/amazon/aws/operators/dms.py:
##
@@ -277,3 +288,551 @@ def execute(self, context: Context):
 """Stop AWS DMS replication task from Airflow."""
 
self.hook.stop_replication_task(replication_task_arn=self.replication_task_arn)
 self.log.info("DMS replication task(%s) is stopping.", 
self.replication_task_arn)
+
+
class DmsDescribeReplicationConfigsOperator(AwsBaseOperator[DmsHook]):
    """
    Describes AWS DMS Serverless replication configurations.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:DmsDescribeReplicationConfigsOperator`

    :param filter: Filters block for filtering results.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
        If this is ``None`` or empty then the default boto3 behaviour is used. If
        running Airflow in a distributed manner and aws_conn_id is None or
        empty, then default boto3 configuration would be used (and must be
        maintained on each worker node).
    """

    aws_hook_class = DmsHook
    template_fields: Sequence[str] = aws_template_fields("filter")
    template_fields_renderers = {"filter": "json"}

    def __init__(
        self,
        *,
        filter: list[dict] | None = None,
        aws_conn_id: str | None = "aws_default",
        **kwargs,
    ):
        super().__init__(aws_conn_id=aws_conn_id, **kwargs)
        # NOTE: ``filter`` shadows the builtin, but it is part of the public
        # operator interface and must keep its name.
        self.filter = filter

    def execute(self, context: Context) -> list:
        """
        Describe AWS DMS replication configurations.

        :return: List of replication configurations
        """
        return self.hook.describe_replication_configs(filters=self.filter)
+
+
class DmsCreateReplicationConfigOperator(AwsBaseOperator[DmsHook]):
    """
    Creates an AWS DMS Serverless replication configuration.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:DmsCreateReplicationConfigOperator`

    :param replication_config_id: Unique identifier used to create a ReplicationConfigArn.
    :param source_endpoint_arn: ARN of the source endpoint
    :param target_endpoint_arn: ARN of the target endpoint
    :param compute_config: Parameters for provisioning an DMS Serverless replication.
    :param replication_type: type of DMS Serverless replication
    :param table_mappings: JSON table mappings
    :param additional_config_kwargs: Additional configuration parameters for
        DMS Serverless replication. Passed directly to the API
    :param aws_conn_id: The Airflow connection used for AWS credentials.
        If this is ``None`` or empty then the default boto3 behaviour is used. If
        running Airflow in a distributed manner and aws_conn_id is None or
        empty, then default boto3 configuration would be used (and must be
        maintained on each worker node).
    """

    aws_hook_class = DmsHook
    template_fields: Sequence[str] = aws_template_fields(
        "replication_config_id",
        "source_endpoint_arn",
        "target_endpoint_arn",
        "compute_config",
        "replication_type",
        "table_mappings",
    )

    # Renderer keys must match ``template_fields`` entries; the previous
    # "tableMappings" key never matched "table_mappings", so the JSON
    # renderer was silently ignored in the UI.
    template_fields_renderers = {"compute_config": "json", "table_mappings": "json"}

    def __init__(
        self,
        *,
        replication_config_id: str,
        source_endpoint_arn: str,
        target_endpoint_arn: str,
        compute_config: dict[str, Any],
        replication_type: str,
        table_mappings: str,
        additional_config_kwargs: dict | None = None,
        aws_conn_id: str | None = "aws_default",
        **kwargs,
    ):
        super().__init__(
            aws_conn_id=aws_conn_id,
            **kwargs,
        )

        self.replication_config_id = replication_config_id
        self.source_endpoint_arn = source_endpoint_arn
        self.target_endpoint_arn = target_endpoint_arn
        self.compute_config = compute_config
        self.replication_type = replication_type
        self.table_mappings = table_mappings
        self.additional_config_kwargs = additional_config_kwargs or {}

    def execute(self, context: Context) -> str:
        """
        Create the replication configuration via the DMS hook.

        :return: Response of the ``CreateReplicationConfig`` call.
        """
        resp = self.hook.create_replication_config(
            replication_config_id=self.replication_config_id,
            source_endpoint_arn=self.source_endpoint_arn,
            target_endpoint_arn=self.target_endpoint_arn,
            compute_config=self.compute_config,
            replication_type=self.replication_type,
            table_mappings=self.table_mappings,
            additional_config_kwargs=self.additional_config_kwargs,
        )

        self.log.info("DMS replication config(%s) has been created.", self.replication_config_id)
        return resp

Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842675565


##
providers/src/airflow/providers/amazon/aws/operators/dms.py:
##
@@ -277,3 +288,551 @@ def execute(self, context: Context):
 """Stop AWS DMS replication task from Airflow."""
 
self.hook.stop_replication_task(replication_task_arn=self.replication_task_arn)
 self.log.info("DMS replication task(%s) is stopping.", 
self.replication_task_arn)
+
+
+class DmsDescribeReplicationConfigsOperator(AwsBaseOperator[DmsHook]):
+"""
+Describes AWS DMS Serverless replication configurations.
+
+.. seealso::
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:DmsDescribeReplicationConfigsOperator`
+
+:param describe_config_filter: Filters block for filtering results.
+:param aws_conn_id: The Airflow connection used for AWS credentials.
+If this is ``None`` or empty then the default boto3 behaviour is used. 
If
+running Airflow in a distributed manner and aws_conn_id is None or
+empty, then default boto3 configuration would be used (and must be
+"""
+
+aws_hook_class = DmsHook
+template_fields: Sequence[str] = aws_template_fields("filter")
+template_fields_renderers = {"filter": "json"}
+
+def __init__(
+self,
+*,
+filter: list[dict] | None = None,
+aws_conn_id: str | None = "aws_default",
+**kwargs,
+):
+super().__init__(aws_conn_id=aws_conn_id, **kwargs)
+self.filter = filter
+
+def execute(self, context: Context) -> list:
+"""
+Describe AWS DMS replication configurations.
+
+:return: List of replication configurations
+"""
+return self.hook.describe_replication_configs(filters=self.filter)
+
+
+class DmsCreateReplicationConfigOperator(AwsBaseOperator[DmsHook]):
+"""
+
+Creates an AWS DMS Serverless replication configuration.
+
+.. seealso::
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:DmsCreateReplicationConfigOperator`
+
+:param replication_config_id: Unique identifier used to create a 
ReplicationConfigArn.
+:param source_endpoint_arn: ARN of the source endpoint
+:param target_endpoint_arn: ARN of the target endpoint
+:param compute_config: Parameters for provisioning an DMS Serverless 
replication.
+:param replication_type: type of DMS Serverless replication
+:param table_mappings: JSON table mappings
+:param tags: Key-value tag pairs
+:param additional_config_kwargs: Additional configuration parameters for 
DMS Serverless replication. Passed directly to the API
+:param aws_conn_id: The Airflow connection used for AWS credentials.
+If this is ``None`` or empty then the default boto3 behaviour is used. 
If
+running Airflow in a distributed manner and aws_conn_id is None or
+empty, then default boto3 configuration would be used (and must be
+"""
+
+aws_hook_class = DmsHook
+template_fields: Sequence[str] = aws_template_fields(
+"replication_config_id",
+"source_endpoint_arn",
+"target_endpoint_arn",
+"compute_config",
+"replication_type",
+"table_mappings",
+)
+
+template_fields_renderers = {"compute_config": "json", "tableMappings": 
"json"}
+
+def __init__(
+self,
+*,
+replication_config_id: str,
+source_endpoint_arn: str,
+target_endpoint_arn: str,
+compute_config: dict[str, Any],
+replication_type: str,
+table_mappings: str,
+additional_config_kwargs: dict | None = None,
+aws_conn_id: str | None = "aws_default",
+**kwargs,
+):
+super().__init__(
+aws_conn_id=aws_conn_id,
+**kwargs,
+)
+
+self.replication_config_id = replication_config_id
+self.source_endpoint_arn = source_endpoint_arn
+self.target_endpoint_arn = target_endpoint_arn
+self.compute_config = compute_config
+self.replication_type = replication_type
+self.table_mappings = table_mappings
+self.additional_config_kwargs = additional_config_kwargs or {}
+
+def execute(self, context: Context) -> str:
+resp = self.hook.create_replication_config(
+replication_config_id=self.replication_config_id,
+source_endpoint_arn=self.source_endpoint_arn,
+target_endpoint_arn=self.target_endpoint_arn,
+compute_config=self.compute_config,
+replication_type=self.replication_type,
+table_mappings=self.table_mappings,
+additional_config_kwargs=self.additional_config_kwargs,
+)
+
+self.log.info("DMS replication config(%s) has been created.", 
self.replication_config_id)
+return resp
+

Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842664768


##
providers/src/airflow/providers/amazon/aws/hooks/dms.py:
##
@@ -219,3 +224,158 @@ def wait_for_task_status(self, replication_task_arn: str, 
status: DmsTaskWaiterS
 ],
 WithoutSettings=True,
 )
+

Review Comment:
   The `describe_*` methods get used in a few places, so I think those should 
be in the hook to avoid duplication. I'll look to consolidate some of the 
simpler ones directly into the Operator.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842630839


##
providers/src/airflow/providers/amazon/aws/operators/dms.py:
##
@@ -277,3 +288,551 @@ def execute(self, context: Context):
 """Stop AWS DMS replication task from Airflow."""
 
self.hook.stop_replication_task(replication_task_arn=self.replication_task_arn)
 self.log.info("DMS replication task(%s) is stopping.", 
self.replication_task_arn)
+
+
+class DmsDescribeReplicationConfigsOperator(AwsBaseOperator[DmsHook]):
+"""
+Describes AWS DMS Serverless replication configurations.
+
+.. seealso::
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:DmsDescribeReplicationConfigsOperator`
+
+:param describe_config_filter: Filters block for filtering results.
+:param aws_conn_id: The Airflow connection used for AWS credentials.
+If this is ``None`` or empty then the default boto3 behaviour is used. 
If
+running Airflow in a distributed manner and aws_conn_id is None or
+empty, then default boto3 configuration would be used (and must be
+"""
+
+aws_hook_class = DmsHook
+template_fields: Sequence[str] = aws_template_fields("filter")
+template_fields_renderers = {"filter": "json"}
+
+def __init__(
+self,
+*,
+filter: list[dict] | None = None,
+aws_conn_id: str | None = "aws_default",
+**kwargs,
+):
+super().__init__(aws_conn_id=aws_conn_id, **kwargs)
+self.filter = filter
+
+def execute(self, context: Context) -> list:
+"""
+Describe AWS DMS replication configurations.
+
+:return: List of replication configurations
+"""
+return self.hook.describe_replication_configs(filters=self.filter)
+
+
+class DmsCreateReplicationConfigOperator(AwsBaseOperator[DmsHook]):
+"""
+
+Creates an AWS DMS Serverless replication configuration.
+
+.. seealso::
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:DmsCreateReplicationConfigOperator`
+
+:param replication_config_id: Unique identifier used to create a 
ReplicationConfigArn.
+:param source_endpoint_arn: ARN of the source endpoint
+:param target_endpoint_arn: ARN of the target endpoint
+:param compute_config: Parameters for provisioning an DMS Serverless 
replication.
+:param replication_type: type of DMS Serverless replication
+:param table_mappings: JSON table mappings
+:param tags: Key-value tag pairs
+:param additional_config_kwargs: Additional configuration parameters for 
DMS Serverless replication. Passed directly to the API
+:param aws_conn_id: The Airflow connection used for AWS credentials.
+If this is ``None`` or empty then the default boto3 behaviour is used. 
If
+running Airflow in a distributed manner and aws_conn_id is None or
+empty, then default boto3 configuration would be used (and must be
+"""
+
+aws_hook_class = DmsHook
+template_fields: Sequence[str] = aws_template_fields(
+"replication_config_id",
+"source_endpoint_arn",
+"target_endpoint_arn",
+"compute_config",
+"replication_type",
+"table_mappings",
+)
+
+template_fields_renderers = {"compute_config": "json", "tableMappings": 
"json"}
+
+def __init__(
+self,
+*,
+replication_config_id: str,
+source_endpoint_arn: str,
+target_endpoint_arn: str,
+compute_config: dict[str, Any],
+replication_type: str,
+table_mappings: str,
+additional_config_kwargs: dict | None = None,
+aws_conn_id: str | None = "aws_default",
+**kwargs,
+):
+super().__init__(
+aws_conn_id=aws_conn_id,
+**kwargs,
+)
+
+self.replication_config_id = replication_config_id
+self.source_endpoint_arn = source_endpoint_arn
+self.target_endpoint_arn = target_endpoint_arn
+self.compute_config = compute_config
+self.replication_type = replication_type
+self.table_mappings = table_mappings
+self.additional_config_kwargs = additional_config_kwargs or {}
+
+def execute(self, context: Context) -> str:
+resp = self.hook.create_replication_config(
+replication_config_id=self.replication_config_id,
+source_endpoint_arn=self.source_endpoint_arn,
+target_endpoint_arn=self.target_endpoint_arn,
+compute_config=self.compute_config,
+replication_type=self.replication_type,
+table_mappings=self.table_mappings,
+additional_config_kwargs=self.additional_config_kwargs,
+)
+
+self.log.info("DMS replication config(%s) has been created.", 
self.replication_config_id)
+return resp
+

Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


ellisms commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842619718


##
providers/src/airflow/providers/amazon/aws/hooks/dms.py:
##
@@ -219,3 +224,158 @@ def wait_for_task_status(self, replication_task_arn: str, 
status: DmsTaskWaiterS
 ],
 WithoutSettings=True,
 )
+
+def describe_replication_configs(self, filters: list[dict] | None = None, 
**kwargs) -> list[dict]:
+"""
+Return list of serverless replication configs.
+
+.. seealso::
+- 
:external+boto3:py:meth:`DatabaseMigrationService.Client.describe_replication_configs`
+
+:param filters: List of filter objects
+:return: List of replication tasks
+"""
+filters = filters if filters is not None else []
+
+try:
+resp = self.conn.describe_replication_configs(Filters=filters, 
**kwargs)
+return resp.get("ReplicationConfigs", [])
+except Exception as ex:
+self.log.error("Error while describing replication configs: %s", 
str(ex))
+return []

Review Comment:
   Good point. I'll take another look at it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] Add DMS Serverless Operators [airflow]

2024-11-14 Thread via GitHub


vincbeck commented on code in PR #43988:
URL: https://github.com/apache/airflow/pull/43988#discussion_r1842564082


##
providers/src/airflow/providers/amazon/aws/hooks/dms.py:
##
@@ -219,3 +224,158 @@ def wait_for_task_status(self, replication_task_arn: str, 
status: DmsTaskWaiterS
 ],
 WithoutSettings=True,
 )
+
+def describe_replication_configs(self, filters: list[dict] | None = None, 
**kwargs) -> list[dict]:
+"""
+Return list of serverless replication configs.
+
+.. seealso::
+- 
:external+boto3:py:meth:`DatabaseMigrationService.Client.describe_replication_configs`
+
+:param filters: List of filter objects
+:return: List of replication tasks
+"""
+filters = filters if filters is not None else []
+
+try:
+resp = self.conn.describe_replication_configs(Filters=filters, 
**kwargs)
+return resp.get("ReplicationConfigs", [])
+except Exception as ex:
+self.log.error("Error while describing replication configs: %s", 
str(ex))
+return []

Review Comment:
   Do you really want to swallow any exception and return an empty list in that 
case? We might miss some issues



##
providers/src/airflow/providers/amazon/aws/hooks/dms.py:
##
@@ -219,3 +224,158 @@ def wait_for_task_status(self, replication_task_arn: str, 
status: DmsTaskWaiterS
 ],
 WithoutSettings=True,
 )
+
+def describe_replication_configs(self, filters: list[dict] | None = None, 
**kwargs) -> list[dict]:
+"""
+Return list of serverless replication configs.
+
+.. seealso::
+- 
:external+boto3:py:meth:`DatabaseMigrationService.Client.describe_replication_configs`
+
+:param filters: List of filter objects
+:return: List of replication tasks
+"""
+filters = filters if filters is not None else []
+
+try:
+resp = self.conn.describe_replication_configs(Filters=filters, 
**kwargs)
+return resp.get("ReplicationConfigs", [])
+except Exception as ex:
+self.log.error("Error while describing replication configs: %s", 
str(ex))
+return []
+
+def create_replication_config(
+self,
+replication_config_id: str,
+source_endpoint_arn: str,
+target_endpoint_arn: str,
+compute_config: dict[str, Any],
+replication_type: str,
+table_mappings: str,
+additional_config_kwargs: dict[str, Any] | None = None,
+**kwargs,
+):
+"""
+Create an AWS DMS Serverless configuration that can be used to start 
an DMS Serverless replication.
+
+.. seealso::
+- 
:external+boto3:py:meth:`DatabaseMigrationService.Client.create_replication_config`
+
+:param replicationConfigId: Unique identifier used to create a 
ReplicationConfigArn.
+:param sourceEndpointArn: ARN of the source endpoint
+:param targetEndpointArn: ARN of the target endpoint
+:param computeConfig: Parameters for provisioning an DMS Serverless 
replication.
+:param replicationType: type of DMS Serverless replication
+:param tableMappings: JSON table mappings
+:param tags: Key-value tag pairs
+:param resourceId: Unique value or name that you set for a given 
resource that can be used to construct an Amazon Resource Name (ARN) for that 
resource.
+:param supplementalSettings: JSON settings for specifying supplemental 
data
+:param replicationSettings: JSON settings for DMS Serverless 
replications
+
+:return: ReplicationConfigArn
+
+"""
+if additional_config_kwargs is None:
+additional_config_kwargs = {}
+try:
+resp = self.conn.create_replication_config(
+ReplicationConfigIdentifier=replication_config_id,
+SourceEndpointArn=source_endpoint_arn,
+TargetEndpointArn=target_endpoint_arn,
+ComputeConfig=compute_config,
+ReplicationType=replication_type,
+TableMappings=table_mappings,
+**additional_config_kwargs,
+)
+arn = resp.get("ReplicationConfig", {}).get("ReplicationConfigArn")
+self.log.info("Successfully created replication config: %s", arn)
+return arn
+
+except ClientError as err:
+err_str = f"Error: {err.get('Error','').get('Code','')}: 
{err.get('Error','').get('Message','')}"
+self.log.error("Error while creating replication config: %s", 
err_str)
+raise err
+
+def describe_replications(self, filters: list[dict[str, Any]] | None = 
None, **kwargs) -> list[dict]:
+"""
+Return list of serverless replications.
+
+.. seealso::
+- 
:external+boto3:py:meth:`DatabaseMigrationService.Clie