(systemds) branch main updated: [SYSTEMDS-2926] AWS script fix for EMR-7.0.0, part 2

mboehm7 Sat, 16 Mar 2024 08:05:48 -0700

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new 09508528e0 [SYSTEMDS-2926] AWS script fix for EMR-7.0.0, part 2
09508528e0 is described below

commit 09508528e0ed893ffb38c43cf83e8ef9993d9efe
Author: lachezar-n <[email protected]>
AuthorDate: Sat Mar 16 16:00:38 2024 +0100

    [SYSTEMDS-2926] AWS script fix for EMR-7.0.0, part 2
    
    Additional fixes for EMR 7
    Closes #2004.
---
 scripts/aws/run_systemds_script.sh        |  9 +++++----
 scripts/aws/spinup_systemds_cluster.sh    | 26 ++++++++++++++++++--------
 scripts/aws/terminate_systemds_cluster.sh |  5 +++--
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/scripts/aws/run_systemds_script.sh b/scripts/aws/run_systemds_script.sh
index db2d7185e2..48f5a59f66 100755
--- a/scripts/aws/run_systemds_script.sh
+++ b/scripts/aws/run_systemds_script.sh
@@ -41,7 +41,7 @@ fi
 
 dml_filename=$(basename $1)
 
-STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
+STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID  --region $REGION --steps "Type=Spark,
   Name='SystemDS Spark Program',
   ActionOnFailure=CONTINUE,
   Args=[
@@ -54,7 +54,8 @@ STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
 
 STEP_ID=$(echo $STEP_INFO | jq .StepIds | tr -d '"' | tr -d ']' | tr -d '[' | tr -d '[:space:]' )
 echo "Waiting for the step to finish"
-aws emr wait step-complete --cluster-id $CLUSTER_ID --step-id $STEP_ID
+aws emr wait step-complete --cluster-id $CLUSTER_ID --step-id $STEP_ID --region $REGION
+
+aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stderr"
+aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stdout"
 
-aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stderr"
-aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stdout"
\ No newline at end of file
diff --git a/scripts/aws/spinup_systemds_cluster.sh b/scripts/aws/spinup_systemds_cluster.sh
index 58f9f2db05..c319f270d3 100755
--- a/scripts/aws/spinup_systemds_cluster.sh
+++ b/scripts/aws/spinup_systemds_cluster.sh
@@ -49,9 +49,11 @@ set_config "BUCKET" $BUCKET-$(((RANDOM % 999) + 1000))
 #Source again to update the changes for the current session
 source systemds_cluster.config
 
-#Create systemDS bucket (LocationConstraint configuration required regions outside of us-east-1)
-aws s3api create-bucket --bucket $BUCKET --region $REGION --create-bucket-configuration LocationConstraint=$REGION &> /dev/null
-aws s3api create-bucket --bucket $BUCKET-logs --region $REGION --create-bucket-configuration LocationConstraint=$REGION &> /dev/null
+#Create systemDS bucket
+#LocationConstraint configuration required regions outside of us-east-1
+if [ "$REGION" = "us-east-1" ]; then LOCATION_CONSTRAINT=""; else LOCATION_CONSTRAINT="--create-bucket-configuration LocationConstraint=$REGION"; fi
+aws s3api create-bucket --bucket $BUCKET --region $REGION $LOCATION_CONSTRAINT &> /dev/null
+aws s3api create-bucket --bucket $BUCKET-logs --region $REGION $LOCATION_CONSTRAINT &> /dev/null
 
 # Upload Jar and scripts to s3
 aws s3 sync $SYSTEMDS_TARGET_DIRECTORY s3://$BUCKET --exclude "*" --include "*.dml" --include "*config.xml" --include "*DS.jar*"
@@ -87,7 +89,13 @@ CLUSTER_INFO=$(aws emr create-cluster \
                         "InstanceGroupType":"CORE",
                         "InstanceType":"'${INSTANCES_TYPE}'",
                         "Name":"Core Instance Group"}]'\
- --configurations '[{"Classification":"spark","Properties":{"maximizeResourceAllocation": "true"}}]'\
+ --configurations '[{"Classification":"spark","Properties":{"maximizeResourceAllocation": "true"}},
+                     {"Classification": "spark-env",
+                         "Configurations": [{
+                           "Classification": "export",
+                           "Properties": {"JAVA_HOME": "/usr/lib/jvm/jre-11"}
+                         }]
+                     }]'\
  --scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
  --region $REGION)
 
@@ -98,21 +106,23 @@ set_config "CLUSTER_ID" $CLUSTER_ID
 ip_address=$(curl ipecho.net/plain ; echo)
 
 #Add your ip to the security group
-aws ec2 create-security-group --group-name ElasticMapReduce-master --description "info" &> /dev/null
+aws ec2 create-security-group --group-name ElasticMapReduce-master --description "info" --region $REGION &> /dev/null
 aws ec2 authorize-security-group-ingress \
     --group-name ElasticMapReduce-master \
     --protocol tcp \
     --port 22 \
-    --cidr "${ip_address}"/24 &> /dev/null
+    --cidr "${ip_address}"/24 \
+    --region $REGION &> /dev/null
 
 # Wait for cluster to start
 echo "Waiting for cluster running state"
-aws emr wait cluster-running --cluster-id $CLUSTER_ID
+aws emr wait cluster-running --cluster-id $CLUSTER_ID --region $REGION
 
 echo "Cluster info:"
-export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID | jq .Cluster.MasterPublicDnsName | tr -d '"')
+export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID --region $REGION | jq .Cluster.MasterPublicDnsName | tr -d '"')
 
 aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION \
     --command 'aws s3 cp s3://'${BUCKET}' . --recursive --exclude "*" --include "*DS.jar*"'
 
 echo "Spinup finished."
+
diff --git a/scripts/aws/terminate_systemds_cluster.sh b/scripts/aws/terminate_systemds_cluster.sh
index 2de546723c..835dcf5fb6 100755
--- a/scripts/aws/terminate_systemds_cluster.sh
+++ b/scripts/aws/terminate_systemds_cluster.sh
@@ -22,10 +22,11 @@
 
 source systemds_cluster.config
 
-aws emr terminate-clusters --cluster-ids $CLUSTER_ID
+aws emr terminate-clusters --cluster-ids $CLUSTER_ID --region $REGION
 
 # Wait for cluster to start
 echo "Waiting for cluster terminated state"
-aws emr wait cluster-terminated --cluster-id $CLUSTER_ID
+aws emr wait cluster-terminated --cluster-id $CLUSTER_ID --region $REGION
 
 echo "Cluster: ${CLUSTER_ID} terminated."
+

(systemds) branch main updated: [SYSTEMDS-2926] AWS script fix for EMR-7.0.0, part 2

Reply via email to