Hi,
I'm trying to run a streaming job on a single-node EMR 4.1/Spark 1.5 cluster. It's throwing an IllegalArgumentException ("requirement failed") right away on submit. The full console output is attached below. Thanks for any insights.

-- Nick
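P.S. One detail that stands out in the verbose output below: the run script hands the same AwsCredentials.properties to spark-submit through both --files and --jars. Reconstructed from the "Parsed arguments" section (my abbreviation of the effective command, not the literal contents of run-event-streaming.sh):

    spark-submit --verbose \
      --master yarn --deploy-mode cluster \
      --name SparkStreamingBaseKafkaAvro \
      --driver-memory 10G --driver-cores 1 \
      --executor-memory 10G --executor-cores 1 \
      --files file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties \
      --jars file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties \
      --class com.wb.analytics.spark.services.streaming.drivers.StreamingKafkaConsumerDriver \
      file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/workflow/lib/spark-kafka-services-1.0.jar

The YARN client later warns that exact file was "added multiple times to distributed cache", immediately before the failure.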
adjust ~/spark-pipeline-framework-1.1.6-SNAPSHOT > ./bin/run-event-streaming.sh conf/dev/nick-malcolm-events.properties > console.txt
Using properties file: /usr/lib/spark/conf/spark-defaults.conf
Adding default property: spark.executor.extraJavaOptions=-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
Adding default property: spark.history.fs.logDirectory=hdfs:///var/log/spark/apps
Adding default property: spark.eventLog.enabled=true
Adding default property: spark.shuffle.service.enabled=true
Adding default property: spark.driver.extraLibraryPath=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
Adding default property: spark.yarn.historyServer.address=ip-10-247-129-50.ec2.internal:18080
Adding default property: spark.driver.extraJavaOptions=-Dlog4j.configuration=file:///etc/spark/conf/log4j.properties -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=512M -XX:OnOutOfMemoryError='kill -9 %p'
Adding default property: spark.master=yarn
Adding default property: spark.executor.extraLibraryPath=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
Adding default property: spark.driver.extraClassPath=/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar
Adding default property: spark.eventLog.dir=hdfs:///var/log/spark/apps
Adding default property: spark.executor.extraClassPath=/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar
Adding default property: spark.history.ui.port=18080
Parsed arguments:
  master                  yarn
  deployMode              cluster
  executorMemory          10G
  executorCores           1
  totalExecutorCores      null
  propertiesFile          /usr/lib/spark/conf/spark-defaults.conf
  driverMemory            10G
  driverCores             1
  driverExtraClassPath    /etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar
  driverExtraLibraryPath  /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
  driverExtraJavaOptions  -Dlog4j.configuration=file:///etc/spark/conf/log4j.properties -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=512M -XX:OnOutOfMemoryError='kill -9 %p'
  supervise               false
  queue                   null
  numExecutors            null
  files                   file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties
  pyFiles                 null
  archives                null
  mainClass               com.wb.analytics.spark.services.streaming.drivers.StreamingKafkaConsumerDriver
  primaryResource         file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/workflow/lib/spark-kafka-services-1.0.jar
  name                    SparkStreamingBaseKafkaAvro
  childArgs               []
  jars                    file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties
  packages                null
  packagesExclusions      null
  repositories            null
  verbose                 true

Spark properties used, including those specified through --conf and those from the properties file /usr/lib/spark/conf/spark-defaults.conf:
  spark.executor.extraLibraryPath -> /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
  spark.driver.memory -> 10G
  spark.driver.extraLibraryPath -> /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
  spark.yarn.historyServer.address -> ip-10-247-129-50.ec2.internal:18080
  spark.eventLog.enabled -> true
  spark.history.ui.port -> 18080
  spark.executor.extraJavaOptions -> -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
  spark.history.fs.logDirectory -> hdfs:///var/log/spark/apps
  spark.shuffle.service.enabled -> true
  spark.driver.extraJavaOptions -> -Dlog4j.configuration=file:///etc/spark/conf/log4j.properties -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=512M -XX:OnOutOfMemoryError='kill -9 %p'
  spark.eventLog.dir -> hdfs:///var/log/spark/apps
  spark.executor.extraClassPath -> /etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar
  spark.master -> yarn
  spark.driver.extraClassPath -> /etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar

Main class:
  org.apache.spark.deploy.yarn.Client
Arguments:
  --name SparkStreamingBaseKafkaAvro
  --driver-memory 10G
  --driver-cores 1
  --executor-memory 10G
  --executor-cores 1
  --files file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties
  --addJars file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties
  --jar file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/workflow/lib/spark-kafka-services-1.0.jar
  --class com.wb.analytics.spark.services.streaming.drivers.StreamingKafkaConsumerDriver
System properties:
  spark.executor.extraLibraryPath -> /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
  spark.executor.memory -> 10G
  spark.driver.memory -> 10G
  spark.driver.extraLibraryPath -> /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
  spark.yarn.historyServer.address -> ip-10-247-129-50.ec2.internal:18080
  spark.eventLog.enabled -> true
  spark.history.ui.port -> 18080
  SPARK_SUBMIT -> true
  spark.executor.extraJavaOptions -> -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
  spark.shuffle.service.enabled -> true
  spark.history.fs.logDirectory -> hdfs:///var/log/spark/apps
  spark.app.name -> SparkStreamingBaseKafkaAvro
  spark.driver.extraJavaOptions -> -Dlog4j.configuration=file:///etc/spark/conf/log4j.properties -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=512M -XX:OnOutOfMemoryError='kill -9 %p'
  spark.submit.deployMode -> cluster
  spark.executor.extraClassPath -> /etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar
  spark.eventLog.dir -> hdfs:///var/log/spark/apps
  spark.master -> yarn-cluster
  spark.executor.cores -> 1
  spark.driver.extraClassPath -> /etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop/*:/usr/lib/hadoop-hdfs/*:/usr/lib/hadoop-mapreduce/*:/usr/lib/hadoop-yarn/*:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/spark/lib/RedshiftJDBC41-1.1.7.1007.jar
Classpath elements:

15/12/11 16:44:43 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/12/11 16:44:43 INFO client.RMProxy: Connecting to ResourceManager at ip-10-247-129-50.ec2.internal/10.247.129.50:8032
15/12/11 16:44:43 INFO yarn.Client: Requesting a new application from cluster with 1 NodeManagers
15/12/11 16:44:43 INFO yarn.Client: Verifying our application has not requested more than the maximum memory capability of the cluster (54272 MB per container)
15/12/11 16:44:43 INFO yarn.Client: Will allocate AM container, with 11264 MB memory including 1024 MB overhead
15/12/11 16:44:43 INFO yarn.Client: Setting up container launch context for our AM
15/12/11 16:44:43 INFO yarn.Client: Setting up the launch environment for our AM container
15/12/11 16:44:43 INFO yarn.Client: Preparing resources for our AM container
15/12/11 16:44:44 INFO yarn.Client: Uploading resource file:/usr/lib/spark/lib/spark-assembly-1.5.0-hadoop2.6.0-amzn-1.jar -> hdfs://ip-10-247-129-50.ec2.internal:8020/user/hadoop/.sparkStaging/application_1447442727308_0126/spark-assembly-1.5.0-hadoop2.6.0-amzn-1.jar
15/12/11 16:44:44 INFO metrics.MetricsSaver: MetricsConfigRecord disabledInCluster: false instanceEngineCycleSec: 60 clusterEngineCycleSec: 60 disableClusterEngine: false maxMemoryMb: 3072 maxInstanceCount: 500 lastModified: 1447442734295
15/12/11 16:44:44 INFO metrics.MetricsSaver: Created MetricsSaver j-2H3BTA60FGUYO:i-f7812947:SparkSubmit:15603 period:60 /mnt/var/em/raw/i-f7812947_20151211_SparkSubmit_15603_raw.bin
15/12/11 16:44:45 INFO metrics.MetricsSaver: 1 aggregated HDFSWriteDelay 1276 raw values into 1 aggregated values, total 1
15/12/11 16:44:45 INFO yarn.Client: Uploading resource file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/workflow/lib/spark-kafka-services-1.0.jar -> hdfs://ip-10-247-129-50.ec2.internal:8020/user/hadoop/.sparkStaging/application_1447442727308_0126/spark-kafka-services-1.0.jar
15/12/11 16:44:45 INFO yarn.Client: Uploading resource file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties -> hdfs://ip-10-247-129-50.ec2.internal:8020/user/hadoop/.sparkStaging/application_1447442727308_0126/AwsCredentials.properties
15/12/11 16:44:45 WARN yarn.Client: Resource file:/home/hadoop/spark-pipeline-framework-1.1.6-SNAPSHOT/conf/AwsCredentials.properties added multiple times to distributed cache.
15/12/11 16:44:45 INFO yarn.Client: Deleting staging directory .sparkStaging/application_1447442727308_0126
Exception in thread "main" java.lang.IllegalArgumentException: requirement failed
    at scala.Predef$.require(Predef.scala:221)
    at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$6$$anonfun$apply$2.apply(Client.scala:392)
    at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$6$$anonfun$apply$2.apply(Client.scala:390)
    at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
    at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$6.apply(Client.scala:390)
    at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$6.apply(Client.scala:388)
    at scala.collection.immutable.List.foreach(List.scala:318)
    at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:388)
    at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:629)
    at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:119)
    at org.apache.spark.deploy.yarn.Client.run(Client.scala:907)
    at org.apache.spark.deploy.yarn.Client$.main(Client.scala:966)
    at org.apache.spark.deploy.yarn.Client.main(Client.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
adjust ~/spark-pipeline-framework-1.1.6-SNAPSHOT >
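Digging into the trace: the require() that blows up is in Client.prepareLocalResources, in the loop that distributes the --jars, --files and --archives resources. From what I can tell in the Spark 1.5 sources, this is approximately the relevant code (excerpt from org.apache.spark.deploy.yarn.Client, lightly abridged; the comments are mine):

    // Client.scala (Spark 1.5), prepareLocalResources -- approximate excerpt
    List(
      (args.addJars, LocalResourceType.FILE, true),
      (args.files, LocalResourceType.FILE, false),
      (args.archives, LocalResourceType.ARCHIVE, false)
    ).foreach { case (flist, resType, addToClasspath) =>
      if (flist != null && !flist.isEmpty()) {
        flist.split(',').foreach { file =>
          // distribute() returns a null link name for a URI it has already
          // staged, after logging the "added multiple times to distributed
          // cache" warning seen above.
          val (_, localizedPath) = distribute(file, resType = resType)
          require(localizedPath != null)  // Client.scala:392 -- "requirement failed"
          if (addToClasspath) {
            cachedSecondaryJarLinks += localizedPath
          }
        }
      }
    }

If I'm reading that right, passing AwsCredentials.properties through both --files and --jars means the second pass over it gets a null localizedPath, and the requirement fails.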
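Assuming that duplicate really is the trigger, my plan is to drop the properties file from --jars (it isn't a jar anyway) and keep it only in --files. As I understand it, YARN localizes --files into each container's working directory under the original file name, so the driver should be able to load it by bare name. A minimal sketch of what I mean (hypothetical code, not my actual driver):

    // Read a properties file shipped with --files; YARN links it into the
    // container's current working directory under its original name.
    import java.io.FileInputStream
    import java.util.Properties

    val props = new Properties()
    val in = new FileInputStream("AwsCredentials.properties")
    try props.load(in) finally in.close()

Does that sound right, or am I off track here?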