The HDFS path should be something like hdfs://127.0.0.1:8020/user/cloudera/inputs/ — note the hdfs:// scheme rather than file://, and no space inside the URI. With file:///127.0.0.1:8020/... Hadoop treats "127.0.0.1:8020" as a directory name on the local filesystem, which is why you see "Mkdirs failed" and "Relative path in absolute URI".
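For example, a minimal sketch against the Spark 1.x Java API (the class name HdfsPathExample and the sample write are mine for illustration, and I'm assuming your quickstart VM's NameNode really listens on 127.0.0.1:8020):

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class HdfsPathExample {
        public static void main(String[] args) throws Exception {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("hdfs-path-example"));

            // Reading: pass an hdfs:// URI (scheme, NameNode host:port,
            // then the absolute path) -- not file://.
            JavaRDD<String> input =
                    sc.textFile("hdfs://127.0.0.1:8020/user/cloudera/inputs/");
            System.out.println("lines: " + input.count());

            // Writing from the driver (e.g. in a helper like your
            // writeCenterHistory): get a FileSystem for the same hdfs://
            // URI instead of the default (local) one; otherwise Hadoop
            // tries to mkdir "file:/127.0.0.1:8020/..." on local disk.
            FileSystem fs = FileSystem.get(
                    URI.create("hdfs://127.0.0.1:8020"), new Configuration());
            try (FSDataOutputStream out =
                    fs.create(new Path("/user/cloudera/outputs/output_spark"))) {
                out.writeUTF("hello hdfs");
            }

            sc.stop();
        }
    }

If fs.defaultFS in your core-site.xml already points at the NameNode, you can drop the authority entirely and just pass absolute paths such as /user/cloudera/inputs/.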
On Mon, Jun 8, 2015 at 4:15 PM, Pa Rö <paul.roewer1...@googlemail.com> wrote:

> Hello,
>
> I submit my Spark job with the following parameters:
>
> ./spark-1.1.0-bin-hadoop2.4/bin/spark-submit \
>   --class mgm.tp.bigdata.ma_spark.SparkMain \
>   --master spark://quickstart.cloudera:7077 \
>   ma-spark.jar \
>   1000
>
> and get the following exception:
>
> java.io.IOException: Mkdirs failed to create file:/127.0.0.1:8020/user/cloudera/outputs/output_spark
>     at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:438)
>     at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:906)
>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:887)
>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:784)
>     at mgm.tp.bigdata.ma_spark.Helper.writeCenterHistory(Helper.java:35)
>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:100)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>
> java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>     at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>     at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>     at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>     at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>     at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>     at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>     at org.apache.spark.rdd.RDD.count(RDD.scala:904)
>     at org.apache.spark.rdd.RDD.takeSample(RDD.scala:401)
>     at org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:426)
>     at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>     at org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:422)
>     at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>     at mgm.tp.bigdata.ma_spark.SparkMain.kmeans(SparkMain.java:123)
>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:102)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>     at java.net.URI.checkPath(URI.java:1804)
>     at java.net.URI.<init>(URI.java:752)
>     at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>     ... 43 more
>
> Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>     at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>     at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>     at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>     at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>     at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>     at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>     at org.apache.spark.rdd.RDD.foreach(RDD.scala:759)
>     at org.apache.spark.api.java.JavaRDDLike$class.foreach(JavaRDDLike.scala:297)
>     at org.apache.spark.api.java.JavaPairRDD.foreach(JavaPairRDD.scala:44)
>     at mgm.tp.bigdata.ma_spark.SparkMain.saveResults(SparkMain.java:216)
>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:108)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>     at java.net.URI.checkPath(URI.java:1804)
>     at java.net.URI.<init>(URI.java:752)
>     at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>     ... 45 more
>
> I set my path like:
> file:///127.0.0.1:8020/user/cloudera/inputs/
> (the NameNode of Hadoop)
>
> How must I set the path to HDFS?
>
> Best regards,
> Paul

--
Thanks & regards,
Nirmal

Associate Technical Lead - Data Technologies Team, WSO2 Inc.
Mobile: +94715779733
Blog: http://nirmalfdo.blogspot.com/