Hi all,

I have a hadoop-2.0.5-alpha cluster with 3 DataNodes. The ResourceManager and all DataNodes are started, and I can access the ResourceManager web UI.

I wrote a Java client to submit a job (the TestJob class below), but the job is never submitted successfully; it throws the exception shown further down every time.

My configuration files (yarn-site.xml, mapred-site.xml, core-site.xml, and hdfs-site.xml) are attached below. Can anyone help me? Thanks.

---------- my Java client (TestJob.java)

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class TestJob {

    public void execute() {
        // Load the cluster config files. addResource(String) resolves the
        // name on the classpath, so "resources/..." must be visible there.
        Configuration conf1 = new Configuration();
        conf1.addResource("resources/core-site.xml");
        conf1.addResource("resources/hdfs-site.xml");
        conf1.addResource("resources/yarn-site.xml");
        conf1.addResource("resources/mapred-site.xml");
        JobConf conf = new JobConf(conf1);

        conf.setJar("/home/francis/hadoop-jobs/MapReduceJob.jar");
        conf.setJobName("Test");

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // My own mapper/reducer classes (not included here).
        conf.setMapperClass(DisplayRequestMapper.class);
        conf.setReducerClass(DisplayRequestReducer.class);

        FileInputFormat.setInputPaths(conf, new Path("/home/francis/hadoop-jobs/2013070907.FNODE.2.txt"));
        FileOutputFormat.setOutputPath(conf, new Path("/home/francis/hadoop-jobs/result/"));

        try {
            // The exception below is thrown here, inside the JobClient constructor.
            JobClient client = new JobClient(conf);
            RunningJob job = client.submitJob(conf);
            job.waitForCompletion();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
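
One thing I am unsure about: Configuration.addResource(String) looks the file up on the classpath and, as far as I can tell, quietly skips a resource it cannot find. So a minimal sanity check (just a sketch, using conf1 from the code above, placed right before the JobClient is constructed) would show whether the XML files are actually being loaded:

        // If the XML resources were not found on the classpath,
        // mapreduce.framework.name comes back as null (or "local" from
        // mapred-default.xml) instead of "yarn".
        System.out.println("mapreduce.framework.name = " + conf1.get("mapreduce.framework.name"));
        System.out.println("fs.defaultFS = " + conf1.get("fs.defaultFS"));
        System.out.println("yarn.resourcemanager.address = " + conf1.get("yarn.resourcemanager.address"));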

 

---------- Exception

 

jvm 1    | java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
jvm 1    |      at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:119)
jvm 1    |      at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:81)
jvm 1    |      at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:74)
jvm 1    |      at org.apache.hadoop.mapred.JobClient.init(JobClient.java:482)
jvm 1    |      at org.apache.hadoop.mapred.JobClient.<init>(JobClient.java:461)
jvm 1    |      at com.rh.elastic.hadoop.job.TestJob.execute(TestJob.java:59)
 

 

Thanks,

Francis.Hu

 

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->

<!--Configurations for ResourceManager and NodeManager:-->
<property>
	<name>yarn.acl.enable</name>
	<value>false</value>
	<description>Enable ACLs? Defaults to false.</description>
</property>

<!--
<property>
	<name>yarn.admin.acl</name>
	<value>*</value>
	<description>ACL to set admins on the cluster. ACLs are of the form comma-separated-users space comma-separated-groups. Defaults to the special value of *, which means anyone. The special value of just a space means no one has access.</description>
</property>
<property>
	<name>yarn.log-aggregation-enable</name>
	<value>false</value>
	<description>Configuration to enable or disable log aggregation</description>
</property>
-->
<!--
<property>
	<name>yarn.web-proxy.address</name>
	<value>192.168.219.129:8034</value>
	<description>host:port if this is the same as yarn.resourcemanager.webapp.address or it is not defined then the ResourceManager will run the proxy otherwise a standalone proxy server will need to be launched.</description>
</property>
-->
<!--Configurations for ResourceManager:-->
<property>
	<name>yarn.resourcemanager.address</name>
	<value>192.168.219.129:9001</value>
	<description>ResourceManager host:port for clients to submit jobs.</description>
</property>
<property>
	<name>yarn.resourcemanager.scheduler.address</name>
	<value>192.168.219.129:8030</value>
	<description>ResourceManager host:port for ApplicationMasters to talk to Scheduler to obtain resources.</description>
</property>
<property>
	<name>yarn.resourcemanager.resource-tracker.address</name>
	<value>192.168.219.129:8031</value>
	<description>ResourceManager host:port for NodeManagers.</description>
</property>
<property>
	<name>yarn.resourcemanager.admin.address</name>
	<value>192.168.219.129:8033</value>
	<description>ResourceManager host:port for administrative commands.</description>
</property>
<property>
	<name>yarn.resourcemanager.webapp.address</name>
	<value>192.168.219.129:8088</value>
	<description>ResourceManager web-ui host:port.</description>
</property>
<property>
	<name>yarn.resourcemanager.scheduler.class</name>
	<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
	<description>ResourceManager Scheduler class.</description>
</property>
<property>
	<name>yarn.scheduler.minimum-allocation-mb</name>
	<value>1024</value>
	<description>Minimum limit of memory to allocate to each container request at the Resource Manager.</description>
</property>
<property>
	<name>yarn.scheduler.maximum-allocation-mb</name>
	<value>8192</value>
	<description>Maximum limit of memory to allocate to each container request at the Resource Manager.</description>
</property>


<!-- Configurations for NodeManager: -->
<property>
	<name>yarn.nodemanager.resource.memory-mb</name>
	<value>8192</value>
	<description>Resource, i.e. available physical memory in MB, for the given NodeManager. Defines the total resources on the NodeManager that can be made available to running containers.</description>
</property>
<property>
	<name>yarn.nodemanager.vmem-pmem-ratio</name>
	<value>2.1</value>
	<description>Maximum ratio by which virtual memory usage of tasks may exceed physical memory</description>
</property>
<property>
	<name>yarn.nodemanager.local-dirs</name>
	<value>/home/francis/hadoop2-hdfs/yarn</value>
	<description>Comma-separated list of paths on the local filesystem where intermediate data is written. Multiple paths help spread disk I/O.</description>
</property>
<property>
	<name>yarn.nodemanager.log-dirs</name>
	<value>/home/francis/hadoop2-hdfs/yarn-log</value>
	<description>Comma-separated list of paths on the local filesystem where logs are written. Multiple paths help spread disk I/O.</description>
</property>
<property>
	<name>yarn.nodemanager.log.retain-seconds</name>
	<value>10800</value>
	<description>Default time (in seconds) to retain log files on the NodeManager. Only applicable if log-aggregation is disabled.</description>
</property>
<property>
	<name>yarn.nodemanager.remote-app-log-dir</name>
	<value>/logs</value>
	<description>HDFS directory where the application logs are moved on application completion. Need to set appropriate permissions. Only applicable if log-aggregation is enabled.</description>
</property>
<property>
	<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
	<value>logs</value>
	<description>Suffix appended to the remote log dir. Logs will be aggregated to ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam}. Only applicable if log-aggregation is enabled.</description>
</property>
<property>
	<name>yarn.nodemanager.aux-services</name>
	<value>mapreduce.shuffle</value>
	<description>Shuffle service that needs to be set for Map Reduce applications.</description>
</property>

<!-- Configurations for History Server (needs to be moved elsewhere): -->
<property>
	<name>yarn.log-aggregation.retain-seconds</name>
	<value>-1</value>
	<description>How long to keep aggregation logs before deleting them. -1 disables. Be careful, set this too small and you will spam the name node.</description>
</property>
<property>
	<name>yarn.log-aggregation.retain-check-interval-seconds</name>
	<value>-1</value>
	<description>Time between checks for aggregated log retention. If set to 0 or a negative value then the value is computed as one-tenth of the aggregated log retention time. Be careful, set this too small and you will spam the name node.</description>
</property>

</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<!-- Configurations for MapReduce Applications: -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>Execution framework set to Hadoop YARN.</description>
</property>

<property>
<name>mapreduce.map.memory.mb</name>
<value>1536</value>
<description>Larger resource limit for maps.</description>
</property>

<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1024M</value>
<description>Larger heap-size for child jvms of maps.</description>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3072</value>
<description>Larger resource limit for reduces.</description>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2560M</value>
<description>Larger heap-size for child jvms of reduces.</description>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>512</value>
<description>Higher memory-limit while sorting data for efficiency.</description>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
<description>More streams merged at once while sorting files.</description>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>50</value>
<description>Higher number of parallel copies run by reduces to fetch outputs from very large number of maps.</description>
</property>

<!-- Configurations for MapReduce JobHistory Server: -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>192.168.219.129:10020</value>
<description>MapReduce JobHistory Server host:port. Default port is 10020.</description>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>192.168.219.129:19888</value>
<description>MapReduce JobHistory Server Web UI host:port. Default port is 19888.</description>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/mr-history/tmp</value>
<description>Directory where history files are written by MapReduce jobs.</description>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/mr-history/done</value>
<description>Directory where history files are managed by the MR JobHistory Server.</description>
</property>

</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
	<!--
	<property>
		<name>fs.defaultFS</name>
		<value>hdfs://192.168.219.129:9000</value>
	</property>
	-->
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://RhCluster</value>
</property>

  <property>
		<name>io.file.buffer.size</name>
		<value>4096</value>
		<description>Size of read/write buffer used in SequenceFiles.</description>
	</property>
	
	<property>
   <name>hadoop.tmp.dir</name> 
   <value>/home/francis/hadoop2-hdfs/tmp</value> 
   <description>A base for other temporary directories.</description>
</property>

<!-- automatic failover configuration -->
<property>
   <name>ha.zookeeper.quorum</name>
   <value>192.168.219.129:2181,192.168.219.130:2181,192.168.219.132:2181</value>
 </property>

</configuration>
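
A note on the core-site.xml above: fs.defaultFS points at the logical nameservice RhCluster rather than a host:port, so the client also needs the HA entries (dfs.nameservices, dfs.ha.namenodes.RhCluster, dfs.client.failover.proxy.provider.RhCluster) from the hdfs-site.xml below to resolve it. A quick check from code, assuming conf1 is built as in TestJob above:

        // Uses org.apache.hadoop.fs.FileSystem. If the HA settings are not
        // loaded on the client, this fails while trying to resolve the
        // logical host name "RhCluster".
        FileSystem fs = FileSystem.get(conf1);
        System.out.println("home dir = " + fs.getHomeDirectory());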
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>  
   <name>dfs.namenode.name.dir</name>  
   <value>/home/francis/hadoop2-hdfs/name</value>  
   <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>  
</property>

<property>  
  <name>dfs.datanode.data.dir</name>  
  <value>/home/francis/hadoop2-hdfs/data</value>  
  <description>Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.</description>  
</property>

<property>  
   <name>dfs.blocksize</name>  
   <value>67108864</value>  
   <description>HDFS blocksize; 67108864 is 64MB (the docs suggest 268435456, i.e. 256MB, for large file-systems).</description>
</property>

<property>  
   <name>dfs.namenode.handler.count</name>  
   <value>10</value>  
   <description>More NameNode server threads to handle RPCs from large number of DataNodes.</description>  
</property>


<!--copied from 1.1.2 configuration-->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>

<property>
<name>dfs.hosts</name>
<value>/home/francis/hadoop-2.0.5-alpha/etc/hadoop/slaves</value>
</property>


<!-- this is not supported in hadoop 1.x.x, but it is supported in 2.x.x -->

<property>
<name>dfs.support.append</name>
<value>true</value>
</property>

<property>
	<name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
	<value>true</value>
	<description>NOTE: this cannot be disabled if you need to APPEND to a file.</description>
</property>

<property>
	<name>dfs.client.block.write.replace-datanode-on-failure.policy</name>
	<value>DEFAULT</value>
	<description>NEVER: never add a new datanode. When the cluster size is extremely small, e.g. 3 nodes or less, cluster administrators may want to set the policy to NEVER in the default configuration file or disable this feature.</description>
</property>

<!--this is a temporary solution for append in 1.x.x-->
<!--
<property>
<name>dfs.support.broken.append</name>
<value>true</value>
</property>
-->

<!-- Configuration of High Availability with QJM -->
<property>
  <name>dfs.nameservices</name>
  <value>RhCluster</value>
  <description>the logical name for this new nameservice</description>
</property>
<property>
  <name>dfs.ha.namenodes.RhCluster</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.RhCluster.nn1</name>
  <value>192.168.219.129:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.RhCluster.nn2</name>
  <value>192.168.219.132:8020</value>
</property>
<property>
  <name>dfs.namenode.http-address.RhCluster.nn1</name>
  <value>192.168.219.129:50070</value>
</property>
<property>
  <name>dfs.namenode.http-address.RhCluster.nn2</name>
  <value>192.168.219.132:50070</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://192.168.219.129:8485;192.168.219.132:8485;192.168.219.130:8485/RhCluster</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.RhCluster</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>

<property>
  <name>dfs.ha.fencing.methods</name>
  <!--<value>shell(/bin/true)</value>-->
  <value>sshfence</value>
</property>

<property>
  <name>dfs.ha.fencing.ssh.connect-timeout</name>
  <value>30000</value>
</property>

<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/home/francis/.ssh/id_rsa</value>
</property>

<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/home/francis/hadoop2-hdfs/journalnode/data</value>
</property>

<!-- automatic failover configuration -->
 <property>
   <name>dfs.ha.automatic-failover.enabled.RhCluster</name>
   <value>true</value>
 </property>

<!--balancer configuration-->
 <property>
   <name>dfs.datanode.balance.bandwidthPerSec</name>
   <value>10485760</value>
   <description>10MB per second when transferring data for balancing.</description>
 </property>

 <!-- stale DataNode configuration -->
  <property>
   <name>dfs.namenode.avoid.read.stale.datanode</name>
   <value>true</value>
   <description>Avoid reading from DataNodes marked stale.</description>
 </property>
 <property>
   <name>dfs.namenode.avoid.write.stale.datanode</name>
   <value>true</value>
   <description>Avoid writing to DataNodes marked stale.</description>
 </property>
 <property>
   <name>dfs.namenode.stale.datanode.interval</name>
   <value>30000</value>
   <description>in milliseconds</description>
 </property>
  <property>
   <name>dfs.namenode.write.stale.datanode.ratio</name>
   <value>0.5f</value>
   <description>Fraction (0.0-1.0): when more than this ratio of DataNodes is stale, stop avoiding stale nodes for writes.</description>
 </property>
 
</configuration>
