Author: bobby
Date: Mon Nov 12 20:38:55 2012
New Revision: 1408447

URL: http://svn.apache.org/viewvc?rev=1408447&view=rev
Log:
svn merge -c 1408444 FIXES: MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by default (Ravi Prakash via bobby)

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1408447&r1=1408446&r2=1408447&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Mon Nov 12 20:38:55 2012 @@ -510,6 +510,9 @@ Release 0.23.5 - UNRELEASED MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe via bobby) + + MAPREDUCE-4786. 
Job End Notification retry interval is 5 milliseconds by + default (Ravi Prakash via bobby) Release 0.23.4 - UNRELEASED Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java?rev=1408447&r1=1408446&r2=1408447&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java Mon Nov 12 20:38:55 2012 @@ -53,7 +53,7 @@ public class JobEndNotifier implements C protected String userUrl; protected String proxyConf; protected int numTries; //Number of tries to attempt notification - protected int waitInterval; //Time to wait between retrying notification + protected int waitInterval; //Time (ms) to wait between retrying notification protected URL urlToNotify; //URL to notify read from the config protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification @@ -71,10 +71,10 @@ public class JobEndNotifier implements C , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1) ); waitInterval = Math.min( - conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5) - , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5) + conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000) + , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000) ); - waitInterval = (waitInterval < 0) ? 
5 : waitInterval; + waitInterval = (waitInterval < 0) ? 5000 : waitInterval; userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL); Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java?rev=1408447&r1=1408446&r2=1408447&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java Mon Nov 12 20:38:55 2012 @@ -55,22 +55,22 @@ public class TestJobEndNotifier extends //Test maximum retry interval is capped by //MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL private void testWaitInterval(Configuration conf) { - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5"); - conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1"); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000"); setConf(conf); - Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval, - waitInterval == 1); + Assert.assertTrue("Expected waitInterval to be 1000, but was " + + waitInterval, waitInterval == 1000); - conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000"); setConf(conf); - Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval, - waitInterval == 5); + 
Assert.assertTrue("Expected waitInterval to be 5000, but was " + + waitInterval, waitInterval == 5000); //Test negative numbers are set to default conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10"); setConf(conf); - Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval, - waitInterval == 5); + Assert.assertTrue("Expected waitInterval to be 5000, but was " + + waitInterval, waitInterval == 5000); } private void testProxyConfiguration(Configuration conf) { @@ -125,17 +125,28 @@ public class TestJobEndNotifier extends public void testNotifyRetries() throws InterruptedException { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent"); + JobReport jobReport = Mockito.mock(JobReport.class); + + long startTime = System.currentTimeMillis(); + this.notificationCount = 0; + this.setConf(conf); + this.notify(jobReport); + long endTime = System.currentTimeMillis(); + Assert.assertEquals("Only 1 try was expected but was : " + + this.notificationCount, this.notificationCount, 1); + Assert.assertTrue("Should have taken more than 5 seconds it took " + + (endTime - startTime), endTime - startTime > 5000); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3"); conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3"); conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000"); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000"); - JobReport jobReport = Mockito.mock(JobReport.class); - long startTime = System.currentTimeMillis(); + startTime = System.currentTimeMillis(); this.notificationCount = 0; this.setConf(conf); this.notify(jobReport); - long endTime = System.currentTimeMillis(); + endTime = System.currentTimeMillis(); Assert.assertEquals("Only 3 retries were expected but was : " + this.notificationCount, this.notificationCount, 3); Assert.assertTrue("Should have taken more than 9 seconds it took " Modified: 
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1408447&r1=1408446&r2=1408447&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml Mon Nov 12 20:38:55 2012 @@ -985,35 +985,6 @@ </description> </property> -<!-- Job Notification Configuration --> - -<!-- -<property> - <name>mapreduce.job.end-notification.url</name> - <value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value> - <description>Indicates url which will be called on completion of job to inform - end status of job. - User can give at most 2 variables with URI : $jobId and $jobStatus. - If they are present in URI, then they will be replaced by their - respective values. 
-</description> -</property> ---> - -<property> - <name>mapreduce.job.end-notification.retry.attempts</name> - <value>0</value> - <description>Indicates how many times hadoop should attempt to contact the - notification URL </description> -</property> - -<property> - <name>mapreduce.job.end-notification.retry.interval</name> - <value>30000</value> - <description>Indicates time in milliseconds between notification URL retry - calls</description> -</property> - <!-- Proxy Configuration --> <property> <name>mapreduce.jobtracker.taskcache.levels</name> @@ -1235,49 +1206,53 @@ </description> </property> +<!-- Job Notification Configuration --> <property> - <name>mapreduce.job.end-notification.max.attempts</name> - <value>5</value> - <final>true</final> - <description>The maximum number of times a URL will be read for providing job - end notification. Cluster administrators can set this to limit how long - after end of a job, the Application Master waits before exiting. Must be - marked as final to prevent users from overriding this. - </description> + <name>mapreduce.job.end-notification.url</name> + <!--<value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value>--> + <description>Indicates url which will be called on completion of job to inform + end status of job. + User can give at most 2 variables with URI : $jobId and $jobStatus. + If they are present in URI, then they will be replaced by their + respective values. +</description> </property> <property> - <name>mapreduce.job.end-notification.max.retry.interval</name> - <value>5</value> - <final>true</final> - <description>The maximum amount of time (in seconds) to wait before retrying - job end notification. Cluster administrators can set this to limit how long - the Application Master waits before exiting. 
Must be marked as final to - prevent users from overriding this.</description> + <name>mapreduce.job.end-notification.retry.attempts</name> + <value>0</value> + <description>The number of times the submitter of the job wants to retry job + end notification if it fails. This is capped by + mapreduce.job.end-notification.max.attempts</description> </property> <property> - <name>mapreduce.job.end-notification.url</name> - <value></value> - <description>The URL to send job end notification. It may contain sentinels - $jobId and $jobStatus which will be replaced with jobId and jobStatus. - </description> + <name>mapreduce.job.end-notification.retry.interval</name> + <value>1000</value> + <description>The number of milliseconds the submitter of the job wants to + wait before job end notification is retried if it fails. This is capped by + mapreduce.job.end-notification.max.retry.interval</description> </property> <property> - <name>mapreduce.job.end-notification.retry.attempts</name> + <name>mapreduce.job.end-notification.max.attempts</name> <value>5</value> - <description>The number of times the submitter of the job wants to retry job - end notification if it fails. This is capped by - mapreduce.job.end-notification.max.attempts</description> + <final>true</final> + <description>The maximum number of times a URL will be read for providing job + end notification. Cluster administrators can set this to limit how long + after end of a job, the Application Master waits before exiting. Must be + marked as final to prevent users from overriding this. + </description> </property> <property> - <name>mapreduce.job.end-notification.retry.interval</name> - <value>1</value> - <description>The number of seconds the submitter of the job wants to wait - before job end notification is retried if it fails. 
This is capped by - mapreduce.job.end-notification.max.retry.interval</description> + <name>mapreduce.job.end-notification.max.retry.interval</name> + <value>5000</value> + <final>true</final> + <description>The maximum amount of time (in milliseconds) to wait before + retrying job end notification. Cluster administrators can set this to + limit how long the Application Master waits before exiting. Must be marked + as final to prevent users from overriding this.</description> </property> <property>