Repository: incubator-airflow Updated Branches: refs/heads/master 0da5125ed -> 4ec932b55
[AIRFLOW-1035] Use binary exponential backoff Closes #2196 from IvanVergiliev/exponential-backoff Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/4ec932b5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/4ec932b5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/4ec932b5 Branch: refs/heads/master Commit: 4ec932b551774bb394c5770c4d2660f565a4c592 Parents: 0da5125 Author: Ivan Vergiliev <ivan.vergil...@gmail.com> Authored: Fri Apr 7 19:35:03 2017 +0200 Committer: Bolke de Bruin <bo...@xs4all.nl> Committed: Fri Apr 7 19:35:03 2017 +0200 ---------------------------------------------------------------------- airflow/models.py | 10 +++++++++- tests/models.py | 17 ++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4ec932b5/airflow/models.py ---------------------------------------------------------------------- diff --git a/airflow/models.py b/airflow/models.py index 42b621d..e6374d4 100755 --- a/airflow/models.py +++ b/airflow/models.py @@ -1181,7 +1181,15 @@ class TaskInstance(Base): """ delay = self.task.retry_delay if self.task.retry_exponential_backoff: - delay_backoff_in_seconds = delay.total_seconds() ** self.try_number + # timedelta has a maximum representable value. The exponentiation + # here means this value can be exceeded after a certain number + # of tries (around 50 if the initial delay is 1s, even fewer if + # the delay is larger). Cap the value here before creating a + # timedelta object so the operation doesn't fail. 
+ delay_backoff_in_seconds = min( + delay.total_seconds() * (2 ** (self.try_number - 1)), + timedelta.max.total_seconds() - 1 + ) delay = timedelta(seconds=delay_backoff_in_seconds) if self.task.max_retry_delay: delay = min(self.task.max_retry_delay, delay) http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4ec932b5/tests/models.py ---------------------------------------------------------------------- diff --git a/tests/models.py b/tests/models.py index 20da4d4..a30830e 100644 --- a/tests/models.py +++ b/tests/models.py @@ -774,9 +774,8 @@ class TaskInstanceTest(unittest.TestCase): self.assertEqual(ti.try_number, 4) def test_next_retry_datetime(self): - delay = datetime.timedelta(seconds=3) - delay_squared = datetime.timedelta(seconds=9) - max_delay = datetime.timedelta(seconds=10) + delay = datetime.timedelta(seconds=30) + max_delay = datetime.timedelta(minutes=60) dag = models.DAG(dag_id='fail_dag') task = BashOperator( @@ -795,13 +794,17 @@ class TaskInstanceTest(unittest.TestCase): ti.try_number = 1 dt = ti.next_retry_datetime() - self.assertEqual(dt, ti.end_date+delay) + self.assertEqual(dt, ti.end_date + delay) - ti.try_number = 2 + ti.try_number = 6 dt = ti.next_retry_datetime() - self.assertEqual(dt, ti.end_date+delay_squared) + self.assertEqual(dt, ti.end_date + (2 ** 5) * delay) - ti.try_number = 3 + ti.try_number = 8 + dt = ti.next_retry_datetime() + self.assertEqual(dt, ti.end_date+max_delay) + + ti.try_number = 50 dt = ti.next_retry_datetime() self.assertEqual(dt, ti.end_date+max_delay)