Bobby Bruce has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/57274 )

Change subject: stdlib: Update the downloader to retry on failure
......................................................................

stdlib: Update the downloader to retry on failure

On occasion HTTP status codes, such as 429, are returned when
downloading the resources and/or the "resources.json" file. In such
cases it makes sense to retry the download.

This patch adds a Truncated Exponential Backoff algorithm to the
downloader to retry downloads, with a default maximum of 6 attempts.
This is done in cases where HTTP status codes 408, 429, and 5xx are
returned.

So that this feature applies to both the retrieval of the resources and
the "resources.json" file, the downloading of the "resources.json" file
has been modified to use the same `_download` function that is used to
download resources.

Change-Id: Ic4444b52f0f71d294fccec9de3140beece5f7576
---
M src/python/gem5/resources/downloader.py
1 file changed, 100 insertions(+), 13 deletions(-)



diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 5afceeb..2647c56 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -32,6 +32,10 @@
 import gzip
 import hashlib
 import base64
+import time
+import random
+from tempfile import gettempdir
+from urllib.error import HTTPError
 from typing import List, Dict

 from ..utils.filelock import FileLock
@@ -55,6 +59,33 @@

     return uri

+def _get_resources_json_at_url(url: str) -> Dict:
+    '''
+    Returns a resource JSON, in the form of a Python Dict. The URL location
+    of the JSON must be specified.
+
+    **Note**: The URL is assumed to be the location within a Google Source
+    repository. Special processing is done to handle this. This is the primary
+    reason there are separate functions for handling the retrieving of the
+    resources JSON compared to just using the `_download` function directly.
+
+    :param url: The URL of the JSON file.
+    '''
+
+    file_path = os.path.join(
+        gettempdir(),
+        f"gem5-resources-{hashlib.md5(url.encode()).hexdigest()}.base64",
+    )
+
+    _download(url, file_path)
+
+    # Note: Google Source does not properly support obtaining files as raw
+    # text. Therefore when we open the URL we receive the JSON in base64
+    # format. Conversion is needed before it can be loaded.
+    with open(file_path) as file:
+        to_return = json.loads(base64.b64decode(file.read()).decode("utf-8"))
+
+    return to_return

 def _get_resources_json() -> Dict:
     """
@@ -63,23 +94,16 @@
     :returns: The Resources JSON (as a Python Dictionary).
     """

-    # Note: Google Source does not properly support obtaining files as raw
-    # text. Therefore when we open the URL we receive the JSON in base64
-    # format. Conversion is needed before it can be loaded.
-    with urllib.request.urlopen(_get_resources_json_uri()) as url:
-        to_return = json.loads(base64.b64decode(url.read()).decode("utf-8"))
+    to_return = _get_resources_json_at_url(url = _get_resources_json_uri())

     # If the current version pulled is not correct, look up the
     # "previous-versions" field to find the correct one.
     version = _resources_json_version_required()
     if to_return["version"] != version:
         if version in to_return["previous-versions"].keys():
-            with urllib.request.urlopen(
-                    to_return["previous-versions"][version]
-                ) as url:
-                to_return = json.loads(
-                    base64.b64decode(url.read()).decode("utf-8")
-                )
+            to_return = _get_resources_json_at_url(
+                url = to_return["previous-versions"][version]
+            )
         else:
             # This should never happen, but we thrown an exception to explain
             # that we can't find the version.
@@ -170,18 +194,58 @@
     return md5_object.hexdigest()


-def _download(url: str, download_to: str) -> None:
+def _download(
+    url: str,
+    download_to: str,
+    attempt: int = 0,
+    max_attempts = 6,
+) -> None:
     """
     Downloads a file.

+    The function will run a Truncated Exponential Backoff algorithm to retry
+    the download if the HTTP Status Code returned is deemed retryable.
+
     :param url: The URL of the file to download.

     :param download_to: The location the downloaded file is to be stored.
+
+    :param attempt: The number of download attempts thus far. This is used for
+    recursion and not meant to be used directly when using this function.
+
+    :param max_attempts: The max number of download attempts before stopping.
+    The default is 6. This translates to roughly 1 minute of retrying before
+    stopping
     """

     # TODO: This whole setup will only work for single files we can get via
     # wget. We also need to support git clones going forward.
-    urllib.request.urlretrieve(url, download_to)
+
+    try:
+        urllib.request.urlretrieve(url, download_to)
+    except HTTPError as e:
+        # If the error code retrieved is retryable, we retry using a Truncated
+        # Exponential backoff algorithm, truncating after "max_attempts".
+        # We consider HTTP status codes 408, 429, and 5xx as retryable. If
+        # any other is retrieved we raise the error.
+        if e.code in (408, 429) or e.code in range(500, 600):
+            attempt = attempt + 1
+            if attempt == max_attempts:
+                raise Exception(
+                    f"After {attempt} attempts, the resource json could not "
+                    "be retrieved. HTTP Status Code retrieved: "
+                    f"{e.code}"
+                )
+            time.sleep((2 ** attempt) + random.uniform(0, 1))
+            _download(
+                url=url,
+                download_to=download_to,
+                attempt=attempt,
+                max_attempts=max_attempts,
+            )
+        else:
+            raise e
+


 def list_resources() -> List[str]:

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/57274
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ic4444b52f0f71d294fccec9de3140beece5f7576
Gerrit-Change-Number: 57274
Gerrit-PatchSet: 1
Gerrit-Owner: Bobby Bruce <bbr...@ucdavis.edu>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to