Niels Thykier wrote: > * tracker.d.o does *not* import excuses.yaml but update_excuses.html > (as far as I am informed at least)
True. Here is a patch for tracker to parse YAML instead of HTML. Cheers, Christophe
>From 04692b5c65124b930a94f668cd2b409269d186c5 Mon Sep 17 00:00:00 2001 From: Christophe Siraut <tob...@debian.org> Date: Wed, 1 Feb 2017 17:05:05 +0100 Subject: [PATCH] Use excuses.yaml instead of parsing HTML. Closes: #853189 --- .../vendor/debian/tests-data/update_excuses-1.html | 11 --- .../vendor/debian/tests-data/update_excuses-1.yaml | 11 +++ .../vendor/debian/tests-data/update_excuses-2.html | 11 --- .../vendor/debian/tests-data/update_excuses-2.yaml | 12 +++ distro_tracker/vendor/debian/tests.py | 11 +-- distro_tracker/vendor/debian/tracker_tasks.py | 106 ++++++--------------- 6 files changed, 56 insertions(+), 106 deletions(-) delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.html create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.html create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html b/distro_tracker/vendor/debian/tests-data/update_excuses-1.html deleted file mode 100644 index c23541e..0000000 --- a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html +++ /dev/null @@ -1,11 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> -<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body> -<p>Generated: 2013.08.12 10:03:22 +0000</p> -<ul> -<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0) -<ul> -<li>Maintainer: Some Maintainer -<li>20 days old (needed 10 days) -<li>Not considered -</ul> -</ul></body></html> diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml new file mode 100644 index 0000000..bb0d86e --- /dev/null +++ b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml @@ -0,0 +1,11 @@ +generated-date: 2017-02-01 06:47:18.195464 +sources: +- excuses: + - 20 days old (needed 10 days) + hints: + is-candidate: + item-name: dummy-package + new-version: 2.0.0 + old-version: 1.0.0 + reason: [] + source: dummy-package diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html b/distro_tracker/vendor/debian/tests-data/update_excuses-2.html deleted file mode 100644 index 4666c7b..0000000 --- a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html +++ /dev/null @@ -1,11 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> -<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body> -<p>Generated: 2013.08.12 10:03:22 +0000</p> -<ul> -<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0) -<ul> -<li>Maintainer: Some Maintainer -<li>10 days old (needed 10 days) -<li>Not considered -</ul> -</ul></body></html> diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml new file mode 100644 index 0000000..f3e74be --- /dev/null +++ b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml @@ -0,0 +1,12 @@ +generated-date: 2017-02-01 06:47:18.195464 +sources: +- excuses: + - 10 days old (needed 10 days) + hints: + is-candidate: + item-name: dummy-package + new-version: 2.0.0 + old-version: 1.0.0 + reason: [] + source: dummy-package + diff --git a/distro_tracker/vendor/debian/tests.py b/distro_tracker/vendor/debian/tests.py index b67271e..5a56566 100644 --- a/distro_tracker/vendor/debian/tests.py +++ b/distro_tracker/vendor/debian/tests.py @@ -1760,15 +1760,14 @@ class UpdateExcusesTaskActionItemTest(TestCase): def set_update_excuses_content(self, content): """ - Sets the stub content of the update_excuses.html that the task will + Sets the stub content of the update_excuses.yaml that the task will have access to. """ - self.task._get_update_excuses_content.return_value = iter( - content.splitlines()) + self.task._get_update_excuses_content.return_value = content def set_update_excuses_content_from_file(self, file_name): """ - Sets the stub content of the update_excuses.html that the task will + Sets the stub content of the update_excuses.yaml that the task will have access to based on the content of the test file with the given name. """ @@ -1786,7 +1785,7 @@ class UpdateExcusesTaskActionItemTest(TestCase): Tests that an action item is created when a package has not moved to testing after the allocated period. """ - self.set_update_excuses_content_from_file('update_excuses-1.html') + self.set_update_excuses_content_from_file('update_excuses-1.yaml') # Sanity check: no action items currently self.assertEqual(0, ActionItem.objects.count()) expected_data = { @@ -1834,7 +1833,7 @@ class UpdateExcusesTaskActionItemTest(TestCase): package=self.package_name, item_type=self.get_action_item_type(), short_description="Desc") - self.set_update_excuses_content_from_file('update_excuses-2.html') + self.set_update_excuses_content_from_file('update_excuses-2.yaml') self.run_task() diff --git a/distro_tracker/vendor/debian/tracker_tasks.py b/distro_tracker/vendor/debian/tracker_tasks.py index 0a7b185..efbe152 100644 --- a/distro_tracker/vendor/debian/tracker_tasks.py +++ b/distro_tracker/vendor/debian/tracker_tasks.py @@ -830,86 +830,36 @@ class UpdateExcusesTask(BaseTask): return True return False - def _extract_problems_in_excuses_item(self, subline, package, problematic): - if 'days old (needed' in subline: - words = subline.split() - age, limit = words[0], words[4] - if age != limit: - # It is problematic only when the age is strictly - # greater than the limit. - problematic[package] = { - 'age': age, - 'limit': limit, - } + def _extract_problematic(self, item): + if not 'policy_info' in item or not 'age' in item['policy_info']: + return + package = item['item-name'] + age = item['policy_info']['age']['current-age'] + limit = item['policy_info']['age']['age-requirement'] + if age > limit: + return (package, {'age': age, 'limit': limit}) - def _get_excuses_and_problems(self, content_lines): + def _get_excuses_and_problems(self, content): """ - Gets the excuses for each package from the given iterator of lines - representing the excuses html file. - Also finds a list of packages which have not migrated to testing even - after the necessary time has passed. + Gets the excuses for each package. + Also finds a list of packages which have not migrated to testing + agter the necessary time has passed. - :returns: A two-tuple where the first element is a dict mapping - package names to a list of excuses. The second element is a dict - mapping package names to a problem information. Problem information - is a dict with the keys ``age`` and ``limit``. + :returns: A two-tuple where the first element is a dict mapping + package names to a list of excuses. The second element is a dict + mapping packages names to a problem information. Problem information + is a dict with the keys ``age`` and ``limit``. """ - try: - # Skip all HTML before the first list - while '<ul>' not in next(content_lines): - pass - except StopIteration: + if not 'sources' in content: logger.warning("Invalid format of excuses file") return - top_level_list = True - package = "" - package_excuses = {} - problematic = {} - excuses = [] - for line in content_lines: - if isinstance(line, six.binary_type): - line = line.decode('utf-8') - if '</ul>' in line: - # The inner list is closed -- all excuses for the package are - # processed and we're back to the top-level list. - top_level_list = True - if '/' in package: - continue - # Done with the package - package_excuses[package] = deepcopy(excuses) - continue - - if '<ul>' in line: - # Entering the list of excuses - top_level_list = False - continue - - if top_level_list: - # The entry in the top level list outside of an inner list is - # a <li> item giving the name of the package for which the - # excuses follow. - words = re.split("[><() ]", line) - package = words[6] - excuses = [] - top_level_list = False - continue - - line = line.strip() - for subline in line.split("<li>"): - if self._skip_excuses_item(subline): - continue - - # Check if there is a problem for the package. - self._extract_problems_in_excuses_item(subline, package, - problematic) - - # Extract the rest of the excuses - # If it contains a link to an anchor convert it to a link to a - # package page. - excuses.append(self._adapt_excuse_links(subline)) - - return package_excuses, problematic + items = content['sources'] + linked = lambda e: map(self._adapt_excuse_links, e) + excuses = [(e['item-name'], linked(e['excuses'])) for e in items] + problems = map(self._extract_problematic, items) + problematic = [p for p in problems if p] + return dict(excuses), dict(problematic) def _create_action_item(self, package, extra_data): """ @@ -944,16 +894,16 @@ class UpdateExcusesTask(BaseTask): def _get_update_excuses_content(self): """ - Function returning the content of the update_excuses.html file as an - terable of lines. - Returns ``None`` if the content in the cache is up to date. + Function returning the content of excuses from debian-release + :returns: a dict of excuses or ``None`` if the content in the + cache is up to date. """ - url = 'https://release.debian.org/britney/update_excuses.html' + url = 'https://release.debian.org/britney/excuses.yaml' response, updated = self.cache.update(url, force=self.force_update) if not updated: return - return response.iter_lines(decode_unicode=True) + return yaml.load(response.text) def execute(self): content_lines = self._get_update_excuses_content() -- 2.11.0