Hi Niels, Niels Thykier wrote: > As the maintainer of Britney, I am a bit concerned that this patch > appears to be relying on the "excuses"-field inside. That is a > "non-machine"-parsable format (basically all raw HTML notes) that I > would like to eventually phase out of the excuses.yaml.
These notes are useful to display in tracker. I can understand you want them clean from HTML markup. > If there is data in that field that tracker needs, then it should > preferably be extracted to another field. (FTR, the format is still a > bit WIP) Maybe we can have something like: - item-name: foo excuses: - excuse: Updating introduces new bugs links: https://bugs.debian.org/123456, https://bugs.debian.org/234567 - excuse: Not touching package due to block request by freeze links: https://release.debian.org/testing/freeze_policy.html - excuse: Removal request by Bar links: https://bugs.debian.org/345678 This is my naïve take on it;) I don't know which services rely on excuses.yaml. Currently tracker uses the following information: name, excuses, policy-info.age.age-requirement, and policy-info.age.current-age. Excuses notes are displayed in the "testing migration" panel. An additional notification is displayed in the "action needed" panel when age requirement is over. The YAML test files from the first patch were incorrect, here is an update. Cheers, Christophe
>From 52c4608c2366ac8a43a4619d5bed9602247e67e4 Mon Sep 17 00:00:00 2001 From: Christophe Siraut <tob...@debian.org> Date: Thu, 2 Feb 2017 18:36:24 +0100 Subject: [PATCH] Use excuses.yaml instead of parsing HTML. Closes: #853189 --- .../vendor/debian/tests-data/update_excuses-1.html | 11 --- .../vendor/debian/tests-data/update_excuses-1.yaml | 14 +++ .../vendor/debian/tests-data/update_excuses-2.html | 11 --- .../vendor/debian/tests-data/update_excuses-2.yaml | 15 +++ distro_tracker/vendor/debian/tests.py | 11 +-- distro_tracker/vendor/debian/tracker_tasks.py | 106 ++++++--------------- 6 files changed, 62 insertions(+), 106 deletions(-) delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.html create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml delete mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.html create mode 100644 distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html b/distro_tracker/vendor/debian/tests-data/update_excuses-1.html deleted file mode 100644 index c23541e..0000000 --- a/distro_tracker/vendor/debian/tests-data/update_excuses-1.html +++ /dev/null @@ -1,11 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> -<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body> -<p>Generated: 2013.08.12 10:03:22 +0000</p> -<ul> -<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0) -<ul> -<li>Maintainer: Some Maintainer -<li>20 days old (needed 10 days) -<li>Not considered -</ul> -</ul></body></html> diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml new file mode 100644 index 0000000..e64ad55 --- /dev/null +++ b/distro_tracker/vendor/debian/tests-data/update_excuses-1.yaml @@ -0,0 +1,14 @@ +generated-date: 2017-02-01 06:47:18.195464 +sources: +- excuses: + policy-info: + age: + current-age: 20 + age-requirement: 10 + hints: + is-candidate: + item-name: dummy-package + new-version: 2.0.0 + old-version: 1.0.0 + reason: [] + source: dummy-package diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html b/distro_tracker/vendor/debian/tests-data/update_excuses-2.html deleted file mode 100644 index 4666c7b..0000000 --- a/distro_tracker/vendor/debian/tests-data/update_excuses-2.html +++ /dev/null @@ -1,11 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> -<html><head><title>excuses...</title><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body> -<p>Generated: 2013.08.12 10:03:22 +0000</p> -<ul> -<li><a id="dummy-package" name="dummy-package">dummy-package</a> (1.0.0 to 2.0.0) -<ul> -<li>Maintainer: Some Maintainer -<li>10 days old (needed 10 days) -<li>Not considered -</ul> -</ul></body></html> diff --git a/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml new file mode 100644 index 0000000..59f7ca4 --- /dev/null +++ b/distro_tracker/vendor/debian/tests-data/update_excuses-2.yaml @@ -0,0 +1,15 @@ +generated-date: 2017-02-01 06:47:18.195464 +sources: +- excuses: + policy-info: + age: + current-age: 10 + age-requirement: 10 + hints: + is-candidate: + item-name: dummy-package + new-version: 2.0.0 + old-version: 1.0.0 + reason: [] + source: dummy-package + diff --git a/distro_tracker/vendor/debian/tests.py b/distro_tracker/vendor/debian/tests.py index b67271e..5a56566 100644 --- a/distro_tracker/vendor/debian/tests.py +++ b/distro_tracker/vendor/debian/tests.py @@ -1760,15 +1760,14 @@ class UpdateExcusesTaskActionItemTest(TestCase): def set_update_excuses_content(self, content): """ - Sets the stub content of the update_excuses.html that the task will + Sets the stub content of the update_excuses.yaml that the task will have access to. """ - self.task._get_update_excuses_content.return_value = iter( - content.splitlines()) + self.task._get_update_excuses_content.return_value = content def set_update_excuses_content_from_file(self, file_name): """ - Sets the stub content of the update_excuses.html that the task will + Sets the stub content of the update_excuses.yaml that the task will have access to based on the content of the test file with the given name. """ @@ -1786,7 +1785,7 @@ class UpdateExcusesTaskActionItemTest(TestCase): Tests that an action item is created when a package has not moved to testing after the allocated period. """ - self.set_update_excuses_content_from_file('update_excuses-1.html') + self.set_update_excuses_content_from_file('update_excuses-1.yaml') # Sanity check: no action items currently self.assertEqual(0, ActionItem.objects.count()) expected_data = { @@ -1834,7 +1833,7 @@ class UpdateExcusesTaskActionItemTest(TestCase): package=self.package_name, item_type=self.get_action_item_type(), short_description="Desc") - self.set_update_excuses_content_from_file('update_excuses-2.html') + self.set_update_excuses_content_from_file('update_excuses-2.yaml') self.run_task() diff --git a/distro_tracker/vendor/debian/tracker_tasks.py b/distro_tracker/vendor/debian/tracker_tasks.py index 0a7b185..efbe152 100644 --- a/distro_tracker/vendor/debian/tracker_tasks.py +++ b/distro_tracker/vendor/debian/tracker_tasks.py @@ -830,86 +830,36 @@ class UpdateExcusesTask(BaseTask): return True return False - def _extract_problems_in_excuses_item(self, subline, package, problematic): - if 'days old (needed' in subline: - words = subline.split() - age, limit = words[0], words[4] - if age != limit: - # It is problematic only when the age is strictly - # greater than the limit. - problematic[package] = { - 'age': age, - 'limit': limit, - } + def _extract_problematic(self, item): + if not 'policy_info' in item or not 'age' in item['policy_info']: + return + package = item['item-name'] + age = item['policy_info']['age']['current-age'] + limit = item['policy_info']['age']['age-requirement'] + if age > limit: + return (package, {'age': age, 'limit': limit}) - def _get_excuses_and_problems(self, content_lines): + def _get_excuses_and_problems(self, content): """ - Gets the excuses for each package from the given iterator of lines - representing the excuses html file. - Also finds a list of packages which have not migrated to testing even - after the necessary time has passed. + Gets the excuses for each package. + Also finds a list of packages which have not migrated to testing + agter the necessary time has passed. - :returns: A two-tuple where the first element is a dict mapping - package names to a list of excuses. The second element is a dict - mapping package names to a problem information. Problem information - is a dict with the keys ``age`` and ``limit``. + :returns: A two-tuple where the first element is a dict mapping + package names to a list of excuses. The second element is a dict + mapping packages names to a problem information. Problem information + is a dict with the keys ``age`` and ``limit``. """ - try: - # Skip all HTML before the first list - while '<ul>' not in next(content_lines): - pass - except StopIteration: + if not 'sources' in content: logger.warning("Invalid format of excuses file") return - top_level_list = True - package = "" - package_excuses = {} - problematic = {} - excuses = [] - for line in content_lines: - if isinstance(line, six.binary_type): - line = line.decode('utf-8') - if '</ul>' in line: - # The inner list is closed -- all excuses for the package are - # processed and we're back to the top-level list. - top_level_list = True - if '/' in package: - continue - # Done with the package - package_excuses[package] = deepcopy(excuses) - continue - - if '<ul>' in line: - # Entering the list of excuses - top_level_list = False - continue - - if top_level_list: - # The entry in the top level list outside of an inner list is - # a <li> item giving the name of the package for which the - # excuses follow. - words = re.split("[><() ]", line) - package = words[6] - excuses = [] - top_level_list = False - continue - - line = line.strip() - for subline in line.split("<li>"): - if self._skip_excuses_item(subline): - continue - - # Check if there is a problem for the package. - self._extract_problems_in_excuses_item(subline, package, - problematic) - - # Extract the rest of the excuses - # If it contains a link to an anchor convert it to a link to a - # package page. - excuses.append(self._adapt_excuse_links(subline)) - - return package_excuses, problematic + items = content['sources'] + linked = lambda e: map(self._adapt_excuse_links, e) + excuses = [(e['item-name'], linked(e['excuses'])) for e in items] + problems = map(self._extract_problematic, items) + problematic = [p for p in problems if p] + return dict(excuses), dict(problematic) def _create_action_item(self, package, extra_data): """ @@ -944,16 +894,16 @@ class UpdateExcusesTask(BaseTask): def _get_update_excuses_content(self): """ - Function returning the content of the update_excuses.html file as an - terable of lines. - Returns ``None`` if the content in the cache is up to date. + Function returning the content of excuses from debian-release + :returns: a dict of excuses or ``None`` if the content in the + cache is up to date. """ - url = 'https://release.debian.org/britney/update_excuses.html' + url = 'https://release.debian.org/britney/excuses.yaml' response, updated = self.cache.update(url, force=self.force_update) if not updated: return - return response.iter_lines(decode_unicode=True) + return yaml.load(response.text) def execute(self): content_lines = self._get_update_excuses_content() -- 2.11.0