jenkins-bot has submitted this change and it was merged.

Change subject: textlib.extract_templates_and_params tests
......................................................................


textlib.extract_templates_and_params tests

Allow textlib.extract_templates_and_params_mwpfh to be accessed
and tested even if config.use_mwparserfromhell is disabled.

Delay import of mwparserfromhell until it is actually used.

Merge common test results for mwpfh and regex, highlighting the
case known to be different, and mention them in the docstring.

Change-Id: Id6a17940f241d95d9e8dc9b86131dec3989ea36a
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 119 insertions(+), 34 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, but someone else must approve
  XZise: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 859c290..ed25218 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -14,19 +14,14 @@
 __version__ = '$Id$'
 #
 
-try:
-    import mwparserfromhell
-except ImportError:
-    mwparserfromhell = False
+import datetime
+import re
+import sys
 
 try:
     from collections import OrderedDict
 except ImportError:
     from ordereddict import OrderedDict
-
-import datetime
-import re
-import sys
 
 if sys.version_info[0] > 2:
     from html.parser import HTMLParser
@@ -966,17 +961,53 @@
     parameters, and if this results multiple parameters with the same name
     only the last value provided will be returned.
 
-    This uses a third party library (mwparserfromhell) if it is installed
-    and enabled in the user-config.py. Otherwise it falls back on a
-    regex based function defined below.
+    This uses the package L{mwparserfromhell} (mwpfh) if it is installed
+    and enabled by config.use_mwparserfromhell. Otherwise it falls back on a
+    regex based implementation.
+
+    There are minor differences between the two implementations.
+
+    The two implementations return nested templates in a different order.
+    i.e. for {{a|b={{c}}}}, mwpfh returns [a, c], whereas regex returns [c, a].
+
+    mwpfh preserves whitespace in parameter names and values.  regex excludes
+    anything between <!-- --> before parsing the text.
 
     @param text: The wikitext from which templates are extracted
     @type text: unicode or string
     @return: list of template name and params
     @rtype: list of tuple
     """
-    if not (config.use_mwparserfromhell and mwparserfromhell):
+    use_mwparserfromhell = config.use_mwparserfromhell
+    if use_mwparserfromhell:
+        try:
+            import mwparserfromhell  # noqa
+        except ImportError:
+            use_mwparserfromhell = False
+
+    if use_mwparserfromhell:
+        return extract_templates_and_params_mwpfh(text)
+    else:
         return extract_templates_and_params_regex(text)
+
+
+def extract_templates_and_params_mwpfh(text):
+    """
+    Extract templates with params using mwparserfromhell.
+
+    This function should not be called directly.
+
+    Use extract_templates_and_params, which will select this
+    mwparserfromhell implementation based on whether the
+    mwparserfromhell package is installed and enabled by
+    config.use_mwparserfromhell.
+
+    @param text: The wikitext from which templates are extracted
+    @type text: unicode or string
+    @return: list of template name and params
+    @rtype: list of tuple
+    """
+    import mwparserfromhell
     code = mwparserfromhell.parse(text)
     result = []
     for template in code.filter_templates(recursive=True):
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 10dd200..630ced4 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -7,10 +7,6 @@
 #
 __version__ = '$Id$'
 
-try:
-    import mwparserfromhell
-except ImportError:
-    mwparserfromhell = False
 import codecs
 import os
 
@@ -53,24 +49,6 @@
 
     def testCurrentBehaviour(self):
         self.assertContains("enwiki_help_editing", u"Editing")
-
-    def testExtractTemplates(self):
-        if not (pywikibot.config.use_mwparserfromhell and mwparserfromhell):
-            raise unittest.SkipTest('mwparserfromhell not available or 
enabled')
-        func = textlib.extract_templates_and_params  # It's really long.
-        self.assertEqual(func('{{a}}'), [('a', OrderedDict())])
-        self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict((('b', 'c'), 
)))])
-        self.assertEqual(func('{{a|b|c=d}}'), [('a', OrderedDict((('1', 'b'), 
('c', 'd'))))])
-        self.assertEqual(func('{{a|b={{c}}}}'), [('a', OrderedDict((('b', 
'{{c}}'), ))), ('c', OrderedDict())])
-        self.assertEqual(func('{{a|b=c|f=g|d=e|1=}}'), [('a', 
OrderedDict((('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))])
-
-    def testExtractTemplatesRegex(self):
-        func = textlib.extract_templates_and_params_regex  # It's really long.
-        self.assertEqual(func('{{a}}'), [('a', OrderedDict())])
-        self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict((('b', 'c'), 
)))])
-        self.assertEqual(func('{{a|b|c=d}}'), [('a', OrderedDict((('1', 'b'), 
('c', 'd'))))])
-        self.assertEqual(func('{{a|b={{c}}}}'), [('c', OrderedDict()), ('a', 
OrderedDict((('b', '{{c}}'), )))])
-        self.assertEqual(func('{{a|b=c|f=g|d=e|1=}}'), [('a', 
OrderedDict((('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))])
 
     def testSpacesInSection(self):
         self.assertContains("enwiki_help_editing", u"Minor_edits")
@@ -218,6 +196,82 @@
                           '[[Category:nasty{{{!}}]]', self.site)
 
 
+class TestTemplateParams(TestCase):
+
+    """Test to verify that template params extraction works."""
+
+    net = False
+
+    def _extract_templates_params(self, func):
+        self.assertEqual(func('{{a}}'), [('a', OrderedDict())])
+        self.assertEqual(func('{{ a}}'), [('a', OrderedDict())])
+        self.assertEqual(func('{{a }}'), [('a', OrderedDict())])
+        self.assertEqual(func('{{ a }}'), [('a', OrderedDict())])
+        self.assertEqual(func('{{a|b=c}}'), [('a', OrderedDict((('b', 'c'), 
)))])
+        self.assertEqual(func('{{a|b|c=d}}'), [('a', OrderedDict((('1', 'b'), 
('c', 'd'))))])
+        self.assertEqual(func('{{a|b=c|f=g|d=e|1=}}'), [('a', 
OrderedDict((('b', 'c'), ('f', 'g'), ('d', 'e'), ('1', ''))))])
+        self.assertEqual(func('{{a|1=2|c=d}}'), [('a', OrderedDict((('1', 
'2'), ('c', 'd'))))])
+        self.assertEqual(func('{{a|c=d|1=2}}'), [('a', OrderedDict((('c', 
'd'), ('1', '2'))))])
+        self.assertEqual(func('{{a|5=d|a=b}}'), [('a', OrderedDict((('5', 
'd'), ('a', 'b'))))])
+        self.assertEqual(func('{{a|=2}}'), [('a', OrderedDict((('', '2'), )))])
+        self.assertEqual(func('{{a|=|}}'), [('a', OrderedDict((('', ''), ('1', 
''))))])
+        self.assertEqual(func('{{a||}}'), [('a', OrderedDict((('1', ''), ('2', 
''))))])
+        self.assertEqual(func('{{a|b={{{1}}}}}'), [('a', OrderedDict((('b', 
'{{{1}}}'), )))])
+        self.assertEqual(func('{{a|b=<noinclude>{{{1}}}</noinclude>}}'), 
[('a', OrderedDict((('b', '<noinclude>{{{1}}}</noinclude>'), )))])
+        self.assertEqual(func('{{subst:a|b=c}}'), [('subst:a', 
OrderedDict((('b', 'c'), )))])
+        self.assertEqual(func('{{safesubst:a|b=c}}'), [('safesubst:a', 
OrderedDict((('b', 'c'), )))])
+        self.assertEqual(func('{{msgnw:a|b=c}}'), [('msgnw:a', 
OrderedDict((('b', 'c'), )))])
+        self.assertEqual(func('{{Template:a|b=c}}'), [('Template:a', 
OrderedDict((('b', 'c'), )))])
+        self.assertEqual(func('{{template:a|b=c}}'), [('template:a', 
OrderedDict((('b', 'c'), )))])
+        self.assertEqual(func('{{:a|b=c}}'), [(':a', OrderedDict((('b', 'c'), 
)))])
+        self.assertEqual(func('{{subst::a|b=c}}'), [('subst::a', 
OrderedDict((('b', 'c'), )))])
+
+    def test_extract_templates_params_mwpfh(self):
+        try:
+            import mwparserfromhell  # noqa
+        except ImportError:
+            raise unittest.SkipTest('mwparserfromhell not available')
+
+        func = textlib.extract_templates_and_params_mwpfh
+        self._extract_templates_params(func)
+
+        self.assertEqual(func('{{a|}}'), [('a', OrderedDict((('1', ''), )))])
+
+        self.assertEqual(func('{{a| b=c}}'), [('a', OrderedDict(((' b', 'c'), 
)))])
+        self.assertEqual(func('{{a|b =c}}'), [('a', OrderedDict((('b ', 'c'), 
)))])
+        self.assertEqual(func('{{a|b= c}}'), [('a', OrderedDict((('b', ' c'), 
)))])
+        self.assertEqual(func('{{a|b=c }}'), [('a', OrderedDict((('b', 'c '), 
)))])
+
+        self.assertEqual(func('{{a| b={{c}}}}'), [('a', OrderedDict(((' b', 
'{{c}}'), ))), ('c', OrderedDict())])
+        self.assertEqual(func('{{a|b={{c}}}}'), [('a', OrderedDict((('b', 
'{{c}}'), ))), ('c', OrderedDict())])
+        self.assertEqual(func('{{a|b= {{c}}}}'), [('a', OrderedDict((('b', ' 
{{c}}'), ))), ('c', OrderedDict())])
+        self.assertEqual(func('{{a|b={{c}} }}'), [('a', OrderedDict((('b', 
'{{c}} '), ))), ('c', OrderedDict())])
+
+        self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'), [('a', 
OrderedDict((('b', '<!--{{{1}}}-->'), )))])
+
+    def test_extract_templates_params_regex(self):
+        func = textlib.extract_templates_and_params_regex
+        self._extract_templates_params(func)
+
+        self.assertEqual(func('{{a|}}'), [])  # FIXME: this is a bug
+
+        self.assertEqual(func('{{a| b=c}}'), [('a', OrderedDict((('b', 'c'), 
)))])
+        self.assertEqual(func('{{a|b =c}}'), [('a', OrderedDict((('b', 'c'), 
)))])
+        self.assertEqual(func('{{a|b= c}}'), [('a', OrderedDict((('b', 'c'), 
)))])
+        self.assertEqual(func('{{a|b=c }}'), [('a', OrderedDict((('b', 'c'), 
)))])
+
+        self.assertEqual(func('{{a| b={{c}}}}'), [('c', OrderedDict()), ('a', 
OrderedDict((('b', '{{c}}'), )))])
+        self.assertEqual(func('{{a|b={{c}}}}'), [('c', OrderedDict()), ('a', 
OrderedDict((('b', '{{c}}'), )))])
+        self.assertEqual(func('{{a|b= {{c}}}}'), [('c', OrderedDict()), ('a', 
OrderedDict((('b', '{{c}}'), )))])
+        self.assertEqual(func('{{a|b={{c}} }}'), [('c', OrderedDict()), ('a', 
OrderedDict((('b', '{{c}}'), )))])
+
+        self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'), [('a', 
OrderedDict((('b', ''), )))])
+
+    def test_extract_templates_params(self):
+        self._extract_templates_params(
+            textlib.extract_templates_and_params)
+
+
 class TestLocalDigits(TestCase):
 
     """Test to verify that local digits are correctly being handled."""

-- 
To view, visit https://gerrit.wikimedia.org/r/179420
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id6a17940f241d95d9e8dc9b86131dec3989ea36a
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jay...@gmail.com>
Gerrit-Reviewer: John Vandenberg <jay...@gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhall...@arctus.nl>
Gerrit-Reviewer: XZise <commodorefabia...@gmx.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
Pywikibot-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to