Hello community,

here is the log from the commit of package python-html2text for openSUSE:Leap:15.2 checked in at 2020-04-03 15:52:31
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Leap:15.2/python-html2text (Old)
 and      /work/SRC/openSUSE:Leap:15.2/.python-html2text.new.3248 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-html2text" Fri Apr 3 15:52:31 2020 rev:15 rq:790923 version:2019.9.26 Changes: -------- --- /work/SRC/openSUSE:Leap:15.2/python-html2text/python-html2text.changes 2020-04-02 16:47:56.797832051 +0200 +++ /work/SRC/openSUSE:Leap:15.2/.python-html2text.new.3248/python-html2text.changes 2020-04-03 15:52:34.413816687 +0200 @@ -1,0 +2,12 @@ +Fri Dec 13 13:43:47 UTC 2019 - Matthias Fehring <buschman...@opensuse.org> + +- Update to 2019.9.26: + * Fix long blockquotes wrapping. + * Remove the trailing whitespaces that were added after wrapping list items & blockquotes. + * Remove support for Python <= 3.4. Now requires Python 3.5+. + * Fix memory leak when processing a document containing a <abbr> tag. + * Fix AttributeError when reading text from stdin. + * Fix UnicodeEncodeError when writing output to stdout. +- Disable build for Python 2 + +------------------------------------------------------------------- Old: ---- html2text-2019.8.11.tar.gz New: ---- html2text-2019.9.26.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-html2text.spec ++++++ --- /var/tmp/diff_new_pack.KypKV4/_old 2020-04-03 15:52:34.745817069 +0200 +++ /var/tmp/diff_new_pack.KypKV4/_new 2020-04-03 15:52:34.749817073 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-html2text # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. 
+# Copyright (c) 2019 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,9 +17,10 @@ %define upname html2text +%define skip_python2 1 %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-%{upname} -Version: 2019.8.11 +Version: 2019.9.26 Release: 0 Summary: Python script for turning HTML into Markdown text License: GPL-3.0-only @@ -63,6 +64,8 @@ %python_uninstall_alternative html2text %check +# otherwise python 3.6 does not automatically select UTF-8 for console output +export LANG=en_US.UTF-8 %pytest %files %{python_files} ++++++ html2text-2019.8.11.tar.gz -> html2text-2019.9.26.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/ChangeLog.rst new/html2text-2019.9.26/ChangeLog.rst --- old/html2text-2019.8.11/ChangeLog.rst 2019-08-11 21:33:38.000000000 +0200 +++ new/html2text-2019.9.26/ChangeLog.rst 2019-09-26 12:36:15.000000000 +0200 @@ -1,3 +1,15 @@ +2019.9.26 +========= +---- + +* Fix long blockquotes wrapping. +* Remove the trailing whitespaces that were added after wrapping list items & blockquotes. +* Remove support for Python ≤ 3.4. Now requires Python 3.5+. +* Fix memory leak when processing a document containing a ``<abbr>`` tag. +* Fix ``AttributeError`` when reading text from stdin. +* Fix ``UnicodeEncodeError`` when writing output to stdout. + + 2019.8.11 ========= ---- @@ -10,13 +22,16 @@ * Add ``__main__.py`` module to allow running the CLI using ``python -m html2text ...``. * Fix #238: correct spacing when a HTML entity follows a non-stressed tags which follow a stressed tag. 
* Remove unused or deprecated: + * ``html2text.compat.escape()`` * ``html2text.config.RE_UNESCAPE`` * ``html2text.HTML2Text.replaceEntities()`` * ``html2text.HTML2Text.unescape()`` * ``html2text.unescape()`` + * Fix #208: handle LEFT-TO-RIGHT MARK after a stressed tag. + 2018.1.9 ======== ---- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/PKG-INFO new/html2text-2019.9.26/PKG-INFO --- old/html2text-2019.8.11/PKG-INFO 2019-08-11 21:36:00.000000000 +0200 +++ new/html2text-2019.9.26/PKG-INFO 2019-09-26 12:37:26.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: html2text -Version: 2019.8.11 +Version: 2019.9.26 Summary: Turn HTML into equivalent Markdown-structured text. Home-page: https://github.com/Alir3z4/html2text/ Author: Aaron Swartz @@ -101,14 +101,12 @@ Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3 :: Only Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy -Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Requires-Python: >=3.5 Description-Content-Type: text/markdown diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text/__init__.py new/html2text-2019.9.26/html2text/__init__.py --- old/html2text-2019.8.11/html2text/__init__.py 2019-08-11 21:35:55.000000000 +0200 +++ new/html2text-2019.9.26/html2text/__init__.py 
2019-09-26 12:36:15.000000000 +0200 @@ -1,13 +1,12 @@ -# coding: utf-8 """html2text: Turn HTML into equivalent Markdown-structured text.""" -from __future__ import division, unicode_literals +import html.entities +import html.parser import re -import sys +import urllib.parse as urlparse from textwrap import wrap from html2text import config -from html2text.compat import HTMLParser, urlparse from html2text.utils import ( dumb_css_parser, element_style, @@ -19,27 +18,19 @@ google_text_emphasis, hn, list_numbering_start, - name2cp, pad_tables_in_text, skipwrap, unifiable_n, ) -try: - chr = unichr - nochr = unicode("") -except NameError: - # python3 uses chr - nochr = str("") - -__version__ = (2019, 8, 11) +__version__ = (2019, 9, 26) # TODO: # Support decoded entities with UNIFIABLE. -class HTML2Text(HTMLParser.HTMLParser): +class HTML2Text(html.parser.HTMLParser): def __init__(self, out=None, baseurl="", bodywidth=config.BODY_WIDTH): """ Input parameters: @@ -47,10 +38,7 @@ appends lines of text). 
baseurl: base URL of the document we process """ - kwargs = {} - if sys.version_info >= (3, 4): - kwargs["convert_charrefs"] = False - HTMLParser.HTMLParser.__init__(self, **kwargs) + super().__init__(convert_charrefs=False) # Config options self.split_next_td = False @@ -135,7 +123,7 @@ def feed(self, data): data = data.replace("</' + 'script>", "</ignore>") - HTMLParser.HTMLParser.feed(self, data) + super().feed(data) def handle(self, data): self.feed(data) @@ -152,17 +140,17 @@ self.lastWasNL = s[-1] == "\n" def close(self): - HTMLParser.HTMLParser.close(self) + super().close() self.pbr() self.o("", force="end") - outtext = nochr.join(self.outtextlist) + outtext = "".join(self.outtextlist) if self.unicode_snob: - nbsp = chr(name2cp("nbsp")) + nbsp = html.entities.html5["nbsp;"] else: - nbsp = chr(32) + nbsp = " " outtext = outtext.replace(" _place_holder;", nbsp) # Clear self.outtextlist to avoid memory leak of its content to @@ -187,10 +175,10 @@ self.handle_data(ref, True) def handle_starttag(self, tag, attrs): - self.handle_tag(tag, attrs, 1) + self.handle_tag(tag, attrs, start=True) def handle_endtag(self, tag): - self.handle_tag(tag, None, 0) + self.handle_tag(tag, None, start=False) def previousIndex(self, attrs): """ @@ -202,11 +190,9 @@ """ if "href" not in attrs: return None - i = -1 - for a in self.a: - i += 1 - match = False + match = False + for i, a in enumerate(self.a): if "href" in a and a["href"] == attrs["href"]: if "title" in a or "title" in attrs: if ( @@ -220,6 +206,7 @@ if match: return i + return None def handle_emphasis(self, start, tag_style, parent_style): """ @@ -442,7 +429,7 @@ if self.abbr_title is not None: self.abbr_list[self.abbr_data] = self.abbr_title self.abbr_title = None - self.abbr_data = "" + self.abbr_data = None if tag == "q": if not self.quote: @@ -569,7 +556,7 @@ if tag in ["ol", "ul"]: # Google Docs create sub lists as top level lists - if (not self.list) and (not self.lastWasList): + if not self.list and not 
self.lastWasList: self.p() if start: if self.google_doc: @@ -581,7 +568,7 @@ else: if self.list: self.list.pop() - if (not self.google_doc) and (not self.list): + if not self.google_doc and not self.list: self.o("\n") self.lastWasList = True else: @@ -856,16 +843,11 @@ def entityref(self, c): if not self.unicode_snob and c in config.UNIFIABLE: return config.UNIFIABLE[c] - else: - try: - cp = name2cp(c) - except KeyError: - return "&" + c + ";" - else: - if c == "nbsp": - return config.UNIFIABLE[c] - else: - return chr(cp) + try: + ch = html.entities.html5[c + ";"] + except KeyError: + return "&" + c + ";" + return config.UNIFIABLE[c] if c == "nbsp" else ch def google_nest_count(self, style): """ @@ -904,7 +886,13 @@ if not skipwrap(para, self.wrap_links, self.wrap_list_items): indent = "" if para.startswith(" " + self.ul_item_mark): - indent = " " # For list items. + # list item continuation: add a double indent to the + # new lines + indent = " " + elif para.startswith("> "): + # blockquote continuation: add the greater than symbol + # to the new lines + indent = "> " wrapped = wrap( para, self.body_width, @@ -912,9 +900,12 @@ subsequent_indent=indent, ) result += "\n".join(wrapped) - if indent or para.endswith(" "): + if para.endswith(" "): result += " \n" newlines = 1 + elif indent: + result += "\n" + newlines = 1 else: result += "\n\n" newlines = 2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text/cli.py new/html2text-2019.9.26/html2text/cli.py --- old/html2text-2019.8.11/html2text/cli.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/html2text/cli.py 2019-08-15 12:56:54.000000000 +0200 @@ -1,7 +1,7 @@ import argparse +import sys from html2text import HTML2Text, __version__, config -from html2text.utils import wrap_read, wrapwrite def main(): @@ -256,7 +256,7 @@ with open(args.filename, "rb") as fp: data = fp.read() else: - data = wrap_read() + data = 
sys.stdin.buffer.read() try: data = data.decode(args.encoding, args.decode_errors) @@ -303,4 +303,4 @@ h.open_quote = args.open_quote h.close_quote = args.close_quote - wrapwrite(h.handle(data)) + sys.stdout.write(h.handle(data)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text/compat.py new/html2text-2019.9.26/html2text/compat.py --- old/html2text-2019.8.11/html2text/compat.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/html2text/compat.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,12 +0,0 @@ -import sys - -if sys.version_info[0] == 2: - import htmlentitydefs - import urlparse - import HTMLParser -else: - import urllib.parse as urlparse - import html.entities as htmlentitydefs - import html.parser as HTMLParser - -__all__ = ["HTMLParser", "htmlentitydefs", "urlparse"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text/config.py new/html2text-2019.9.26/html2text/config.py --- old/html2text-2019.8.11/html2text/config.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/html2text/config.py 2019-08-15 12:56:54.000000000 +0200 @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re # Use Unicode characters instead of their ascii pseudo-replacements diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text/utils.py new/html2text-2019.9.26/html2text/utils.py --- old/html2text-2019.8.11/html2text/utils.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/html2text/utils.py 2019-08-15 12:56:54.000000000 +0200 @@ -1,17 +1,12 @@ -import sys +import html.entities from html2text import config -from html2text.compat import htmlentitydefs - -def name2cp(k): - """Return sname to codepoint""" - if k == "apos": - return ord("'") - return htmlentitydefs.name2codepoint[k] - - -unifiable_n = {name2cp(k): v for k, v in 
config.UNIFIABLE.items() if k != "nbsp"} +unifiable_n = { + html.entities.name2codepoint[k]: v + for k, v in config.UNIFIABLE.items() + if k != "nbsp" +} def hn(tag): @@ -187,24 +182,6 @@ ) -def wrapwrite(text): - text = text.encode("utf-8") - try: # Python3 - sys.stdout.buffer.write(text) - except AttributeError: - sys.stdout.write(text) - - -def wrap_read(): - """ - :rtype: str - """ - try: - return sys.stdin.read() - except AttributeError: - return sys.stdin.buffer.read() - - def escape_md(text): """ Escapes markdown-sensitive characters within other markdown diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text.egg-info/PKG-INFO new/html2text-2019.9.26/html2text.egg-info/PKG-INFO --- old/html2text-2019.8.11/html2text.egg-info/PKG-INFO 2019-08-11 21:35:58.000000000 +0200 +++ new/html2text-2019.9.26/html2text.egg-info/PKG-INFO 2019-09-26 12:37:26.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: html2text -Version: 2019.8.11 +Version: 2019.9.26 Summary: Turn HTML into equivalent Markdown-structured text. 
Home-page: https://github.com/Alir3z4/html2text/ Author: Aaron Swartz @@ -101,14 +101,12 @@ Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3 :: Only Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy -Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Requires-Python: >=3.5 Description-Content-Type: text/markdown diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/html2text.egg-info/SOURCES.txt new/html2text-2019.9.26/html2text.egg-info/SOURCES.txt --- old/html2text-2019.8.11/html2text.egg-info/SOURCES.txt 2019-08-11 21:35:59.000000000 +0200 +++ new/html2text-2019.9.26/html2text.egg-info/SOURCES.txt 2019-09-26 12:37:26.000000000 +0200 @@ -9,7 +9,6 @@ html2text/__init__.py html2text/__main__.py html2text/cli.py -html2text/compat.py html2text/config.py html2text/utils.py html2text.egg-info/PKG-INFO diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/setup.cfg new/html2text-2019.9.26/setup.cfg --- old/html2text-2019.8.11/setup.cfg 2019-08-11 21:36:00.000000000 +0200 +++ new/html2text-2019.9.26/setup.cfg 2019-09-26 12:37:26.000000000 +0200 @@ -1,6 +1,3 @@ -[bdist_wheel] -universal = 1 - [flake8] max_line_length = 88 ignore = diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/setup.py 
new/html2text-2019.9.26/setup.py --- old/html2text-2019.8.11/setup.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/setup.py 2019-08-15 12:56:54.000000000 +0200 @@ -1,4 +1,3 @@ -# coding: utf-8 from setuptools import setup @@ -25,17 +24,15 @@ "License :: OSI Approved :: GNU General Public License (GPL)", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", + python_requires=">=3.5", entry_points={"console_scripts": ["html2text = html2text.cli:main"]}, license="GNU GPL 3", packages=["html2text"], diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/test/blockquote_example.html new/html2text-2019.9.26/test/blockquote_example.html --- old/html2text-2019.8.11/test/blockquote_example.html 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/test/blockquote_example.html 2019-08-15 12:56:54.000000000 +0200 @@ -1,3 +1,3 @@ <blockquote> -The time has come, the Walrus said, to speak of many things. +"The time has come", the Walrus said, "To talk of many things: Of shoes - and ships - and sealing wax - Of cabbages - and kings- And why the sea is boiling hot - And whether pigs have wings." 
</blockquote> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/test/blockquote_example.md new/html2text-2019.9.26/test/blockquote_example.md --- old/html2text-2019.8.11/test/blockquote_example.md 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/test/blockquote_example.md 2019-09-25 10:07:55.000000000 +0200 @@ -1,2 +1,4 @@ -> The time has come, the Walrus said, to speak of many things. +> "The time has come", the Walrus said, "To talk of many things: Of shoes - +> and ships - and sealing wax - Of cabbages - and kings- And why the sea is +> boiling hot - And whether pigs have wings." diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/test/test_html2text.py new/html2text-2019.9.26/test/test_html2text.py --- old/html2text-2019.8.11/test/test_html2text.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/test/test_html2text.py 2019-08-15 12:56:54.000000000 +0200 @@ -1,4 +1,3 @@ -import codecs import glob import os import re @@ -189,7 +188,7 @@ result = get_baseline(fn) out = subprocess.check_output(cmd) - actual = out.decode("utf8") + actual = out.decode() actual = cleanup_eol(actual) @@ -210,7 +209,7 @@ def get_baseline(fn): name = get_baseline_name(fn) - with codecs.open(name, mode="r", encoding="utf8") as f: + with open(name, encoding="utf-8") as f: out = f.read() return cleanup_eol(out) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/test/test_memleak.py new/html2text-2019.9.26/test/test_memleak.py --- old/html2text-2019.8.11/test/test_memleak.py 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/test/test_memleak.py 2019-09-25 09:41:57.000000000 +0200 @@ -17,3 +17,10 @@ h2t.handle(INSTR) # And even less when the input is empty. 
assert h2t.handle("") == "\n\n" + + +def test_abbr_data(): + h2t = html2text.HTML2Text() + result = h2t.handle('<p>foo <abbr title="Three Letter Acronym">TLA</abbr> bar</p>') + assert result == "foo TLA bar\n\n *[TLA]: Three Letter Acronym\n\n" + assert h2t.abbr_data is None diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/test/wrap_list_items_example.md new/html2text-2019.9.26/test/wrap_list_items_example.md --- old/html2text-2019.8.11/test/wrap_list_items_example.md 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/test/wrap_list_items_example.md 2019-09-25 10:07:55.000000000 +0200 @@ -1,14 +1,14 @@ * One two three four five six seven eight nine ten eleven twelve thirteen - fourteen fifteen sixteen seventeen eighteen nineteen twenty. + fourteen fifteen sixteen seventeen eighteen nineteen twenty. * One two three four five six seven eight nine ten eleven twelve thirteen - fourteen fifteen sixteen seventeen eighteen nineteen twenty. + fourteen fifteen sixteen seventeen eighteen nineteen twenty. Text between lists. * One two three four five six seven eight nine ten eleven twelve thirteen - fourteen fifteen sixteen seventeen eighteen nineteen twenty. + fourteen fifteen sixteen seventeen eighteen nineteen twenty. * One two three four five six seven eight nine ten eleven twelve thirteen - fourteen fifteen sixteen seventeen eighteen nineteen twenty. + fourteen fifteen sixteen seventeen eighteen nineteen twenty. Text after list. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.8.11/tox.ini new/html2text-2019.9.26/tox.ini --- old/html2text-2019.8.11/tox.ini 2019-08-11 21:27:39.000000000 +0200 +++ new/html2text-2019.9.26/tox.ini 2019-09-25 09:41:57.000000000 +0200 @@ -3,7 +3,7 @@ black flake8 isort - py{27,34,35,36,37,py,py3} + py{35,36,37,py3} minversion = 1.9 [testenv] @@ -16,7 +16,7 @@ [testenv:black] basepython = python3 commands = - black --check --diff . + black --target-version py35 --check --diff . deps = black skip_install = true