Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python310 for openSUSE:Factory checked in at 2025-07-03 12:10:49 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python310 (Old) and /work/SRC/openSUSE:Factory/.python310.new.1903 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python310" Thu Jul 3 12:10:49 2025 rev:64 rq:1290033 version:3.10.18 Changes: -------- --- /work/SRC/openSUSE:Factory/python310/python310.changes 2025-06-26 11:41:03.878110835 +0200 +++ /work/SRC/openSUSE:Factory/.python310.new.1903/python310.changes 2025-07-03 12:13:12.576563489 +0200 @@ -1,0 +2,7 @@ +Wed Jul 2 14:47:20 UTC 2025 - Matej Cepl <mc...@cepl.eu> + +- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst + case quadratic complexity when processing certain crafted + malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705). + +------------------------------------------------------------------- New: ---- CVE-2025-6069-quad-complex-HTMLParser.patch ----------(New B)---------- New: - Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst case quadratic complexity when processing certain crafted ----------(New E)---------- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python310.spec ++++++ --- /var/tmp/diff_new_pack.qaiTR3/_old 2025-07-03 12:13:15.964705391 +0200 +++ /var/tmp/diff_new_pack.qaiTR3/_new 2025-07-03 12:13:15.964705391 +0200 @@ -202,6 +202,9 @@ # PATCH-FIX-UPSTREAM sphinx-802.patch mc...@suse.com # status_iterator method moved between the Sphinx versions Patch28: sphinx-802.patch +# PATCH-FIX-UPSTREAM CVE-2025-6069-quad-complex-HTMLParser.patch bsc#1244705 mc...@suse.com +# avoid quadratic complexity when processing malformed inputs with HTMLParser +Patch29: CVE-2025-6069-quad-complex-HTMLParser.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes ++++++ CVE-2025-6069-quad-complex-HTMLParser.patch ++++++ >From ec28625203b5dd4b4447dc4bb512898294cd6d0c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka <storch...@gmail.com> Date: Fri, 13 Jun 2025 19:57:48 +0300 Subject: [PATCH] [3.10] gh-135462: Fix quadratic complexity in processing special input in HTMLParser (GH-135464) End-of-file errors are now handled according to the HTML5 specs -- comments and declarations are automatically closed, tags are ignored. (cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41) Co-authored-by: Serhiy Storchaka <storch...@gmail.com> --- Lib/html/parser.py | 41 +++- Lib/test/test_htmlparser.py | 95 ++++++++-- Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 3 files changed, 117 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst Index: Python-3.10.18/Lib/html/parser.py =================================================================== --- Python-3.10.18.orig/Lib/html/parser.py 2025-07-02 18:00:32.520419724 +0200 +++ Python-3.10.18/Lib/html/parser.py 2025-07-02 18:00:42.616018075 +0200 @@ -25,6 +25,7 @@ charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') starttagopen = re.compile('<[a-zA-Z]') +endtagopen = re.compile('</[a-zA-Z]') piclose = re.compile('>') commentclose = re.compile(r'--\s*>') # Note: @@ -176,7 +177,7 @@ k = self.parse_pi(i) elif startswith("<!", i): k = self.parse_html_declaration(i) - elif (i + 1) < n: + elif (i + 1) < n or end: self.handle_data("<") k = i + 1 else: @@ -184,17 +185,35 @@ if k < 0: if not end: break - k = rawdata.find('>', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 + if starttagopen.match(rawdata, i): # < + letter + pass + elif startswith("</", i): + if i + 2 == n: + self.handle_data("</") + elif endtagopen.match(rawdata, i): # </ + letter + pass + else: + # bogus comment + self.handle_comment(rawdata[i+2:]) + elif startswith("<!--", i): + j = n + for suffix in ("--!", "--", "-"): + if rawdata.endswith(suffix, i+4): + j -= len(suffix) + break + self.handle_comment(rawdata[i+4:j]) + elif startswith("<![CDATA[", i): + self.unknown_decl(rawdata[i+3:]) + elif rawdata[i:i+9].lower() == '<!doctype': + self.handle_decl(rawdata[i+2:]) + elif startswith("<!", i): + # bogus comment + self.handle_comment(rawdata[i+2:]) + elif startswith("<?", i): + self.handle_pi(rawdata[i+2:]) else: - k += 1 - if self.convert_charrefs and not self.cdata_elem: - self.handle_data(unescape(rawdata[i:k])) - else: - self.handle_data(rawdata[i:k]) + raise AssertionError("we should not get here!") + k = n i = self.updatepos(i, k) elif startswith("&#", i): match = charref.match(rawdata, i) Index: Python-3.10.18/Lib/test/test_htmlparser.py =================================================================== --- Python-3.10.18.orig/Lib/test/test_htmlparser.py 2025-07-02 18:00:33.941415672 +0200 +++ Python-3.10.18/Lib/test/test_htmlparser.py 2025-07-02 18:00:42.616237238 +0200 @@ -4,6 +4,8 @@ import pprint import unittest +from test import support + class EventCollector(html.parser.HTMLParser): @@ -391,28 +393,34 @@ ('data', '<'), ('starttag', 'bc<', [('a', None)]), ('endtag', 'html'), - ('data', '\n<img src="URL>'), - ('comment', '/img'), - ('endtag', 'html<')]) + ('data', '\n')]) def test_starttag_junk_chars(self): + self._run_check("<", [('data', '<')]) + self._run_check("<>", [('data', '<>')]) + self._run_check("< >", [('data', '< >')]) + self._run_check("< ", [('data', '< ')]) self._run_check("</>", []) + self._run_check("<$>", [('data', '<$>')]) self._run_check("</$>", [('comment', '$')]) self._run_check("</", [('data', '</')]) - self._run_check("</a", [('data', '</a')]) + self._run_check("</a", []) + self._run_check("</ a>", [('endtag', 'a')]) + self._run_check("</ a", [('comment', ' a')]) self._run_check("<a<a>", [('starttag', 'a<a', [])]) self._run_check("</a<a>", [('endtag', 'a<a')]) - self._run_check("<!", [('data', '<!')]) - self._run_check("<a", [('data', '<a')]) - self._run_check("<a foo='bar'", [('data', "<a foo='bar'")]) - self._run_check("<a foo='bar", [('data', "<a foo='bar")]) - self._run_check("<a foo='>'", [('data', "<a foo='>'")]) - self._run_check("<a foo='>", [('data', "<a foo='>")]) + self._run_check("<!", [('comment', '')]) + self._run_check("<a", []) + self._run_check("<a foo='bar'", []) + self._run_check("<a foo='bar", []) + self._run_check("<a foo='>'", []) + self._run_check("<a foo='>", []) self._run_check("<a$>", [('starttag', 'a$', [])]) self._run_check("<a$b>", [('starttag', 'a$b', [])]) self._run_check("<a$b/>", [('startendtag', 'a$b', [])]) self._run_check("<a$b >", [('starttag', 'a$b', [])]) self._run_check("<a$b />", [('startendtag', 'a$b', [])]) + self._run_check("</a$b>", [('endtag', 'a$b')]) def test_slashes_in_starttag(self): self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])]) @@ -537,13 +545,56 @@ for html, expected in data: self._run_check(html, expected) - def test_broken_comments(self): - html = ('<! not really a comment >' + def test_eof_in_comments(self): + data = [ + ('<!--', [('comment', '')]), + ('<!---', [('comment', '')]), + ('<!----', [('comment', '')]), + ('<!-----', [('comment', '-')]), + ('<!------', [('comment', '--')]), + ('<!----!', [('comment', '')]), + ('<!---!', [('comment', '-!')]), + ('<!---!>', [('comment', '-!>')]), + ('<!--foo', [('comment', 'foo')]), + ('<!--foo-', [('comment', 'foo')]), + ('<!--foo--', [('comment', 'foo')]), + ('<!--foo--!', [('comment', 'foo')]), + ('<!--<!--', [('comment', '<!')]), + ('<!--<!--!', [('comment', '<!')]), + ] + for html, expected in data: + self._run_check(html, expected) + + def test_eof_in_declarations(self): + data = [ + ('<!', [('comment', '')]), + ('<!-', [('comment', '-')]), + ('<![', [('comment', '[')]), + ('<![CDATA[', [('unknown decl', 'CDATA[')]), + ('<![CDATA[x', [('unknown decl', 'CDATA[x')]), + ('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]), + ('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]), + ('<!DOCTYPE', [('decl', 'DOCTYPE')]), + ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]), + ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]), + ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]), + ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]), + ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]), + ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo', + [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]), + ] + for html, expected in data: + self._run_check(html, expected) + + def test_bogus_comments(self): + html = ('<!ELEMENT br EMPTY>' + '<! not really a comment >' '<! not a comment either -->' '<! -- close enough -->' '<!><!<-- this was an empty comment>' '<!!! another bogus comment !!!>') expected = [ + ('comment', 'ELEMENT br EMPTY'), ('comment', ' not really a comment '), ('comment', ' not a comment either --'), ('comment', ' -- close enough --'), @@ -598,6 +649,26 @@ ('endtag', 'a'), ('data', ' bar & baz')] ) + @support.requires_resource('cpu') + def test_eof_no_quadratic_complexity(self): + # Each of these examples used to take about an hour. + # Now they take a fraction of a second. + def check(source): + parser = html.parser.HTMLParser() + parser.feed(source) + parser.close() + n = 120_000 + check("<a " * n) + check("<a a=" * n) + check("</a " * 14 * n) + check("</a a=" * 11 * n) + check("<!--" * 4 * n) + check("<!" * 60 * n) + check("<?" * 19 * n) + check("</$" * 15 * n) + check("<![CDATA[" * 9 * n) + check("<!doctype" * 35 * n) + class AttributesTestCase(TestCaseBase): Index: Python-3.10.18/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ Python-3.10.18/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst 2025-07-02 18:00:42.616466739 +0200 @@ -0,0 +1,4 @@ +Fix quadratic complexity in processing specially crafted input in +:class:`html.parser.HTMLParser`. End-of-file errors are now handled according +to the HTML5 specs -- comments and declarations are automatically closed, +tags are ignored.