https://github.com/python/cpython/commit/f2b7954ce0b07cceebea30d7a89d8a97d206d745 commit: f2b7954ce0b07cceebea30d7a89d8a97d206d745 branch: 3.13 author: Miss Islington (bot) <[email protected]> committer: serhiy-storchaka <[email protected]> date: 2025-08-17T10:59:24Z summary:
[3.13] gh-135661: Fix parsing unterminated bogus comments in HTMLParser (GH-137873) (GH-137875) Bogus comments that start with "<![CDATA[" should not include the starting "!" in their value. (cherry picked from commit 7636a66635a0da849cfccd06a52d0a21fb692271) Co-authored-by: Serhiy Storchaka <[email protected]> files: M Lib/html/parser.py M Lib/test/test_htmlparser.py diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 75bf8adae6d70a..5d7050dad2396b 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -271,11 +271,8 @@ def goahead(self, end): j -= len(suffix) break self.handle_comment(rawdata[i+4:j]) - elif startswith("<![CDATA[", i): - if self._support_cdata: - self.unknown_decl(rawdata[i+3:]) - else: - self.handle_comment(rawdata[i+1:]) + elif startswith("<![CDATA[", i) and self._support_cdata: + self.unknown_decl(rawdata[i+3:]) elif rawdata[i:i+9].lower() == '<!doctype': self.handle_decl(rawdata[i+2:]) elif startswith("<!", i): @@ -350,15 +347,12 @@ def parse_html_declaration(self, i): if rawdata[i:i+4] == '<!--': # this case is actually already handled in goahead() return self.parse_comment(i) - elif rawdata[i:i+9] == '<![CDATA[': - if self._support_cdata: - j = rawdata.find(']]>', i+9) - if j < 0: - return -1 - self.unknown_decl(rawdata[i+3: j]) - return j + 3 - else: - return self.parse_bogus_comment(i) + elif rawdata[i:i+9] == '<![CDATA[' and self._support_cdata: + j = rawdata.find(']]>', i+9) + if j < 0: + return -1 + self.unknown_decl(rawdata[i+3: j]) + return j + 3 elif rawdata[i:i+9].lower() == '<!doctype': # find the closing > gtpos = rawdata.find('>', i+9) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index fff41dab321acd..6a1d69335a0616 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -791,7 +791,7 @@ def test_eof_in_cdata(self, content): self._run_check('<![CDATA[' + content, [('unknown decl', 'CDATA[' + content)]) self._run_check('<![CDATA[' + content, - [('comment', '![CDATA[' + 
content)], + [('comment', '[CDATA[' + content)], collector=EventCollector(autocdata=True)) self._run_check('<svg><text y="100"><![CDATA[' + content, [('starttag', 'svg', []), _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
