Replied Diff comments:
> diff --git a/lib/lp/bugs/utilities/filebugdataparser.py > b/lib/lp/bugs/utilities/filebugdataparser.py > index 4806dcb..6b37f14 100644 > --- a/lib/lp/bugs/utilities/filebugdataparser.py > +++ b/lib/lp/bugs/utilities/filebugdataparser.py > @@ -32,6 +32,105 @@ class FileBugDataParser: > self.attachments = [] > self.BUFFER_SIZE = 8192 > > + # XXX: ilkeremrekoc 2025-02-26: The whole point of trying to find the > "LF" > + # or "CRLF" is because the Apport package (which is used to send > + # bug-reports to Launchpad) uses LF line-breaks since its inception. > + # This is against the RFC standards, which standardized the "CRLF" as the > + # line-break for all HTTP requests. And because the main parser for zope > + # (which is the "multipart" package) started enforcing this standard > + # strictly in its newer versions, and considering we must upgrade our > + # dependencies, we had to make sure FileBugDataParser accepts the "CRLF" > + # as well. But we cannot accept only "CRLF" for now because the apport > + # package for every older version of Ubuntu would still use the "LF" > + # making it impossible to send bug-reports from older LTS versions. > + # So, until a sufficient number or all the apport packages in future, > + # current and past Ubuntu versions are patched to only send "CRLF" we > + # must use this method to ensure both "LF" and "CRLF" is accepted. > + # Once the apport package patches are done, we can revert this change, > + # get rid of this method and its tests and simply change > + # the "LF" expectance in the previous version into "CRLF" expectance. > + def _findLineBreakType(self) -> bytes: > + """Find the line-break/end_string the message is using and return it. > + > + Assumptions: > + - The method would be run at the start of any parsing run as this > + method doesn't open the blob file but resets the blob-stream's > + cursor. > + > + - The line-break can only be LF ("\n") or CRLF ("\r\n") with > + no CR ("\r") functionality. > + > + - The message must have at least one line-break character when > + parsing. An empty message or one without any line-breaks doesn't > + count (which should be impossible anyway but I digress). > + > + - The whole message must be made up of a single line-break type. > + If more than one type is present, something will break after this > + method. > + > + Reads through the message until it finds an LF character ("\n") > before > + closing and reopening the file-alias/blob to reset its cursor (as the > + file-alias/blob doesn't have a "seek" functionality). Then returns > + whichever line-break type is used in the message. > + > + ... > + :raises AssertionError: If the method cannot find any LF linebreak on > + the message stream, it raises. > + ... > + :return: Either byte typed CRLF (b"\r\n") or byte typed LF (b"\n") > + :rtype: bytes > + """ > + > + # The LF line break is assumed to be a part of the message as it is > + # a part of both LF and CRLF. > + lf_linebreak = b"\n" > + > + # A temporary buffer we don't need to save outside the scope as it is > + # only for finding the first line-break. > + temp_buffer = b"" > + > + while lf_linebreak not in temp_buffer: > + data = self.blob_file.read(self.BUFFER_SIZE) > + > + # Append the data to the temp_buffer. This is to ensure any > + # CR ("\r") that is part of a CRLF isn't deleted in a preceding > + # buffer accidentally. > + temp_buffer += data > + > + if len(data) < self.BUFFER_SIZE: > + # End of file. > + > + if lf_linebreak not in temp_buffer: > + # If the linebreak isn't present, then the message must > + # be broken since the method must read from the start > + > + if ( > + b"\r" in temp_buffer > + ): # If the linebreak inside the whole message is only > CR > + raise AssertionError( > + "The wrong linebreak is used. CR isn't accepted." > + ) > + > + raise AssertionError( > + "There are no linebreaks in the blob." > + ) > + break > + > + # This part is for ".seek(0)" functionality that LibraryFileAlias > + # lack, requiring the calls to close() -> open() back to back > + # to reset the stream's read cursor to the start of the file. > + self.blob_file.close() > + self.blob_file.open() > + > + lf_index = temp_buffer.index(lf_linebreak) > + > + # A slice is needed even if for a single character as bytes type acts > + # differently in slices and in singe character accesses. Yup, good practice anyway to re-order your commits logically at the end, rather than in the order you made them. > + if temp_buffer[lf_index - 1 : lf_index] == b"\r": > + return b"\r\n" > + > + return b"\n" > + > def _consumeBytes(self, end_string): > """Read bytes from the message up to the end_string. > > diff --git a/lib/lp/bugs/utilities/tests/test_filebugdataparser.py > b/lib/lp/bugs/utilities/tests/test_filebugdataparser.py > index 28d4be9..84b8e0b 100644 > --- a/lib/lp/bugs/utilities/tests/test_filebugdataparser.py > +++ b/lib/lp/bugs/utilities/tests/test_filebugdataparser.py > @@ -44,9 +67,65 @@ class TestFileBugDataParser(TestCase): > self.assertEqual(b"", parser._consumeBytes(b"0")) > self.assertEqual(b"", parser._consumeBytes(b"0")) > > + def test__findLineBreakType(self): > + # _findLineBreakType reads the whole message until either an LF or > + # a CRLF is found, returns that type in bytes string format and > exits. > + > + msg = dedent( > + """\ > + Header: value > + Space-Folded-Header: this header > + is folded with a space. > + Tab-Folded-Header: this header > + \tis folded with a tab. > + Another-header: another-value > + > + Not-A-Header: not-a-value > + """ > + ) > + > + # Test the above message with LF endings > + msg_with_lf = msg.encode("ASCII") > + lf_parser = FileBugDataParser(MockBytesIO(msg_with_lf)) > + lf_linebreak = lf_parser._findLineBreakType() > + > + self.assertEqual(lf_linebreak, b"\n") ack > + > + # Test the above message after replacing LF with CRLF > + msg_with_crlf = msg.replace("\n", "\r\n").encode("ASCII") > + crlf_parser = FileBugDataParser(MockBytesIO(msg_with_crlf)) > + crlf_linebreak = crlf_parser._findLineBreakType() > + > + self.assertEqual(crlf_linebreak, b"\r\n") > + > + # Test the above message after deleting the line-breaks > + # Which should break as there must be at least some line-breaks > + # in the message We should also think about future users though. I don't think it's technically a problem to have no line breaks at all right? Why explicitly prevent this when it's not a necessary condition? > + msg_without_linebreak = msg.replace("\n", "").encode("ASCII") > + without_linebreak_parser = FileBugDataParser( > + MockBytesIO(msg_without_linebreak) > + ) > + > + self.assertRaisesWithContent( > + AssertionError, > + "There are no linebreaks in the blob.", > + without_linebreak_parser._findLineBreakType, > + ) > + > + # Test the above message after replacing LF with CR > + # Which should break as we do not accept CR line-breaks > + msg_with_cr = msg.replace("\n", "\r").encode("ASCII") > + cr_parser = FileBugDataParser(MockBytesIO(msg_with_cr)) > + > + self.assertRaisesWithContent( > + AssertionError, > + "The wrong linebreak is used. CR isn't accepted.", > + cr_parser._findLineBreakType, > + ) > + > def test_readLine(self): > # readLine reads a single line of the file. > - parser = FileBugDataParser(io.BytesIO(b"123\n456\n789")) > + parser = FileBugDataParser(MockBytesIO(b"123\n456\n789")) > self.assertEqual(b"123\n", parser.readLine()) > self.assertEqual(b"456\n", parser.readLine()) > self.assertEqual(b"789", parser.readLine()) -- https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/481785 Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:accept-crlf-bugreports into launchpad:master. _______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp