We recently encountered a case in our glibc patchwork instance on sourceware where a patch was dropped because its charset was declared as x-unknown. I used the following patch to fix this in our instance. The fix falls back on a set of encodings (instead of just utf-8) when the charset is not specified or is set to x-unknown.
v2 removes ascii as a fallback since it won't work anyway if utf-8 failed. Signed-off-by: Siddhesh Poyarekar <[email protected]> --- apps/patchwork/bin/parsemail.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index b6eb97a..7c173d9 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -147,6 +147,13 @@ def find_pull_request(content): return match.group(1) return None +def try_decode(payload, charset): + try: + payload = unicode(payload, charset) + except UnicodeDecodeError: + return None + return payload + def find_content(project, mail): patchbuf = None commentbuf = '' @@ -157,15 +164,27 @@ def find_content(project, mail): continue payload = part.get_payload(decode=True) - charset = part.get_content_charset() subtype = part.get_content_subtype() - # if we don't have a charset, assume utf-8 - if charset is None: - charset = 'utf-8' - if not isinstance(payload, unicode): - payload = unicode(payload, charset) + charset = part.get_content_charset() + + # If there is no charset or if it is unknown, then try some common + # charsets before we fail. + if charset is None or charset == 'x-unknown': + try_charsets = ['utf-8', 'windows-1252', 'iso-8859-1'] + else: + try_charsets = [charset] + + for cset in try_charsets: + decoded_payload = try_decode(payload, cset) + if decoded_payload is not None: + break + payload = decoded_payload + + # Could not find a valid decoded payload. Fail. + if payload is None: + return (None, None) if subtype in ['x-patch', 'x-diff']: patchbuf = payload -- 1.9.3
pgp5WTfnjZeVF.pgp
Description: PGP signature
_______________________________________________ Patchwork mailing list [email protected] https://lists.ozlabs.org/listinfo/patchwork
