I fixed this issue with a patch a maintainer sent me. Replace the code in your Unicode.py with the code below and recompile.
Andrew #!/usr/bin/env python # # Copyright (C) 2002 Gre7g Luterman <[EMAIL PROTECTED]> # # This file is part of TMDA. # # TMDA is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. A copy of this license should # be included in the file COPYING. # # TMDA is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # # You should have received a copy of the GNU General Public License # along with TMDA; if not, write to the Free Software Foundation, Inc., # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA "Unicode utilities for tmda-cgi." import codecs import re import Template # Handy values AltChar = re.compile("[\x80-\xFF]") UTF8 = codecs.lookup("utf-8")[0] def Xlate(Chr): if ord(Chr) >= 160: return unichr(ord(Chr)) if Chr == "\x80": return u"\u20AC" if Chr == "‚": return u"\u201A" if Chr == "ƒ": return u"\u2061" if Chr == "„": return u"\u201E" if Chr == "…": return u"\u2026" if Chr == "†": return u"\u2020" if Chr == "‡": return u"\u2021" if Chr == "‰": return u"\u2030" if Chr == "‹": return u"\u2039" if Chr == "‘": return u"\u2018" if Chr == "’": return u"\u2019" if Chr == "“": return u"\u201C" if Chr == "”": return u"\u201D" if Chr == "•": return u"\u2022" if Chr == "–": return u"\u2014" if Chr == "—": return u"\u2015" if Chr == "™": return u"\u2122" return u"\u007F" def Iso8859(Str): RetVal = u"" while 1: Match = AltChar.search(Str) if Match: RetVal += Str[:Match.start()] + Xlate(Match.group(0)) Str = Str[Match.end():] else: break RetVal += Str return (RetVal,) def TranslateToUTF8(CharSet, Str, Errors): "Represent a string in UTF-8." import email.Charset if not CharSet: CharSet = "iso-8859-1" CS = email.Charset.Charset(CharSet) CharSet = CS.input_charset # Find appropriate decoder if CharSet in ("iso-8859-1", "us-ascii", "us_ascii" ): Decoder = Iso8859 else: try: Decoder = codecs.lookup(CharSet)[1] except LookupError: try: # Is it GB2312? if CharSet == "gb2312": import chinese.gb2312 Lib = chinese.gb2312 # Is it GBK? elif CharSet == "gbk": import chinese.gbk Lib = chinese.gbk # Is it Big5? elif CharSet == "big5": import chinese.big5 Lib = chinese.big5 # Is it iso-2022-jp? elif CharSet == "iso-2022-jp": import japanese.iso_2022_jp_ext Lib = japanese.iso_2022_jp_ext # Don't recognize it. Was it our fallback? elif CharSet == PVars[("General", "CSEncoding")]: # It was our fallback! Give up now! return Str # Mark it and use the fallback else: return "(%s) %s" % (CharSet, TranslateToUTF8(PVars[("General", "CSEncoding")], Str, Errors)) Decoder = Lib.Codec().decode except ImportError: # We know what it was, but we don't have the library installed. return "(%s) %s" % (CharSet, Str) # Decode string to Unicode try: Uni = Decoder(Str, errors = Errors)[0] except: try: Uni = Decoder(Str)[0] except: try: # what? it claimed to be this character set but won't decode? # try iso-8859 as a last resort Decoder = Iso8859 Uni = Decoder(Str)[0] except: # total failure - we were lied to and can't figure out the character # set. return "?" # Encode for UTF-8 return UTF8(Uni)[0] Andreas Plachy wrote: > Hi all! > > We got often problems with spammails, which have special characters in > the header. > The tmda-cgi is still unaseable, till I delete the pending file or > remove the specials chars in it. > > What we have to change to bypass this unicode error? > > In this expamle the "From"-field looks like this: "From: Viagra.com Inc > ® <[EMAIL PROTECTED]>" > > Hope someone needs an answer... > > so long, > Andreas > > > > > UnicodeDecodeError Python 2.3.4: /usr/bin/python > Wed Sep 5 07:17:50 2007 > > A problem occurred in a Python script. Here is the sequence of function > calls leading up to the error, in the order they occurred. > > /usr/src/tmda-cgi-0.13/tmda-cgi.py > 188 elif Cmd == "pending": > > 189 import PendList > > 190 Call(PendList) > > 191 elif Cmd == "restore": > > 192 pass > > Call = <function Call>, PendList = <module 'PendList' from > '/usr/src/tmda-cgi-0.13/PendList.pyc'> > > > /usr/src/tmda-cgi-0.13/tmda-cgi.py in Call(Library=<module 'PendList' > from '/usr/src/tmda-cgi-0.13/PendList.pyc'>, Str=None) > 86 Library.Show(Str) > > 87 else: > > 88 Library.Show() > > 89 > > 90 # Capture WebUID > > Library = <module 'PendList' from > '/usr/src/tmda-cgi-0.13/PendList.pyc'>, Library.Show = <function Show> > > > /usr/src/tmda-cgi-0.13/PendList.py in Show() > 498 value += Unicode.TranslateToUTF8(CharSet, > decoded[0], "ignore") > > 499 else: > > 500 value += Unicode.TranslateToUTF8(CharSet, decoded[0], > "ignore") > > 501 From = value > > 502 Temp = Address.search(From) > > value = '', global Unicode = <module 'Unicode' from > '/usr/src/tmda-cgi-0.13/Unicode.pyc'>, Unicode.TranslateToUTF8 = > <function TranslateToUTF8>, CharSet = 'us-ascii"', decoded = > ('Viagra.com Inc \xae <[EMAIL PROTECTED]>', None) > > > /usr/src/tmda-cgi-0.13/Unicode.py in > TranslateToUTF8(CharSet='us-ascii"', Str='Viagra.com Inc \xae > <[EMAIL PROTECTED]>', Errors='ignore') > 113 Uni = Decoder(Str, errors = Errors)[0] > > 114 except: > > 115 Uni = Decoder(Str)[0] > > 116 > > 117 # Encode for UTF-8 > > Uni undefined, Decoder = <built-in function ascii_decode>, Str = > 'Viagra.com Inc \xae <[EMAIL PROTECTED]>' > > > UnicodeDecodeError: 'ascii' codec can't decode byte 0xae in position 15: > ordinal not in range(128) > args = ('ascii', 'Viagra.com Inc \xae <[EMAIL PROTECTED]>', 15, 16, > 'ordinal not in range(128)') > encoding = 'ascii' > end = 16 > object = 'Viagra.com Inc \xae <[EMAIL PROTECTED]>' > reason = 'ordinal not in range(128)' > start = 15 > > > ------------------------------------------------------------------------ > > _____________________________________________ > tmda-users mailing list (tmda-users@tmda.net) > http://tmda.net/lists/listinfo/tmda-users -- This message has been scanned for viruses and dangerous content by MailScanner. _____________________________________________ tmda-users mailing list (tmda-users@tmda.net) http://tmda.net/lists/listinfo/tmda-users