This is my first Python script. I don't know whether it uses the
modules correctly, but it works fine, especially with Russian code pages
and MIME-formatted messages, including quoted-printable and base64
encoded ones.

I would be very grateful if anybody posted comments on this script.
Is it good or bad?


import email
import mailbox
from email.Header import decode_header
from email.Header import make_header
import string
import sys

# Code page that header and body text is encoded to before it is printed.
outEnc="cp866"
# Path of the mailbox file to process, taken from the first command-line
# argument (raises IndexError when the script is started without one).
infile=sys.argv[1]

# Markers that introduce the footer appended to each message.  obrez() tries
# them in this order, so the long tilde rule comes first and the short
# generic tilde rule is only a fallback.
subStrObrez = []
subStrObrez.append("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
subStrObrez.append("""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To UNSUBSCRIBE from this forum, send an email to:""")
subStrObrez.append("~~~~~~~~~~~~~~~~~~")

# Cut yahoo info at the end of message
def obrez(strMsg):
    """Return strMsg with the trailing footer removed.

    The markers in subStrObrez are tried in order.  For the first marker
    that occurs in strMsg, everything from its LAST occurrence to the end
    of the text is dropped.  If no marker is present, strMsg is returned
    unchanged.
    """
    for s in subStrObrez:
        # Use the string method instead of the deprecated string.rfind()
        # module function (removed in Python 3).
        n = strMsg.rfind(s)
        if n != -1:
            return strMsg[:n]
    return strMsg

# Convert message header
def my_get_header(str):
    """Decode an RFC 2047 encoded header value into a single text string.

    str -- the raw header value as returned by msg['Name'], or None when
           the message has no such header.

    Every fragment reported by decode_header() is converted to text and
    followed by one space (so the result ends with a space, as before).
    A missing header yields "".  Bytes that are invalid for the declared
    charset are replaced instead of raising, and an unknown charset label
    falls back to ASCII with replacement.
    """
    # msg['Name'] gives None for an absent header; decode_header() cannot
    # handle that.
    if str is None:
        return ""

    pieces = []
    for val, encoding in decode_header(str):
        # Fragments arrive as byte strings (always on Python 2, for encoded
        # words on Python 3).  Decode them all so that byte and text
        # strings are never concatenated.
        if isinstance(val, bytes):
            try:
                val = val.decode(encoding or "ascii", "replace")
            except LookupError:
                # The charset label is not known to Python.
                val = val.decode("ascii", "replace")
        pieces.append(val + " ")
    return "".join(pieces)

# Render the body of one message part
def _part_text(part, trim):
    """Return the printable body of a single message part.

    part -- an email message object (the whole message or one sub-part)
    trim -- when true, the list footer is removed with obrez()

    get_payload(None, True) undoes the base64 / quoted-printable transfer
    encoding.  If the part declares a charset, the text is decoded from it
    and re-encoded to outEnc; characters that cannot be converted are
    replaced rather than aborting the run.  Without a usable charset the
    raw bytes are passed through unchanged.
    """
    raw = part.get_payload(None, True)
    if raw is None:
        # No decodable payload for this part.
        return ""

    charset = part.get_content_charset()
    if charset:
        try:
            text = raw.decode(charset, "replace")
        except LookupError:
            # Charset label unknown to Python: fall back to the raw bytes.
            text = None
        if text is not None:
            if trim:
                text = obrez(text)
            return text.encode(outEnc, "replace")

    if trim:
        return obrez(raw)
    return raw


# Process the message
def proc(msg):
    """Print the From/To/Subject headers and the text body of msg.

    Multipart messages: every text/plain part is printed with the footer
    trimmed.  Single-part messages: text/plain is printed trimmed,
    text/html is printed untrimmed, any other content type prints nothing.

    Each print is a single parenthesised expression so that the statement
    cannot be split from its argument by line wrapping.
    """
    headers = (('From   : ', 'From'),
               ('To     : ', 'To'),
               ('Subject: ', 'Subject'))
    for label, name in headers:
        # "or ''" keeps a message without this header from crashing.
        value = my_get_header(msg[name] or '')
        print(label + value.encode(outEnc, 'replace'))
    print('')

    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                print(_part_text(part, True))
    elif msg.get_content_type() == "text/plain":
        print(_part_text(msg, True))
    elif msg.get_content_type() == "text/html":
        print(_part_text(msg, False))


####################################################################################
#  The main program

# Pairs of [tag searched for in the Subject, rubric name used in the "SB"
# line].  NOTE(review): the "SB ... /EX" framing looks like a BBS bulletin
# upload format -- confirm with the consumer of this output.
RubLst = [
    ["[contestru]", "FOTSTR"],
    ["[russiandx]", "FORUDX"],
]

f = open(infile, "rb")
try:
    for msg in mailbox.UnixMailbox(f, email.message_from_file):
        # Decode the subject once per message; "or ''" covers messages
        # that have no Subject header at all.
        subject = my_get_header(msg['Subject'] or '')
        for rub in RubLst:
            if rub[0] in subject:
                print("SB " + rub[1] + "@FORUM < INET")
                print(subject.encode(outEnc, 'replace'))
                print('')
                proc(msg)
                print('')
                print("powered by Python")
                print("/EX")
finally:
    # Release the mailbox file even if a message fails to parse or print.
    f.close()
--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to