[whimsy] branch master updated: Obsolete

sebb Wed, 08 Feb 2023 04:50:09 -0800

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git



The following commit(s) were added to refs/heads/master by this push:
     new 8a440dcf Obsolete
8a440dcf is described below

commit 8a440dcfd83b6f884e2fd3bc25496e20ccd4de4d
Author: Sebb <[email protected]>
AuthorDate: Wed Feb 8 12:49:58 2023 +0000

    Obsolete
---
 secmail.py | 353 -------------------------------------------------------------
 1 file changed, 353 deletions(-)

diff --git a/secmail.py b/secmail.py
deleted file mode 100644
index 879d0da3..00000000
--- a/secmail.py
+++ /dev/null
@@ -1,353 +0,0 @@
-#!/usr/bin/python
-
-"""
-The purpose of this script is to find attachments to email messages that
-are sent to [email protected] and commit them into svn:documents/received.
-
-This task is made more difficult by the fact that email often uses payloads
-for reasons other than attachments, from time to time we get spam, some
-people routinely pgp sign all of their emails, and others use pgp signatures
-to sign forms.
-
-Deciding what to commit is therefore, necessarily, a bit of heuristics.  When
-in doubt, the intent here is to err on the side of committing more than is
-necessary than to miss an email.
-
-Examples of heurisitics:
- * Images less than 10K bytes tend to be decorations for HTML formatted
-   spam emails, and are not likely to be scanned forms.
- * text/plain email that contain a PGP signature and the ASF fax number
-   are likely to be signed forms.
-"""
-
-import email
-import gzip
-import mailbox
-import rfc822
-import mimetypes
-import os
-from datetime import datetime
-from email.header import decode_header
-from glob import glob
-import re
-from subprocess import Popen, PIPE
-from threading import Thread
-import commands
-import getpass
-
-try:
-  from hashlib import md5
-except ImportError:
-  from md5 import new as md5
-
-# attachment types which generally are not saved.
-skip = ['multipart/alternative', 'multipart/related', 'multipart/mixed',
-        'message/delivery-status', 'text/plain', 'text/html']
-
-# attachment file names which always are saved, even if they come in
-# with one of the 'skip' mime types.
-forms = ['pgp.txt', 'icla.txt', 'icla.txt.asc', 'icla.pdf', 'icla.pdf.asc', 'membership-application.txt']
-
-# mime types for pgp signatures
-sigs  = ['application/pkcs7-signature', 'application/pgp-signature']
-
-# convert header from whatever encoding it is in to utf-8.  Handle
-# mislabelled encodings.
-def decode(header, field=0):
-  if isinstance(header, unicode):
-    data = (header.encode('utf-8'), 'utf-8')
-  else:
-    data = decode_header(header)[field]
-
-  try:
-    return data[0].decode(data[1]).encode('utf-8')
-  except:
-    return data[0].decode('iso-8859-1').encode('utf-8')
-
-# convert non-ascii characters into rough equivalents for the purpose
-# of determining a file name to store in SVN.
-def asciize(name):
-  if re.search(r"[^\x00-\x7F]", name):
-    # digraphs.  May be culturally sensitive
-    name=re.sub(r"\xc3\x9f", 'ss', name)
-    name=re.sub(r"\xc3\xa4|a\xcc\x88", 'ae', name)
-    name=re.sub(r"\xc3\xa5|a\xcc\x8a", 'aa', name)
-    name=re.sub(r"\xc3\xa6", 'ae', name)
-    name=re.sub(r"\xc3\xb1|n\xcc\x83", 'ny', name)
-    name=re.sub(r"\xc3\xb6|o\xcc\x88", 'oe', name)
-    name=re.sub(r"\xc3\xbc|u\xcc\x88", 'ue', name)
-
-    # latin 1
-    name=re.sub(r"\xc3[\xa0-\xa5]", 'a', name)
-    name=re.sub(r"\xc3\xa7", 'c', name)
-    name=re.sub(r"\xc3[\xa8-\xab]", 'e', name)
-    name=re.sub(r"\xc3[\xac-\xaf]", 'i', name)
-    name=re.sub(r"\xc3[\xb2-\xb6]|\xc3\xb8", 'o', name)
-    name=re.sub(r"\xc3[\xb9-\xbc]", 'u', name)
-    name=re.sub(r"\xc3[\xbd\xbf]", 'y', name)
-
-    # Latin Extended-A
-    name=re.sub(r"\xc4[\x80-\x85]", 'a', name)
-    name=re.sub(r"\xc4[\x86-\x8d]", 'c', name)
-    name=re.sub(r"\xc4[\x8e-\x91]", 'd', name)
-    name=re.sub(r"\xc4[\x92-\x9b]", 'e', name)
-    name=re.sub(r"\xc4[\x9c-\xa3]", 'g', name)
-    name=re.sub(r"\xc4[\xa4-\xa7]", 'h', name)
-    name=re.sub(r"\xc4[\xa8-\xb1]", 'i', name)
-    name=re.sub(r"\xc4[\xb2-\xb3]", 'ij', name)
-    name=re.sub(r"\xc4[\xb4-\xb5]", 'j', name)
-    name=re.sub(r"\xc4[\xb6-\xb8]", 'k', name)
-    name=re.sub(r"\xc4[\xb9-\xff]|\xc5[\x80-\x82]", 'l', name)
-    name=re.sub(r"\xc5[\x83-\x8b]", 'n', name)
-    name=re.sub(r"\xc5[\x8c-\x91]", 'o', name)
-    name=re.sub(r"\xc5[\x92-\x93]", 'oe', name)
-    name=re.sub(r"\xc5[\x94-\x99]", 'r', name)
-    name=re.sub(r"\xc5[\x9a-\xa2]", 's', name)
-    name=re.sub(r"\xc5[\xa2-\xa7]", 't', name)
-    name=re.sub(r"\xc5[\xa8-\xb3]", 'u', name)
-    name=re.sub(r"\xc5[\xb4-\xb5]", 'w', name)
-    name=re.sub(r"\xc5[\xb6-\xb8]", 'y', name)
-    name=re.sub(r"\xc5[\xb9-\xbe]", 'z', name)
-
-    # denormalized diacritics
-    name=re.sub(r"\xcc[\x80-\xff]|\xcd[\x80-\xaf]", '', name)
-
-  return re.sub(r"[^.\w]+", '-', name)
-
-# add svn at sign if necessary
-def svn(command, file):
-  command = 'svn ' + command + ' ' + file
-  if '@' in file: command = command + '@'
-  # import sys
-  # sys.stderr.write(command+"\n")
-  return os.system(command)
-
-# spam assassin client
-def analyze(msg):
-  spamc = Popen('spamc', shell=True, stdin=PIPE, stdout=PIPE)
-  class passthru(Thread):
-    def __init__(self, stdin, msg):
-      Thread.__init__(self)
-      self.msg = msg
-      self.stdin = stdin
-    def run(self):
-      try:
-        email.generator.Generator(self.stdin).flatten(self.msg)
-      except:
-        pass
-      self.stdin.close()
-  thread = passthru(spamc.stdin, msg)
-  thread.start()
-  subject = msg['subject']
-  msg = email.message_from_file(spamc.stdout)
-  msg['subject'] = subject # spamc mangles encoded strings
-  setattr(msg, 'spam', str(msg['X-Spam-Status']).startswith('Yes'))
-  thread.join()
-  spamc.wait()
-  spamc.stdout.close()
-  return msg
-
-# main logic for this script: process attachments for a single message
-def detach(msg):
-  # quick exit if we have seen this entry before
-  if not msg['message-id']: return
-  mid = md5(msg['message-id']).hexdigest()
-  if os.path.exists(os.path.join('tally',mid)): return
-
-  # known spammers
-  if '<[email protected]>' in msg['from']:
-    return
-
-  # collect eligible attachments
-  attachments = []
-  for payload in msg.get_payload():
-
-    # progress into multipart/mixed
-    if payload.get_content_type() == 'multipart/mixed':
-      payload = payload.get_payload()
-    else:
-      payload = [payload]
-
-    # iterate over (possibly nested) attachments
-    for subpayload in payload:
-      if subpayload.get_content_type() in skip:
-        if subpayload.get_filename() not in forms: continue
-        content = subpayload.get_payload(decode=True)
-        if 'License Agreement' not in content and \
-          '-----BEGIN PGP SIGNATURE-----' not in content:
-          continue
-      if subpayload.get_content_type() == 'image/gif':
-        if len(subpayload.get_payload(decode=True))<10240: continue
-      # if not subpayload.get_payload(decode=True): continue
-
-      # get_filename doesn't appear to have an endswith method
-      # if subpayload.get_filename().endswith('.gpg'): continue
-      attachments.append(subpayload)
-
-  if len(attachments) == 0: return
-
-  if os.system('svn update received') != 0:
-    return
-
-  ## COMMENTED OUT - AS SPAMC IS NOT INSTALLED HERE
-  #
-  # if 'eFax message from' not in decode(msg['subject']):
-  #   msg = analyze(msg)
-  #   if msg.spam:
-  #     attachments = []
-
-  # determine output file name prefix
-  prefix = ''
-  if len(attachments) > 1:
-    prefix = rfc822.parseaddr(decode(msg['from']).decode('utf-8'))[1]
-    received = os.path.join('received',prefix)
-    if (not re.match(r'^[.@\w]+$',prefix)) or os.path.exists(received):
-      dirname = datetime(*email.utils.parsedate(msg['date'])[:7]).isoformat()
-      prefix = dirname.replace(':','_').replace('-','_')
-      received = os.path.join('received',prefix)
-    if not os.path.exists(received): os.mkdir(received)
-    svn('add', received)
-    prefix += os.sep
-  elif len(attachments) == 1:
-    name=asciize(decode(attachments[0].get_filename()))
-    if not name: return
-    if attachments[0].get_content_type() in sigs: return
-    if len(name)<16:
-      prefix = decode(msg['from'])
-      if prefix.startswith('"eFax"'):
-        prefix = 'eFax'
-      else:
-        prefix = asciize(prefix)
-        if prefix.find('<')>=0: prefix = prefix.split('<')[1]
-        prefix = prefix.split('@')[0]
-      prefix = prefix + '-'
-    try:
-      name.decode('utf-8')
-    except:
-      name=name.decode('iso-8859-1').encode('utf-8')
-
-  # determine commit message
-  summary = "\n".join([
-    'Subject: ' + decode(msg['subject']),
-    'From: ' + decode(msg['from']),
-    'Date: ' + str(msg['date']),
-    'Message-Id: ' + str(msg['message-id']),
-    'X-Spam-Status' + str(msg['X-Spam-Status']),
-  ])
-
-  count = 0
-  file = None
-
-  # decode payloads and place add to svn
-  for attachment in attachments:
-    mime = attachment.get_content_type()
-    if mime == 'application/octet-stream':
-      mime = mimetypes.guess_type(decode(attachment.get_filename()))[0]
-    name=asciize(decode(attachment.get_filename()))
-    if name=='none': name=str(dict(attachment.get_params()).get('name'))
-
-    content = attachment.get_payload(decode=True)
-    if content:
-      file=os.path.join('received',(prefix+name).strip('-'))
-      if os.path.isdir(file): file = os.path.join(file, 'unnamed')
-      fh=open(file,'w')
-      fh.write(content)
-      fh.close()
-
-      svn('add', file)
-      if mime: svn('propset svn:mime-type ' + mime, file)
-      count = count + 1
-
-  if count>1: file = os.path.join('received',prefix.strip('-'))
-
-  try:
-    name = decode(msg['from'],0)
-    try:
-      addr = rfc822.parseaddr(decode(msg['from'],1))[1]
-    except:
-      name, addr = rfc822.parseaddr(name)
-
-    if name != 'eFax' and file:
-      props = {
-        'email:id': msg['message-id'],
-        'email:subject': re.sub(r'\n\s*', ' ', decode(msg['subject']))
-      }
-      if name: props['email:name'] = name
-      if addr: props['email:addr'] = addr
-      if msg['cc']: props['email:cc'] =  re.sub(r'\s+', ' ', decode(msg['cc']))
-      for (key, value) in props.items():
-        svn('propset ' + key + ' ' + repr(value), file)
-  except:
-    pass
-
-  tally = os.path.join('tally',mid)
-  fh=open(tally,'w')
-  fh.write(summary + "\n")
-  fh.close()
-
-  if count>0 and getpass.getuser() != 'www-data':
-    if svn('commit --file ' + tally, file) != 0:
-      return # try again next cron cycle
-
-if __name__ == "__main__":
-  if os.path.exists('/home/apmail/private-arch/officers-secretary'):
-    archive = '/home/apmail/private-arch/officers-secretary/20*'
-    os.chdir('/home/apmail/secretary-mail')
-    previous = os.stat('latest').st_mtime
-  elif os.path.exists('mailbox'):
-    archive = 'mailbox'
-    previous = int(os.stat(archive).st_mtime) - 1
-  else:
-    import sys
-    sys.stderr.write("can't find mailbox.  Exiting.\n")
-    sys.exit(1)
-
-  latest = previous
-  last_processed = None
-
-  # process updated mbox files
-  for file in glob(archive):
-    if int(previous) >= int(os.stat(file).st_mtime): continue
-
-    # open gzipped/raw file
-    if file.endswith('.gz'):
-      fh=gzip.open(file)
-    else:
-      fh=open(file)
-
-    # process each multipart message in the mailbox 
-    for msg in iter(mailbox.UnixMailbox(fh, email.message_from_file)):
-      last_processed = msg['Date']
-
-      if msg.is_multipart():
-        detach(msg)
-      elif '919-573-9199' in msg.get_payload():
-        if '-----BEGIN PGP SIGNATURE-----' in msg.get_payload().split("\n"):
-          msg.add_header('Content-Disposition', 'attachment',
-            filename='pgp.txt')
-          wrapper=email.message.Message()
-          wrapper.attach(msg)
-          for header in msg.keys(): wrapper[header]=msg[header]
-          detach(wrapper)
-
-    # keep track of the latest
-    if latest < os.stat(file).st_mtime:
-      latest = os.stat(file).st_mtime
-
-  # record where we are so that the next run can pick up where we left off
-  if latest > previous:
-    os.utime('latest', (latest, latest))
-     
-  # check for any incomplete removals
-  if commands.getoutput('svn status received') != '':
-    os.system("svn st received | grep '!' | cut -c 8- | xargs -r svn revert --")
-
-  # check for any incomplete commits
-  if commands.getoutput('svn status received') != '':
-    if getpass.getuser() != 'www-data':
-      os.system('svn commit -m "queued documents" received')
-
-  # update web page with last processed information
-  if last_processed and os.path.exists('../public_html/secmail.txt'):
-    with open('../public_html/secmail.txt', 'w') as fh:
-      fh.write("Latest email processed was sent: %s" % last_processed)

[whimsy] branch master updated: Obsolete

Reply via email to