------------------------------------------------------------
revno: 6534
committer: Barry Warsaw <[EMAIL PROTECTED]>
branch nick: 3.0
timestamp: Sun 2007-07-22 19:52:34 -0400
message:
  The start of a message store definition.  Whether this will end up being used
  for the archive or not is left to be seen.
  
  Define an interface, test, and implementation of a basic message store using
  globally unique identifiers of the form: archive/hash/seqno
  
  - archive is the base url of the archive, e.g. http://archives.example.com.
    This is available in the List-Archive header.
  - hash is the base32 encoded sha1 hash of the message's Message-ID and Date
    headers, which it must have.  This is available in the X-List-ID-Hash
    header.
  - seqno is a sequence number specific to the archive which will uniquely
    identify the message should there be a Message-ID/Date collision.  This is
    available in the X-List-Sequence-Number header.
  
  Added a MESSAGES_DIR variable to the config.
  
  Added a .message_store attribute to the config.
added:
  Mailman/database/messagestore.py
  Mailman/database/model/message.py
  Mailman/docs/messagestore.txt
  Mailman/interfaces/messagestore.py
modified:
  Mailman/configuration.py
  Mailman/database/__init__.py
  Mailman/database/model/__init__.py
  Mailman/docs/archives.txt

=== added file 'Mailman/database/messagestore.py'
--- a/Mailman/database/messagestore.py  1970-01-01 00:00:00 +0000
+++ b/Mailman/database/messagestore.py  2007-07-22 23:52:34 +0000
@@ -0,0 +1,140 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+from __future__ import with_statement
+
+__metaclass__ = type
+__all__ = [
+    'MessageStore',
+    ]
+
+import os
+import errno
+import base64
+import hashlib
+import cPickle as pickle
+
+from zope.interface import implements
+
+from Mailman import Utils
+from Mailman.configuration import config
+from Mailman.database.model import Message
+from Mailman.interfaces import IMessageStore
+
+# It could be very bad if you have already stored files and you change this
+# value.  We'd need a script to reshuffle and resplit.
+MAX_SPLITS = 2
+EMPTYSTRING = ''
+
+
+
+class MessageStore:
+    implements(IMessageStore)
+
+    def add(self, message):
+        # Ensure that the message has the requisite headers.
+        message_ids = message.get_all('message-id', [])
+        dates = message.get_all('date', [])
+        if not (len(message_ids) == 1 and len(dates) == 1):
+            raise ValueError(
+                'Exactly one Message-ID and one Date header required')
+        # Calculate and insert the X-List-ID-Hash.
+        message_id = message_ids[0]
+        date = dates[0]
+        shaobj = hashlib.sha1(message_id)
+        shaobj.update(date)
+        hash32 = base64.b32encode(shaobj.digest())
+        del message['X-List-ID-Hash']
+        message['X-List-ID-Hash'] = hash32
+        # Calculate the path on disk where we're going to store this message
+        # object, in pickled format.
+        parts = []
+        split = list(hash32)
+        while split and len(parts) < MAX_SPLITS:
+            parts.append(split.pop(0) + split.pop(0))
+        parts.append(EMPTYSTRING.join(split))
+        relpath = os.path.join(*parts)
+        # Store the message in the database.  This relies on the database
+        # providing a unique serial number, but to get this information, we
+        # have to use a straight insert instead of relying on Elixir to create
+        # the object.
+        result = Message.table.insert().execute(
+            hash=hash32, path=relpath, message_id=message_id)
+        # Add the additional header.
+        seqno = result.last_inserted_ids()[0]
+        del message['X-List-Sequence-Number']
+        message['X-List-Sequence-Number'] = str(seqno)
+        # Now calculate the full file system path.
+        path = os.path.join(config.MESSAGES_DIR, relpath, str(seqno))
+        # Write the file to the path, but catch the appropriate exception in
+        # case the parent directories don't yet exist.  In that case, create
+        # them and try again.
+        while True:
+            try:
+                with open(path, 'w') as fp:
+                    # -1 says to use the highest protocol available.
+                    pickle.dump(message, fp, -1)
+                    break
+            except IOError, e:
+                if e.errno <> errno.ENOENT:
+                    raise
+            os.makedirs(os.path.dirname(path))
+        return seqno
+
+    def _msgobj(self, msgrow):
+        path = os.path.join(config.MESSAGES_DIR, msgrow.path, str(msgrow.id))
+        with open(path) as fp:
+            return pickle.load(fp)
+
+    def get_messages_by_message_id(self, message_id):
+        for msgrow in Message.select_by(message_id=message_id):
+            yield self._msgobj(msgrow)
+
+    def get_messages_by_hash(self, hash):
+        for msgrow in Message.select_by(hash=hash):
+            yield self._msgobj(msgrow)
+
+    def _getmsg(self, global_id):
+        try:
+            hash, seqno = global_id.split('/', 1)
+            seqno = int(seqno)
+        except ValueError:
+            return None
+        msgrows = Message.select_by(id=seqno)
+        if not msgrows:
+            return None
+        assert len(msgrows) == 1, 'Multiple id matches'
+        if msgrows[0].hash <> hash:
+            # The client lied about which message they wanted.  They gave a
+            # valid sequence number, but the hash did not match.
+            return None
+        return msgrows[0]
+
+    def get_message(self, global_id):
+        msgrow = self._getmsg(global_id)
+        return (self._msgobj(msgrow) if msgrow is not None else None)
+
+    @property
+    def messages(self):
+        for msgrow in Message.select():
+            yield self._msgobj(msgrow)
+
+    def delete_message(self, global_id):
+        msgrow = self._getmsg(global_id)
+        if msgrow is None:
+            raise KeyError(global_id)
+        msgrow.delete()

=== added file 'Mailman/database/model/message.py'
--- a/Mailman/database/model/message.py 1970-01-01 00:00:00 +0000
+++ b/Mailman/database/model/message.py 2007-07-22 23:52:34 +0000
@@ -0,0 +1,30 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+from elixir import *
+from zope.interface import implements
+
+
+
+class Message(Entity):
+    """A message in the message store."""
+
+    has_field('hash',       Unicode)
+    has_field('path',       Unicode)
+    has_field('message_id', Unicode)
+
+    using_options(shortnames=True)

=== added file 'Mailman/docs/messagestore.txt'
--- a/Mailman/docs/messagestore.txt     1970-01-01 00:00:00 +0000
+++ b/Mailman/docs/messagestore.txt     2007-07-22 23:52:34 +0000
@@ -0,0 +1,169 @@
+The message store
+=================
+
+The message store is a collection of messages keyed off of unique global
+identifiers.  A global id for a message is calculated relative to the message
+store's base URL and its components are stored as headers on the message.  One
+piece of information is the X-List-ID-Hash, a base-32 encoding of the SHA1
+hash of the message's Message-ID and Date headers, which the message must
+have.   The second piece of information is supplied by the message store; it
+is a sequence number that will uniquely identify the message even when the
+X-List-ID-Hash collides.
+
+    >>> from email import message_from_string
+    >>> from Mailman.configuration import config
+    >>> from Mailman.database import flush
+    >>> store = config.message_store
+
+If you try to add a message to the store which is missing either the
+Message-ID header or the Date header, you will get a ValueError.
+
+    >>> msg = message_from_string("""\
+    ... Subject: An important message
+    ...
+    ... This message is very important.
+    ... """)
+    >>> store.add(msg)
+    Traceback (most recent call last):
+    ...
+    ValueError: Exactly one Message-ID and one Date header required
+
+Adding a Message-ID header alone doesn't help.
+
+    >>> msg['Message-ID'] = '<[EMAIL PROTECTED]>'
+    >>> store.add(msg)
+    Traceback (most recent call last):
+    ...
+    ValueError: Exactly one Message-ID and one Date header required
+
+Neither does adding just a Date header.
+
+    >>> del msg['message-id']
+    >>> msg['Date'] = 'Wed, 04 Jul 2007 16:49:58 +0900'
+    >>> store.add(msg)
+    Traceback (most recent call last):
+    ...
+    ValueError: Exactly one Message-ID and one Date header required
+
+However, having them both is all you need.
+
+    >>> msg['Message-ID'] = '<[EMAIL PROTECTED]>'
+    >>> store.add(msg)
+    1
+    >>> flush()
+    >>> print msg.as_string()
+    Subject: An important message
+    Date: Wed, 04 Jul 2007 16:49:58 +0900
+    Message-ID: <[EMAIL PROTECTED]>
+    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+    X-List-Sequence-Number: 1
+    <BLANKLINE>
+    This message is very important.
+    <BLANKLINE>
+
+
+Finding messages
+----------------
+
+There are several ways to find a message given some or all of the information
+created above.  Because Message-IDs are not guaranteed unique, looking up
+messages with that key returns a collection.  The collection may be empty if
+there are no matches.
+
+    >>> list(store.get_messages_by_message_id('nothing'))
+    []
+
+Given an existing Message-ID, all matching messages will be found.
+
+    >>> msgs = list(store.get_messages_by_message_id(msg['message-id']))
+    >>> len(msgs)
+    1
+    >>> print msgs[0].as_string()
+    Subject: An important message
+    Date: Wed, 04 Jul 2007 16:49:58 +0900
+    Message-ID: <[EMAIL PROTECTED]>
+    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+    X-List-Sequence-Number: 1
+    <BLANKLINE>
+    This message is very important.
+    <BLANKLINE>
+
+Similarly, we can find messages by the ID hash.
+
+    >>> list(store.get_messages_by_hash('nothing'))
+    []
+    >>> msgs = list(store.get_messages_by_hash(msg['x-list-id-hash']))
+    >>> len(msgs)
+    1
+    >>> print msgs[0].as_string()
+    Subject: An important message
+    Date: Wed, 04 Jul 2007 16:49:58 +0900
+    Message-ID: <[EMAIL PROTECTED]>
+    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+    X-List-Sequence-Number: 1
+    <BLANKLINE>
+    This message is very important.
+    <BLANKLINE>
+
+We can also get a single message by using its relative global ID.  This
+returns None if there is no match.
+
+    >>> print store.get_message('nothing')
+    None
+    >>> print store.get_message('nothing/1')
+    None
+    >>> id_hash = msg['x-list-id-hash']
+    >>> seqno = msg['x-list-sequence-number']
+    >>> global_id = id_hash + '/' + seqno
+    >>> print store.get_message(global_id).as_string()
+    Subject: An important message
+    Date: Wed, 04 Jul 2007 16:49:58 +0900
+    Message-ID: <[EMAIL PROTECTED]>
+    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+    X-List-Sequence-Number: 1
+    <BLANKLINE>
+    This message is very important.
+    <BLANKLINE>
+
+
+Iterating over all messages
+---------------------------
+
+The message store provides a means to iterate over all the messages it
+contains.
+
+    >>> msgs = list(store.messages)
+    >>> len(msgs)
+    1
+    >>> print msgs[0].as_string()
+    Subject: An important message
+    Date: Wed, 04 Jul 2007 16:49:58 +0900
+    Message-ID: <[EMAIL PROTECTED]>
+    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+    X-List-Sequence-Number: 1
+    <BLANKLINE>
+    This message is very important.
+    <BLANKLINE>
+
+
+Deleting messages from the store
+--------------------------------
+
+The global relative ID is the key into the message store.  If you try to
+delete a global ID that isn't in the store, you get an exception.
+
+    >>> store.delete_message('nothing')
+    Traceback (most recent call last):
+    ...
+    KeyError: 'nothing'
+
+But if you delete an existing message, it really gets deleted.
+
+    >>> store.delete_message(global_id)
+    >>> flush()
+    >>> list(store.messages)
+    []
+    >>> print store.get_message(global_id)
+    None
+    >>> list(store.get_messages_by_message_id(msg['message-id']))
+    []

=== added file 'Mailman/interfaces/messagestore.py'
--- a/Mailman/interfaces/messagestore.py        1970-01-01 00:00:00 +0000
+++ b/Mailman/interfaces/messagestore.py        2007-07-22 23:52:34 +0000
@@ -0,0 +1,101 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""The message storage service."""
+
+from zope.interface import Interface, Attribute
+
+
+
+class IMessageStore(Interface):
+    """The interface of the global message storage service.
+
+    All messages that are stored in the system live in the message storage
+    service.  This store is responsible for providing unique identifiers for
+    every message stored in it.  A message stored in this service must have at
+    least a Message-ID header and a Date header.  These are not guaranteed to
+    be unique, so the service also provides a unique sequence number to every
+    message.
+
+    Storing a message returns the unique sequence number for the message.
+    This sequence number will be stored on the message's
+    X-List-Sequence-Number header.  Any previous such header value will be
+    overwritten.  An X-List-ID-Hash header will also be added, containing the
+    Base-32 encoded SHA1 hash of the message's Message-ID and Date headers.
+
+    The combination of the X-List-ID-Hash header and the
+    X-List-Sequence-Number header uniquely identifies this message to the
+    storage service.  A globally unique URL that addresses this message may be
+    crafted from these headers and the List-Archive header as follows.  For a
+    message with the following headers:
+
+    Message-ID: <[EMAIL PROTECTED]>
+    Date: Wed, 04 Jul 2007 16:49:58 +0900
+    List-Archive: http://archive.example.com/
+    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+    X-List-Sequence-Number: 801
+
+    the globally unique URL would be:
+
+    http://archive.example.com/RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI/801
+    """
+
+    def add(message):
+        """Add the message to the store.
+
+        :param message: An email.message.Message instance containing at least
+            a Message-ID header and a Date header.  The message will be given
+            an X-List-ID-Hash header and an X-List-Sequence-Number header.
+        :returns: The message's sequence ID as an integer.
+        :raises ValueError: if the message is missing one of the required
+            headers.
+        """
+
+    def get_messages_by_message_id(message_id):
+        """Return the set of messages with the matching Message-ID.
+
+        :param message_id: The Message-ID header contents to search for.
+        :returns: An iterator over all the matching messages.
+        """
+
+    def get_messages_by_hash(hash):
+        """Return the set of messages with the matching X-List-ID-Hash.
+        
+        :param hash: The X-List-ID-Hash header contents to search for.
+        :returns: An iterator over all the matching messages.
+        """
+
+    def get_message(global_id):
+        """Return the message with the matching hash and sequence number.
+
+        :param global_id: The global relative ID which uniquely addresses this
+            message, relative to the base address of the message store.  This
+            must be a string of the X-List-ID-Hash followed by a single slash
+            character, followed by the X-List-Sequence-Number.
+        :returns: The matching message, or None if there is no match.
+        """
+
+    def delete_message(global_id):
+        """Remove the addressed message from the store.
+
+        :param global_id: The global relative ID which uniquely addresses the
+            message to delete.
+        :raises KeyError: if there is no such message.
+        """
+
+    messages = Attribute(
+        """An iterator over all messages in this message store.""")

=== modified file 'Mailman/configuration.py'
--- a/Mailman/configuration.py  2007-07-17 03:55:49 +0000
+++ b/Mailman/configuration.py  2007-07-22 23:52:34 +0000
@@ -133,6 +133,7 @@
         self.BADQUEUE_DIR       = join(qdir, 'bad')
         self.RETRYQUEUE_DIR     = join(qdir, 'retry')
         self.MAILDIR_DIR        = join(qdir, 'maildir')
+        self.MESSAGES_DIR       = join(VAR_DIR, 'messages')
         # Other useful files
         self.PIDFILE                = join(datadir, 'master-qrunner.pid')
         self.SITE_PW_FILE           = join(datadir, 'adm.pw')

=== modified file 'Mailman/database/__init__.py'
--- a/Mailman/database/__init__.py      2007-05-28 20:21:41 +0000
+++ b/Mailman/database/__init__.py      2007-07-22 23:52:34 +0000
@@ -23,6 +23,7 @@
 
 from Mailman.database.listmanager import ListManager
 from Mailman.database.usermanager import UserManager
+from Mailman.database.messagestore import MessageStore
 
 __all__ = [
     'initialize',
@@ -42,6 +43,7 @@
         model.initialize()
     config.list_manager = ListManager()
     config.user_manager = UserManager()
+    config.message_store = MessageStore()
     flush()
 
 

=== modified file 'Mailman/database/model/__init__.py'
--- a/Mailman/database/model/__init__.py        2007-06-16 02:37:33 +0000
+++ b/Mailman/database/model/__init__.py        2007-07-22 23:52:34 +0000
@@ -19,6 +19,7 @@
     'Address',
     'Language',
     'MailingList',
+    'Message',
     'Preferences',
     'User',
     'Version',
@@ -43,6 +44,7 @@
 from Mailman.database.model.language import Language
 from Mailman.database.model.mailinglist import MailingList
 from Mailman.database.model.member import Member
+from Mailman.database.model.message import Message
 from Mailman.database.model.preferences import Preferences
 from Mailman.database.model.user import User
 from Mailman.database.model.version import Version

=== modified file 'Mailman/docs/archives.txt'
--- a/Mailman/docs/archives.txt 2007-07-11 11:06:34 +0000
+++ b/Mailman/docs/archives.txt 2007-07-22 23:52:34 +0000
@@ -25,7 +25,6 @@
     ...         msg, msgdata = switchboard.dequeue(filebase)
     ...         switchboard.finish(filebase)
 
-
 The purpose of the ToArchive handler is to make a simple decision as to
 whether the message should get archived and if so, to drop the message in the
 archiving queue.  Really the most important things are to determine when a



--
(no title)
https://code.launchpad.net/~mailman-coders/mailman/3.0

You are receiving this branch notification because you are subscribed to it.
To unsubscribe from this branch go to 
https://code.launchpad.net/~mailman-coders/mailman/3.0/+subscription/mailman-checkins.
_______________________________________________
Mailman-checkins mailing list
[email protected]
Unsubscribe: 
http://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org

Reply via email to