Hello everyone. Please cc me on any replies, as I'm not subscribed.
Here is some quick and dirty code to add a maildir backend to beagle 0.2.10. It is based on the KMail backend and just assumes that ~/Mail is a maildir directory. Without this patch a big portion of my emails get misclassified as text/plain. It probably does need to be smarter about detecting maildir directories, or trying other places, but it's a starting point if anyone is interested in improving it. -chris diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs --- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs 1969-12-31 19:00:00.000000000 -0500 +++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs 2006-09-28 13:48:16.000000000 -0400 @@ -0,0 +1,303 @@ + +// +// MaildirIndexableGenerator.cs +// +// Copyright (C) 2005 Novell, Inc. +// Copyright (C) 2005 Debajyoti Bera +// +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;
using System.Threading;
using System.Xml;

using Beagle.Util;
using Beagle.Daemon;

namespace Beagle.Daemon.MaildirQueryable {

	/**
	 * Indexable generator for maildir mails.
	 *
	 * Walks the cur/ and new/ subdirectories of every mail directory it is
	 * given and hands out one Indexable per message file that the
	 * FileAttributesStore does not already consider up to date.
	 */
	public class MaildirdirIndexableGenerator : IIndexableGenerator {
		// store the indexer
		private MaildirIndexer indexer;
		// message file currently indexing
		private FileInfo CrawlFile;
		// directory currently parsing
		private DirectoryInfo current_dir;
		// list of files in current directory
		private IEnumerable files_to_parse;
		// list of directories to scan (DirectoryInfo for each cur/ and new/)
		private ArrayList dirs_to_scan;
		// becomes null once every directory has been scanned
		private IEnumerator dir_enumerator = null;
		private IEnumerator file_enumerator = null;

		private string account_name {
			get { return indexer.AccountName; }
		}

		/**
		 * indexer          - owner; supplies the queryable and builds the indexables
		 * mail_directories - paths (strings) of maildir folders; only their
		 *                    existing cur/ and new/ subdirectories are scanned
		 */
		public MaildirdirIndexableGenerator (MaildirIndexer indexer, ArrayList mail_directories)
		{
			this.indexer = indexer;
			dirs_to_scan = new ArrayList ();

			foreach (string directory in mail_directories) {
				AddDirectory (directory);
			}
			dir_enumerator = dirs_to_scan.GetEnumerator ();
		}

		// nothing to persist between flushes for maildir scanning
		public void PostFlushHook ()
		{
		}

		/**
		 * Queue the cur/ and new/ subdirectories of _dir, if they exist.
		 */
		private void AddDirectory (string _dir) {
			// scan mails in directory cur and new, not tmp
			string[] leaves = new string[] { "cur", "new" };

			foreach (string leaf in leaves) {
				string candidate = Path.Combine (_dir, leaf);
				if (Directory.Exists (candidate))
					dirs_to_scan.Add (new DirectoryInfo (candidate));
			}
		}

		public string StatusName {
			get { return indexer.MailRoot; }
		}

		/**
		 * Build the indexable for the file selected by the last successful
		 * HasNextIndexable () call.
		 */
		public Indexable GetNextIndexable ()
		{
			FileInfo file = (FileInfo) file_enumerator.Current;
			return indexer.MaildirMessageToIndexable (file.FullName);
		}

		public bool IsUpToDate (string path)
		{
			return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
		}

		/**
		 * Advance to the next message file that is not up to date.
		 * Returns false once every queued directory has been exhausted.
		 */
		public bool HasNextIndexable ()
		{
			// dir_enumerator is cleared below when the scan is finished;
			// later calls must keep answering "no" instead of
			// dereferencing null.
			if (dir_enumerator == null)
				return false;

			do {
				while (file_enumerator == null || !file_enumerator.MoveNext ()) {
					if (!dir_enumerator.MoveNext ()) {
						dir_enumerator = null;
						return false;
					}
					current_dir = (DirectoryInfo) dir_enumerator.Current;
					Logger.Log.Info ("Scanning maildir feeds in " + current_dir.FullName);
					files_to_parse = DirectoryWalker.GetFileInfos (current_dir);
					file_enumerator = files_to_parse.GetEnumerator ();
				}
				CrawlFile = (FileInfo) file_enumerator.Current;
			} while (IsUpToDate (CrawlFile.FullName));

			return true;
		}

	}

	/**
	 * Indexable generator for mbox mail files
	 * based on Evo code
	 *
	 * Parses one mbox file with GMime, resuming from the offset stored by
	 * the previous Checkpoint ().
	 */
	public class MaildirMboxIndexableGenerator : IIndexableGenerator {
		// path of the mbox file
		private string mbox_file;
		// fd, stream, parser needed for gmime parsing
		private int mbox_fd = -1;
		private GMime.StreamFs mbox_stream;
		private GMime.Parser mbox_parser;
		// store the indexer
		private MaildirIndexer indexer;
		// number of mails scanned
		private int indexed_count;
		// is this initial scan - in which case the mbox might have been modified since last scan
		private bool initial_scan;

		private string account_name {
			get { return indexer.AccountName; }
		}

		private string folder_name {
			get { return indexer.GetFolderMbox (mbox_file); }
		}

		public MaildirMboxIndexableGenerator (MaildirIndexer indexer, string mbox_file, bool initial_scan)
		{
			this.indexer = indexer;
			this.mbox_file = mbox_file;
			this.initial_scan = initial_scan;
		}

		public void PostFlushHook ()
		{
			Checkpoint ();
		}

		/**
		 * store how long indexing is done on the disk
		 * in case indexing stops midway we dont have to restart from the beginning
		 * if the mbox file hasnt been modified
		 *
		 * Does nothing while no parser is open.
		 */
		public void Checkpoint ()
		{
			if (mbox_parser != null) {
				MboxLastOffset = mbox_parser.Tell ();
				indexer.Queryable.FileAttributesStore.AttachLastWriteTime (mbox_file, DateTime.UtcNow);
			}
		}

		public string StatusName {
			get { return mbox_file; }
		}

		// name of the data line that holds the resume offset for this mbox
		private string OffsetKey {
			get { return "offset-" + mbox_file.Replace ('/', '-'); }
		}

		/**
		 * Offset up to which this mbox has already been indexed.
		 * An unparsable stored value is treated as 0 (rescan from the
		 * start) rather than letting the exception abort the generator.
		 */
		private long MboxLastOffset {
			get {
				string offset_str = indexer.Queryable.ReadDataLine (OffsetKey);
				try {
					return Convert.ToInt64 (offset_str);
				} catch (FormatException) {
					return 0;
				} catch (OverflowException) {
					return 0;
				}
			}

			set {
				indexer.Queryable.WriteDataLine (OffsetKey, value.ToString ());
			}
		}

		public bool IsUpToDate (string path)
		{
			//Logger.Log.Info (path + " is uptodate:" + indexer.Queryable.FileAttributesStore.IsUpToDate (path));
			return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
		}

		/**
		 * Advance to the next mail in the mbox file.
		 * Opens the file lazily on the first call; returns false when the
		 * file cannot be opened or the parser has reached the end.
		 */
		public bool HasNextIndexable ()
		{
			if (mbox_fd < 0) {
				Logger.Log.Debug ("Opening mbox {0}", mbox_file);

				try {
					MaildirQueryable.InitializeGMime ();
				} catch (Exception e) {
					Logger.Log.Warn (e, "Caught exception trying to initalize gmime:");
					return false;
				}


				try {
					mbox_fd = Mono.Unix.Native.Syscall.open (mbox_file, Mono.Unix.Native.OpenFlags.O_RDONLY);
				} catch (System.IO.FileNotFoundException) {
					Logger.Log.Warn ("mbox " + mbox_file + " deleted while indexing.");
					return false;
				}
				// open(2) signals failure through a negative return value,
				// not an exception; never hand such an fd to GMime.
				if (mbox_fd < 0) {
					Logger.Log.Warn ("mbox " + mbox_file + " could not be opened; skipping.");
					return false;
				}

				mbox_stream = new GMime.StreamFs (mbox_fd);
				if (initial_scan && !IsUpToDate (mbox_file))
					// this is the initial scan and
					// file has changed since last scan =>
					// set mboxlastoffset to 0 and seek to 0
					mbox_stream.Seek ((int)(MboxLastOffset = 0));
				else
					mbox_stream.Seek ((int) MboxLastOffset);
				mbox_parser = new GMime.Parser (mbox_stream);
				mbox_parser.ScanFrom = true;
			}

			if (mbox_parser.Eos ()) {
				// save the state ASAP
				Checkpoint ();

				mbox_stream.Close ();
				mbox_fd = -1;
				mbox_stream.Dispose ();
				mbox_stream = null;
				mbox_parser.Dispose ();
				mbox_parser = null;

				Logger.Log.Debug ("{0}: Finished indexing {1} messages", folder_name, indexed_count);
				return false;
			} else
				return true;
		}

		/**
		 * Parse the next message and convert it to an Indexable.
		 * Returns null for an empty message or when the indexer declines it.
		 */
		public Indexable GetNextIndexable ()
		{
			GMime.Message message = null;
			try {
				message = mbox_parser.ConstructMessage ();
			} catch (System.IO.FileNotFoundException) {
				Logger.Log.Warn ("mbox " + mbox_file + " deleted while parsing.");
				return null;
			}

			try {
				// Again comment from Evo :P
				// Work around what I think is a bug in GMime: If you
				// have a zero-byte file or seek to the end of a
				// file, parser.Eos () will return true until it
				// actually tries to read something off the wire.
				// Since parser.ConstructMessage() always returns a
				// message (which may also be a bug), we'll often get
				// one empty message which we need to deal with here.
				//
				// Check if its empty by seeing if the Headers
				// property is null or empty.
				if (message == null || message.Headers == null || message.Headers == "") {
					return null;
				}

				// mbox KIO slave uses the From line as URI - how weird!
				// are those lines supposed to be unique ???
				string id = mbox_parser.From;
				System.Uri uri = EmailUri (id);

				Indexable indexable = indexer.MessageToIndexable (mbox_file, uri, message, indexer.GetFolderMbox (mbox_file));

				if (indexable == null)
					return null;

				++indexed_count;

				return indexable;
			} finally {
				if (message != null)
					message.Dispose ();
			}
		}

		// TODO: confirm that this works with the mbox kio-slave from new kdepim
		public Uri EmailUri (string id)
		{
			FileInfo fi = new FileInfo (mbox_file);
			return new Uri (String.Format ("mbox:///{0}/{1}", fi.FullName, id));
		}
	}
}

/*
 * Patch header of the next file, kept verbatim from the original e-mail:
 *
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs 1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs 2006-09-28 14:13:27.000000000 -0400
@@ -0,0 +1,511 @@
 */

//
// MaildirIndexer.cs
//
// Copyright (C) 2005 Novell, Inc.
// Copyright (C) 2005 Debajyoti Bera
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.IO;

using Beagle.Util;
using Beagle.Daemon;

namespace Beagle.Daemon.MaildirQueryable {

	/**
	 * Main indexer class
	 * The bulk of the indexing work is done here
	 *
	 * Keeps the list of known maildir folders under one mail root, reacts
	 * to inotify events for them and schedules add/remove tasks on the
	 * queryable's scheduler.
	 */
	public class MaildirIndexer {
		// location of mail folder
		private string mail_root;
		public string MailRoot {
			get { return mail_root; }
		}
		// account name for this folder
		private string account_name;
		public string AccountName {
			get { return account_name; }
		}
		// mail folders not to scan (lower-case path suffixes)
		private ArrayList excludes;
		// list of maildir directories which store mails in cur/, new/, tmp/ subdirs
		private ArrayList mail_directories;
		// list of directories which contain mbox files and other mail folders
		private ArrayList folder_directories;
		// list of mbox files
		private ArrayList mbox_files;
		// also store the queryable
		private MaildirQueryable queryable;
		public MaildirQueryable Queryable {
			get { return queryable; }
		}

		private string lastGoodDirPath = ""; // cache last successful directory

		public MaildirIndexer (MaildirQueryable queryable, string account, string root)
		{
			this.queryable = queryable;
			account_name = account;
			mail_root = root;
			mail_directories = new ArrayList ();
			Logger.Log.Debug ("mail_directories created for:" + mail_root + " (" + mail_directories.Count + ")");
			folder_directories = new ArrayList ();
			mbox_files = new ArrayList ();

			excludes = new ArrayList ();
			excludes.Add ("spam");
			excludes.Add ("outbox");
			excludes.Add ("trash");
			excludes.Add ("drafts");
		}

		/**
		 * inotify callback
		 *
		 * path    - watched directory the event was reported for
		 * subitem - name of the entry inside path that the event concerns
		 * srcpath - source path for move events
		 * type    - event flags
		 */
		private void OnInotifyEvent (Inotify.Watch watch,
					     string path,
					     string subitem,
					     string srcpath,
					     Inotify.EventType type)
		{
			//FIXME this case should NEVER occur, still it does
			if (mail_directories == null) {
				Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
				Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
				return;
			}

			if (subitem == "")
				return;
			string fullPath = Path.Combine (path, subitem);
			bool is_directory = (type & Inotify.EventType.IsDirectory) != 0;

			// we need to watch for all kinds of events - this is tricky

			// Case: new file is created
			// - if it is one of the folder_directories, index it
			// - if is in one of the mail_directories, index it if it is an mbox file
			if ((type & Inotify.EventType.Create) != 0 && !is_directory) {
				if (IsMailDir (path)) {
					Indexable indexable = MaildirMessageToIndexable (fullPath);
					AddIndexableTask (indexable, fullPath);
				}
				return;
			}

			// Case: file is deleted
			// - if it is a mail file, we might like it to be deleted
			if ((type & Inotify.EventType.MovedFrom) != 0 ||
			    ((type & Inotify.EventType.Delete) != 0 && !is_directory)) {
				if (IsMailDir (path))
					RemoveMail (fullPath);
				else if (mbox_files.Contains (fullPath)) {
					RemoveMbox (fullPath);
					mbox_files.Remove (fullPath);
				}
				return;
			}

			// Case: file is moved
			// - files are moved from tmp/new to cur
			// - need to delete from the source
			if ((type & Inotify.EventType.MovedTo) != 0 && !is_directory) {
				if (IsMailDir (path)) {
					Indexable indexable = MaildirMessageToIndexable (fullPath);
					AddIndexableTask (indexable, fullPath);
				}
				// NOTE(review): srcpath looks like a file path here, while
				// IsMailDir expects a cur/ or new/ directory - confirm intent.
				if (IsMailDir (srcpath))
					RemoveMail (srcpath);
				if (mbox_files.Contains (fullPath)) {
					// check if this because of compaction, in which case need to delete previous mbox
					if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
						RemoveMbox (fullPath);
					// FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
					// RemoveMbox creates a job with immediate priority while
					// IndexMbox creates a job with the default priority of a generator
					// Is there a better way to ensure the order ?
					IndexMbox (fullPath, true);
				}
				return;
			}

			// Case: file is modified i.e. there was no create event but closewrite event
			// - possibly some mbox was changed
			// FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
			// - which means one has to read the .index files to find deleted messages...
			// - need to find the format of the .index/.index.ids etc files and parse them
			if ((type & Inotify.EventType.Modify) != 0 && !is_directory) {
				if (mbox_files.Contains (fullPath))
					IndexMbox (fullPath, false);
				return;
			}

			// Case: a directory is created:
			// well watch it anyway but also make sure its a maildir directory
			// if it a maildir directory, then add it to maildir_dirs
			if ((type & Inotify.EventType.Create) != 0 && is_directory) {
				if (!IgnoreFolder (fullPath)) {
					Watch (fullPath);
					UpdateDirectories(fullPath);
				}
				return;
			}

			// Case: if a directory is deleted:
			// remove watch
			if ((type & Inotify.EventType.Delete) != 0 && is_directory) {
				// NOTE(review): watch appears to be the watch on `path`
				// (the parent), not on the deleted child - confirm that
				// unsubscribing it is intended.
				watch.Unsubscribe ();
				mail_directories.Remove (fullPath);
				folder_directories.Remove (fullPath);
				// IsMailDir caches the last accepted maildir; drop the
				// cache so a deleted folder is not accepted any more.
				if (lastGoodDirPath == fullPath)
					lastGoodDirPath = "";
				return;
			}

			// Case: directory is moved
			// FIXME: implement renaming of mail folders

		}

		/**
		 * Add watch to the parameter directory and its subdirs, recursively
		 * (breadth first). Directories that do not exist are skipped.
		 */
		public void Watch (string path)
		{
			DirectoryInfo root = new DirectoryInfo (path);
			if (! root.Exists)
				return;

			Queue queue = new Queue ();
			queue.Enqueue (root);

			while (queue.Count > 0) {
				DirectoryInfo dir = queue.Dequeue () as DirectoryInfo;

				if (! dir.Exists)
					continue;

				//log.Debug ("Adding inotify watch to " + dir.FullName);
				Inotify.Subscribe (dir.FullName, OnInotifyEvent,
						   Inotify.EventType.Create
						   | Inotify.EventType.Delete
						   | Inotify.EventType.MovedFrom
						   | Inotify.EventType.MovedTo);

				foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir))
					queue.Enqueue (subdir);
			}
		}

		/**
		 * Recursively traverse the files and dirctories under mail_root
		 * to find files that need to be indexed, directories that
		 * need to be watched for changes
		 *
		 * As written, only the immediate subdirectories of mail_root
		 * (other than cur/new/tmp) are registered as maildir folders;
		 * nothing is ever re-queued on `pending`.
		 */
		public void Crawl ()
		{
			if (!Directory.Exists (mail_root))
				return;

			mail_directories.Clear ();
			folder_directories.Clear ();
			mbox_files.Clear();
			// the IsMailDir cache refers to the list that was just emptied
			lastGoodDirPath = "";

			Queue pending = new Queue ();
			pending.Enqueue (mail_root);
			folder_directories.Add (mail_root);
			// add inotify watch to root folder
			if (Inotify.Enabled)
				Inotify.Subscribe (mail_root, OnInotifyEvent,
						   Inotify.EventType.Create
						   | Inotify.EventType.Delete
						   | Inotify.EventType.MovedFrom
						   | Inotify.EventType.MovedTo
						   | Inotify.EventType.Modify);

			while (pending.Count > 0) {
				string dir = (string) pending.Dequeue ();
				Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);

				foreach (string d in DirectoryWalker.GetDirectoryNames(dir)) {
					if (d == "cur" || d == "new" || d == "tmp") {
						continue;
					}
					string fullpath = Path.Combine(dir, d);
					mail_directories.Add (fullpath);
					if (Inotify.Enabled) {
						Watch (fullpath);
					}
				}
			}

			// copy the contents as mail_directories, mbox_files might change due to async events
			ArrayList _mail_directories = new ArrayList (mail_directories);
			ArrayList _mbox_files = new ArrayList (mbox_files);

			if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
				Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
				return;
			} else {
				MaildirdirIndexableGenerator generator = new MaildirdirIndexableGenerator (this, _mail_directories);
				AddIIndexableTask (generator, mail_root);
			}

			foreach (string mbox_file in _mbox_files) {
				IndexMbox (mbox_file, true);
			}
		}

		// schedule a single indexable with immediate priority; null is ignored
		private void AddIndexableTask (Indexable indexable, string tag)
		{
			if (indexable == null)
				return;

			Scheduler.Task task = queryable.NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			task.Tag = tag;
			queryable.ThisScheduler.Add (task);
		}

		// schedule a generator with its default priority; null is ignored
		private void AddIIndexableTask (IIndexableGenerator generator, string tag)
		{
			if (generator == null)
				return;

			Scheduler.Task task = queryable.NewAddTask (generator);
			task.Tag = tag;
			queryable.ThisScheduler.Add (task);
		}

		/**
		 * Start a task for indexing an mbox file
		 * (skipped when a task tagged with the same file is already queued)
		 */
		public void IndexMbox (string mbox_file, bool initial_scan)
		{
			if (queryable.ThisScheduler.ContainsByTag (mbox_file)) {
				Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
				return;
			}

			//Logger.Log.Debug ("Creating task to index mbox {0}", mbox_file);
			MaildirMboxIndexableGenerator generator = new MaildirMboxIndexableGenerator (this, mbox_file, initial_scan);
			AddIIndexableTask (generator, mbox_file);
		}

		// schedule an immediate-priority remove task for the file's uri
		private void AddRemoveTask (string file)
		{
			Uri uri = UriFu.PathToFileUri (file);
			Scheduler.Task task = queryable.NewRemoveTask (uri);
			task.Priority = Scheduler.Priority.Immediate;
			task.SubPriority = 0;
			queryable.ThisScheduler.Add (task);
		}

		/**
		 * Remove maildir mail file
		 */
		private void RemoveMail (string file)
		{
			Logger.Log.Debug ("Removing mail:" + file);
			AddRemoveTask (file);
		}

		/**
		 * Create an indexable from a maildir message
		 * The message is not parsed here; the indexable only points at the file.
		 */
		public Indexable MaildirMessageToIndexable (string filename)
		{
			Logger.Log.Debug ("+ indexing maildir mail:" + filename);
			String folder = GetFolderMaildir(filename);
			Uri file_uri = UriFu.PathToFileUri (filename);

			Indexable indexable = new Indexable (file_uri);
			indexable.HitType = "MailMessage";
			indexable.MimeType = "message/rfc822";
			indexable.CacheContent = false;

			indexable.AddProperty (Property.NewUnsearched ("fixme:client", "maildir"));
			indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
			indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder));
			indexable.ContentUri = file_uri;

			return indexable;
		}

		/**
		 * Create an indexable from an mbox message
		 * Most of the code here is from Evo backend
		 */
		public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
		{
			//Logger.Log.Debug ("Indexing " + uri + " in folder " + folder_name);
			Indexable indexable = new Indexable (uri);
			// set parent uri to the filename so that when an mbox file
			// is deleted, all the messages in that file can be deleted
			indexable.ParentUri = UriFu.PathToFileUri (file_name);

			indexable.Timestamp = message.Date.ToUniversalTime ();
			indexable.HitType = "MailMessage";
			indexable.MimeType = "message/rfc822";
			indexable.CacheContent = false;

			// NOTE(review): "kmail" looks inherited from the KMail backend,
			// MaildirMessageToIndexable uses "maildir" - confirm which is wanted.
			indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
			indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
			indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));

			bool in_sent_folder = (folder_name == Queryable.SentMailFolderName);
			GMime.InternetAddressList addrs;

			addrs = message.GetRecipients (GMime.Message.RecipientType.To);
			foreach (GMime.InternetAddress ia in addrs) {
				if (in_sent_folder && ia.AddressType != GMime.InternetAddressType.Group)
					indexable.AddProperty (Property.NewKeyword ("fixme:sentTo", ia.Addr));
			}
			addrs.Dispose ();

			addrs = message.GetRecipients (GMime.Message.RecipientType.Cc);
			foreach (GMime.InternetAddress ia in addrs) {
				if (in_sent_folder && ia.AddressType != GMime.InternetAddressType.Group)
					indexable.AddProperty (Property.NewKeyword ("fixme:sentTo", ia.Addr));
			}
			addrs.Dispose ();

			addrs = GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (message.Sender));
			foreach (GMime.InternetAddress ia in addrs) {
				if (!in_sent_folder && ia.AddressType != GMime.InternetAddressType.Group)
					indexable.AddProperty (Property.NewKeyword ("fixme:gotFrom", ia.Addr));
			}
			addrs.Dispose ();

			if (in_sent_folder)
				indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
			else {
				string kmail_msg_sent = message.GetHeader ("X-KMail-Link-Type");
				if (kmail_msg_sent == "reply")
					indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
			}

// no need to store date again, use the issent flag to determine if the date is sentdate or not
#if false
			if (folder_name == Queryable.SentMailFolderName)
				indexable.AddProperty (Property.NewDate ("fixme:sentdate", message.Date.ToUniversalTime ()));
			else
				indexable.AddProperty (Property.NewDate ("fixme:received", message.Date.ToUniversalTime ()));
#endif

			indexable.SetBinaryStream (message.Stream);

			return indexable;
		}

		/**
		 * deleting mbox means deleting all the mails which were in this mbox
		 * we use the idea of parent-uri
		 * while creating indexables, we set the parent uri to be the uri of the mbox file
		 * so to delete all mails in the mbox we just delete all documents whose parent uri
		 * is the uri of the mbox file
		 */
		public void RemoveMbox (string file)
		{
			Logger.Log.Debug ("Removing mbox:" + file);
			AddRemoveTask (file);
		}

		///////////////////////////////////////////////////////////

		// Helpers

		/**
		 * a maildir is of format:
		 * some_dir_in_currently_watched_directories/{cur,new,tmp}
		 * again we ignore tmp - no point trying to watch it - it will be moved anyway
		 * should we check with the kmail directory structure ?
		 * presence of files like directory.index, directory.index.ids ?
		 *
		 * Returns true only when the last component of dirPath is exactly
		 * "cur" or "new" and its parent is a known mail directory.
		 */
		public bool IsMailDir (string dirPath)
		{
			if (dirPath == null)
				return false;

			// compare the whole last component: a plain suffix test would
			// also accept directories such as "obscur" or "renew"
			string leaf = Path.GetFileName (dirPath);
			if (leaf != "cur" && leaf != "new")
				return false;

			DirectoryInfo parent = Directory.GetParent (dirPath);
			if (parent == null)
				return false;

			string possibleMaildir = parent.FullName;
			if (lastGoodDirPath == possibleMaildir)
				return true;
			Logger.Log.Debug ("checking if " + possibleMaildir + " is a maildir ?");
			if (mail_directories.Contains (possibleMaildir)) {
				lastGoodDirPath = possibleMaildir;
				return true;
			} else
				return false;
		}

		/**
		 * Called when a new directory is created
		 * Decide what to do with this new directory
		 * Entries are only added when not already present.
		 */
		public void UpdateDirectories (string dirPath)
		{
			string parentDir = (Directory.GetParent (dirPath)).FullName;
			DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
			string dirName = dirinfo.Name;

			if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
				// check and add the parentdir to mail_directories
				if (!mail_directories.Contains (parentDir))
					mail_directories.Add (parentDir);
				return;
			}

			// format .name.directory - in which case add it to folder_dir
			// format name - in which case add it to mail_dir
			ArrayList target = dirName.EndsWith (".directory") ? folder_directories : mail_directories;
			if (!target.Contains (dirPath))
				target.Add (dirPath);
		}

		/**
		 * FIXME:if we can parse kmailrc file, then we might be
		 * able to deduce the mail folder name
		 * currently get it from the file name (mbox) or parent.parent directory name
		 */

		public string GetFolderMbox (string mbox_file)
		{
			FileInfo fi = new FileInfo (mbox_file);
			return fi.Name;
		}

		// folder name = name of the directory two levels above the mail file
		public string GetFolderMaildir (string mailFile)
		{
			return (Directory.GetParent ((Directory.GetParent (mailFile)).FullName).Name);
		}

		/**
		 * true when the lower-cased path ends with one of the excluded
		 * folder names
		 */
		private bool IgnoreFolder (string path)
		{
			// lower-case once, culture-independently, instead of once per exclude
			string lowered = path.ToLower (System.Globalization.CultureInfo.InvariantCulture);

			foreach (string exclude in excludes) {
				if (lowered.EndsWith (exclude))
					return true;
			}
			return false;
		}
	}
}

/*
 * Start of the patch header of the next file, kept verbatim from the
 * original e-mail (it continues on the following line of the message):
 *
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs 1969-12-31
 */
19:00:00.000000000 -0500 +++ beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs 2006-09-28 12:54:28.000000000 -0400 @@ -0,0 +1,234 @@ +// +// MaildirQueryable.cs +// +// Copyright (C) 2005 Novell, Inc. +// Copyright (C) 2005 Debajyoti Bera +// +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
//


using System;
using System.Collections;
using System.IO;
using System.Threading;

using Beagle.Util;

namespace Beagle.Daemon.MaildirQueryable {

	/**
	 * Queryable for mail stored in maildir folders.
	 * Locates the mail root, creates one MaildirIndexer for it and, when
	 * inotify is not available, re-crawls periodically.
	 */
	[QueryableFlavor (Name="Maildir", Domain=QueryDomain.Local, RequireInotify=false)]
	public class MaildirQueryable : LuceneFileQueryable {

		// for non-inotify case, poll after this number of seconds
		public const int polling_interval_in_seconds = 300;
		// mail folder paths
		private string local_path;
		// indexers - one for each mailfolder path
		private MaildirIndexer local_indexer;
		// global variable
		public static bool gmime_initialized = false;

		// initialize GMime once per process (no locking is done here)
		public static void InitializeGMime ()
		{
			if (!gmime_initialized) {
				GMime.Global.Init ();
				gmime_initialized = true;
			}
		}

		// name of the sentmail folder - currently hard-coded in the constructor
		private string sentmail_foldername;
		public string SentMailFolderName {
			get { return sentmail_foldername; }
		}

		public MaildirQueryable () : base ("MaildirIndex")
		{
			// the local mail path is different for different distributions
			local_path = GuessLocalFolderPath ();
			if (local_path == null) {
				Logger.Log.Info ("Maildir folders not found. Will keep trying ");
			} else
				Logger.Log.Info ("Guessing for location of Maildir folders ... found at " + local_path);

			local_indexer = null;
			sentmail_foldername = "sent-mail";
		}

		//////////////////////////////////////////////////////////////////////////////////////////////

		/**
		 * initial method called by the daemon
		 */
		public override void Start ()
		{
			base.Start ();
			ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
		}

		/**
		 * for non-inotify case, this method is invoked repeatedly
		 */
		private void CrawlHook (Scheduler.Task task)
		{
			if (local_indexer != null)
				local_indexer.Crawl ();
			task.Reschedule = true;
			task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
		}

		/**
		 * called by Start(), starts actual work
		 * create indexers
		 * ask indexers to crawl the mails
		 * for non-inotify case, ask to poll
		 */
		private void StartWorker ()
		{
			Logger.Log.Info ("Starting Maildir backend");

			Stopwatch stopwatch = new Stopwatch ();
			stopwatch.Start ();

			// check if there is at all anything to crawl
			if ( local_path == null ) {
				GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
				Logger.Log.Debug ("Maildir directories (local mail) " + " not found, will repoll.");
				return;
			}

			Logger.Log.Debug ("Starting mail crawl");
			State = QueryableState.Crawling;
			if (local_path != null) {
				local_indexer = new MaildirIndexer (this, "local", local_path);
				local_indexer.Crawl ();
			}
			State = QueryableState.Idle;
			Logger.Log.Debug ("Mail crawl done");

			if (! Inotify.Enabled) {
				Scheduler.Task task = Scheduler.TaskFromHook (new Scheduler.TaskHook (CrawlHook));
				task.Tag = "Crawling Maildir directories";
				task.Source = this;
				task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
				ThisScheduler.Add (task);
			}

			stopwatch.Stop ();
			Logger.Log.Info ("Maildir driver worker thread done in {0}", stopwatch);
		}

		/**
		 * use this method to determine if we have anything to crawl and index
		 * Timeout handler: returns true to be called again while no mail
		 * root exists, false once the worker has been started.
		 */
		private bool CheckForExistence ()
		{
			local_path = GuessLocalFolderPath ();
			if (local_path == null)
				return true;

			StartWorker();
			return false;
		}

		/////////////////////////////////////////////////////////////////////////////

		/**
		 * Build a snippet from the plain-text body of a maildir message.
		 * Returns null for non-file hits, attachments, messages that cannot
		 * be opened or parsed, and messages without a text/plain body.
		 */
		override public string GetSnippet (string[] query_terms, Hit hit)
		{
			Logger.Log.Debug ("Fetching snippet for " + hit.Uri.LocalPath);
			// FIXME: Also handle mbox emails
			if (! hit.Uri.IsFile)
				return null;

			// Dont get snippets from attachments, they arent even indexed currently
			if (hit.ParentUri != null)
				return null;

			// initialize before opening so a failure here cannot leak the fd
			InitializeGMime ();

			int mail_fd = Mono.Unix.Native.Syscall.open (hit.Uri.LocalPath, Mono.Unix.Native.OpenFlags.O_RDONLY);
			if (mail_fd < 0)
				return null;

			// presumably disposing the stream releases mail_fd, as the
			// original code relied on - confirm against GMime.StreamFs
			GMime.StreamFs stream = new GMime.StreamFs (mail_fd);
			GMime.Parser parser = null;
			GMime.Message message = null;
			try {
				parser = new GMime.Parser (stream);
				message = parser.ConstructMessage ();
			} finally {
				// released even when parsing throws
				stream.Dispose ();
				if (parser != null)
					parser.Dispose ();
			}

			// nothing could be parsed out of the file
			if (message == null)
				return null;

			try {
				bool html = false;
				string body = message.GetBody (true, out html);
				// FIXME: Also handle snippets from html message parts - involves invoking html filter
				if (html) {
					Logger.Log.Debug ("No text/plain message part in " + hit.Uri);
					return null;
				}

				// StringReader rejects null, so a message without a body
				// simply has no snippet
				if (body == null)
					return null;

				StringReader reader = new StringReader (body);
				return SnippetFu.GetSnippet (query_terms, reader);
			} finally {
				message.Dispose ();
			}
		}

		/////////////////////////////////////////////////////////////////////////////
		// FIXME: How to determine if an mbox hit is valid without scanning the whole file

		public string Name {
			get { return "Maildir"; }
		}

		/**
		 * Guesses the local mail folder path.
		 * The location differs between setups, so two candidates under the
		 * home directory are probed: first ~/Mail, then ~/.Mail.
		 * Returns the first one that exists, or null when neither does.
		 */
		private string GuessLocalFolderPath ()
		{
			string location1 = Path.Combine (PathFinder.HomeDir, "Mail");
			string location2 = Path.Combine (PathFinder.HomeDir, ".Mail");

			if (GuessLocalFolder (location1))
				return location1;
			else if (GuessLocalFolder (location2))
				return location2;
			else
				return null;
		}

		/**
		 * to check if the path represents a maildir directory:
		 * currently this only checks that the directory exists
		 */
		private bool GuessLocalFolder (string path)
		{
			return Directory.Exists (path);
		}

	}

}

/*
 * Remainder of the original patch e-mail (Makefile.am hunk and the start
 * of the list footer), kept verbatim:
 *
diff -urN beagle-0.2.10/beagled/Makefile.am beagle-0.2.10/beagled/Makefile.am
--- beagle-0.2.10/beagled/Makefile.am 2006-09-18 18:24:29.000000000 -0400
+++ beagle-0.2.10/beagled/Makefile.am 2006-09-28 12:53:38.000000000 -0400
@@ -291,6 +291,12 @@
 	$(kmailqueryable)/KMailIndexableGenerator.cs \
 	$(kmailqueryable)/KMailIndexer.cs
 
+maildirqueryable = $(srcdir)/MaildirQueryable
+MAILDIR_QUERYABLE_CSFILES = \
+	$(maildirqueryable)/MaildirQueryable.cs \
+	$(maildirqueryable)/MaildirIndexableGenerator.cs \
+	$(maildirqueryable)/MaildirIndexer.cs
+
 blamqueryable = $(srcdir)/BlamQueryable
 BLAM_QUERYABLE_CSFILES = \
 	$(blamqueryable)/BlamQueryable.cs
@@ -319,6 +325,7 @@
 DAEMON_DLL_CSFILES = \
 	$(LUCENE_CSFILES) \
 	$(KMAIL_QUERYABLE_CSFILES) \
+	$(MAILDIR_QUERYABLE_CSFILES) \
 	$(FILE_SYSTEM_QUERYABLE_CSFILES) \
 	$(GAIM_LOG_QUERYABLE_CSFILES) \
 	$(INDEXING_SERVICE_QUERYABLE_CSFILES) \
_______________________________________________
Dashboard-hackers mailing list
 */
Dashboard-hackers@gnome.org http://mail.gnome.org/mailman/listinfo/dashboard-hackers