Hello everyone,

Please cc me on any replies as I'm not subscribed.

Here is some quick and dirty code to add a maildir backend to beagle
0.2.10.  It is based on the KMail backend and just assumes that ~/Mail
is a maildir directory.

Without this patch a big portion of my emails get misclassified as
text/plain.  It probably does need to be smarter about detecting maildir
directories, or trying other places, but it's a starting point if anyone
is interested in improving it.

-chris


diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs 1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs 2006-09-28 13:48:16.000000000 -0400
@@ -0,0 +1,303 @@
+
+//
+// MaildirIndexableGenerator.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Runtime.Serialization.Formatters.Binary;
+using System.Threading;
+using System.Xml;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+       /**
+        * Indexable generator for maildir mails.
+        *
+        * Enumerates the message files found in the "cur" and "new"
+        * subdirectories of the given maildir folders and produces an
+        * indexable for every file that is not already up to date.
+        */
+       public class MaildirdirIndexableGenerator : IIndexableGenerator {
+               // store the indexer
+               private MaildirIndexer indexer;
+               // message file currently indexing
+               private FileInfo CrawlFile;
+               // directory currently parsing
+               private DirectoryInfo current_dir;
+               // list of files in current directory
+               private IEnumerable files_to_parse;
+               // list of directories (DirectoryInfo) to scan
+               private ArrayList dirs_to_scan;
+               // set to null once every directory has been consumed
+               private IEnumerator dir_enumerator = null;
+               private IEnumerator file_enumerator = null;
+
+               private string account_name {
+                       get { return indexer.AccountName; }
+               }
+
+               /**
+                * indexer: indexer used to build indexables and to query the attribute store
+                * mail_directories: paths (string) of maildir folders; only their
+                *   existing "cur" and "new" subdirectories are queued for scanning
+                */
+               public MaildirdirIndexableGenerator (MaildirIndexer indexer, ArrayList mail_directories)
+               {
+                       this.indexer = indexer;
+                       dirs_to_scan = new ArrayList ();
+
+                       foreach (string directory in mail_directories) {
+                               AddDirectory (directory);
+                       }
+                       dir_enumerator = dirs_to_scan.GetEnumerator ();
+               }
+
+               public void PostFlushHook ()
+               {
+               }
+
+               /**
+                * Queue the "cur" and "new" subdirectories of _dir, if they exist.
+                */
+               private void AddDirectory (string _dir) {
+                       // scan mails in directory cur and new, not tmp
+                       string[] subdirs = new string[] { "cur", "new" };
+
+                       foreach (string subdir in subdirs) {
+                               string candidate = Path.Combine (_dir, subdir);
+                               if (Directory.Exists (candidate))
+                                       dirs_to_scan.Add (new DirectoryInfo (candidate));
+                       }
+               }
+
+               public string StatusName {
+                       get { return indexer.MailRoot; }
+               }
+
+               /**
+                * Build the indexable for the file HasNextIndexable () stopped on.
+                * Returns null when no file enumeration has been started yet.
+                */
+               public Indexable GetNextIndexable ()
+               {
+                       // HasNextIndexable () has not positioned us on a file
+                       if (file_enumerator == null)
+                               return null;
+
+                       FileInfo file = (FileInfo) file_enumerator.Current;
+                       return indexer.MaildirMessageToIndexable (file.FullName);
+               }
+
+               public bool IsUpToDate (string path)
+               {
+                       return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
+               }
+
+               /**
+                * Advance to the next message file that is not up to date.
+                * Returns false once all queued directories are exhausted.
+                */
+               public bool HasNextIndexable ()
+               {
+                       // The crawl already ran to completion: the directory
+                       // enumerator was released, so stay finished instead of
+                       // dereferencing it again.
+                       if (dir_enumerator == null)
+                               return false;
+
+                       do {
+                               // move to the next file, opening the next directory
+                               // whenever the current one runs out
+                               while (file_enumerator == null || !file_enumerator.MoveNext ()) {
+                                       if (!dir_enumerator.MoveNext ()) {
+                                               dir_enumerator = null;
+                                               return false;
+                                       }
+                                       current_dir = (DirectoryInfo) dir_enumerator.Current;
+                                       Logger.Log.Info ("Scanning maildir feeds in " + current_dir.FullName);
+                                       files_to_parse = DirectoryWalker.GetFileInfos (current_dir);
+                                       file_enumerator = files_to_parse.GetEnumerator ();
+                               }
+                               CrawlFile = (FileInfo) file_enumerator.Current;
+                       } while (IsUpToDate (CrawlFile.FullName));
+
+                       return true;
+               }
+
+       }
+
+       /**
+        * Indexable generator for mbox mail files
+        * based on Evo code
+        *
+        * Parses the mbox file with GMime one message at a time and stores the
+        * parser offset so that a later scan can continue from that position.
+        */
+       public class MaildirMboxIndexableGenerator : IIndexableGenerator {
+               // path of the mbox file
+               private string mbox_file;
+               // fd, stream, parser needed for gmime parsing
+               private int mbox_fd = -1;
+               private GMime.StreamFs mbox_stream;
+               private GMime.Parser mbox_parser;
+               // store the indexer
+               private MaildirIndexer indexer;
+               // number of mails scanned
+               private int indexed_count;
+               // is this initial scan - in which case the mbox might have been modified since last scan
+               private bool initial_scan;
+
+               private string account_name {
+                       get { return indexer.AccountName; }
+               }
+
+               private string folder_name {
+                       get { return indexer.GetFolderMbox (mbox_file); }
+               }
+
+               // name of the data line under which the offset of this mbox is stored
+               private string offset_key {
+                       get { return "offset-" + mbox_file.Replace ('/', '-'); }
+               }
+
+               public MaildirMboxIndexableGenerator (MaildirIndexer indexer, string mbox_file, bool initial_scan)
+               {
+                       this.indexer = indexer;
+                       this.mbox_file = mbox_file;
+                       this.initial_scan = initial_scan;
+               }
+
+               public void PostFlushHook ()
+               {
+                       Checkpoint ();
+               }
+
+               /**
+                * store how long indexing is done on the disk
+                * in case indexing stops midway we dont have to restart from the beginning
+                *   if the mbox file hasnt been modified
+                */
+               public void Checkpoint ()
+               {
+                       if (mbox_parser != null) {
+                               MboxLastOffset = mbox_parser.Tell ();
+                               indexer.Queryable.FileAttributesStore.AttachLastWriteTime (mbox_file, DateTime.UtcNow);
+                       }
+               }
+
+               public string StatusName {
+                       get { return mbox_file; }
+               }
+
+               /**
+                * Offset up to which this mbox has been parsed, as stored by the queryable.
+                * Reads as 0 when nothing usable is stored.
+                */
+               private long MboxLastOffset {
+                       get {
+                               string offset_str = indexer.Queryable.ReadDataLine (offset_key);
+                               try {
+                                       // Convert.ToInt64 maps a null string to 0
+                                       return Convert.ToInt64 (offset_str);
+                               } catch (FormatException) {
+                                       // unreadable stored value: start over from the beginning
+                                       return 0;
+                               } catch (OverflowException) {
+                                       return 0;
+                               }
+                       }
+
+                       set {
+                               indexer.Queryable.WriteDataLine (offset_key, value.ToString ());
+                       }
+               }
+
+               public bool IsUpToDate (string path)
+               {
+                       //Logger.Log.Info (path + " is uptodate:" + indexer.Queryable.FileAttributesStore.IsUpToDate (path));
+                       return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
+               }
+
+               /**
+                * Advance to the next mail in the mbox file.
+                * Opens the file on first use; returns false when the file cannot
+                * be opened or when the parser reached the end of the stream.
+                */
+               public bool HasNextIndexable ()
+               {
+                       if (mbox_fd < 0) {
+                               Logger.Log.Debug ("Opening mbox {0}", mbox_file);
+
+                               try {
+                                       MaildirQueryable.InitializeGMime ();
+                               } catch (Exception e) {
+                                       Logger.Log.Warn (e, "Caught exception trying to initalize gmime:");
+                                       return false;
+                               }
+
+                               try {
+                                       mbox_fd = Mono.Unix.Native.Syscall.open (mbox_file, Mono.Unix.Native.OpenFlags.O_RDONLY);
+                               } catch (System.IO.FileNotFoundException) {
+                                       // kept from the original as a safety net
+                                       Logger.Log.Warn ("mbox " + mbox_file + " deleted while indexing.");
+                                       return false;
+                               }
+
+                               // Syscall.open reports failure through a negative return
+                               // value, not an exception; never wrap an invalid
+                               // descriptor in a stream.
+                               if (mbox_fd < 0) {
+                                       Logger.Log.Warn ("mbox " + mbox_file + " could not be opened (" + Mono.Unix.Native.Stdlib.GetLastError () + ")");
+                                       mbox_fd = -1;
+                                       return false;
+                               }
+
+                               mbox_stream = new GMime.StreamFs (mbox_fd);
+                               if (initial_scan && !IsUpToDate (mbox_file)) {
+                                       // this is the initial scan and
+                                       // file has changed since last scan =>
+                                       // set mboxlastoffset to 0 and seek to 0
+                                       MboxLastOffset = 0;
+                                       mbox_stream.Seek (0);
+                               } else {
+                                       // NOTE(review): the int cast is kept from the original;
+                                       // offsets beyond Int32.MaxValue would not survive it
+                                       mbox_stream.Seek ((int) MboxLastOffset);
+                               }
+                               mbox_parser = new GMime.Parser (mbox_stream);
+                               mbox_parser.ScanFrom = true;
+                       }
+
+                       if (mbox_parser.Eos ()) {
+                               // save the state ASAP
+                               Checkpoint ();
+
+                               mbox_stream.Close ();
+                               mbox_fd = -1;
+                               mbox_stream.Dispose ();
+                               mbox_stream = null;
+                               mbox_parser.Dispose ();
+                               mbox_parser = null;
+
+                               Logger.Log.Debug ("{0}: Finished indexing {1} messages", folder_name, indexed_count);
+                               return false;
+                       }
+
+                       return true;
+               }
+
+               /**
+                * Parse the next message and turn it into an indexable.
+                * Returns null when there is no open parser, when the message is
+                * empty, or when the indexer produced no indexable.
+                */
+               public Indexable GetNextIndexable ()
+               {
+                       // HasNextIndexable () releases the parser at end of stream
+                       if (mbox_parser == null)
+                               return null;
+
+                       GMime.Message message = null;
+                       try {
+                               message = mbox_parser.ConstructMessage ();
+                       } catch (System.IO.FileNotFoundException) {
+                               Logger.Log.Warn ("mbox " + mbox_file + " deleted while parsing.");
+                               return null;
+                       }
+
+                       try {
+                               // Again comment from Evo :P
+                               // Work around what I think is a bug in GMime: If you
+                               // have a zero-byte file or seek to the end of a
+                               // file, parser.Eos () will return true until it
+                               // actually tries to read something off the wire.
+                               // Since parser.ConstructMessage() always returns a
+                               // message (which may also be a bug), we'll often get
+                               // one empty message which we need to deal with here.
+                               //
+                               // Check if its empty by seeing if the Headers
+                               // property is null or empty.
+                               if (message == null || message.Headers == null || message.Headers == "") {
+                                       return null;
+                               }
+
+                               // mbox KIO slave uses the From line as URI - how weird!
+                               // are those lines supposed to be unique ???
+                               string id = mbox_parser.From;
+                               System.Uri uri = EmailUri (id);
+
+                               Indexable indexable = indexer.MessageToIndexable (mbox_file, uri, message, indexer.GetFolderMbox (mbox_file));
+
+                               if (indexable == null)
+                                       return null;
+
+                               ++indexed_count;
+
+                               return indexable;
+                       } finally {
+                               if (message != null)
+                                       message.Dispose ();
+                       }
+               }
+
+               // Builds the uri of a message from the full path of the mbox file and the given id.
+               // TODO: confirm that this works with the mbox kio-slave from new kdepim
+               public Uri EmailUri (string id)
+               {
+                       FileInfo fi = new FileInfo (mbox_file);
+                       return new Uri (String.Format ("mbox:///{0}/{1}", fi.FullName, id));
+               }
+       }
+}
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs    1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs    2006-09-28 14:13:27.000000000 -0400
@@ -0,0 +1,511 @@
+
+//
+// MaildirIndexer.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+namespace Beagle.Daemon.MaildirQueryable {
+       
+       /**
+        * Main indexer class
+        * The bulk of the indexing work is done here
+        */
+       public class MaildirIndexer {
+               // location of mail folder
+               private string mail_root;
+               public string MailRoot {
+                       get { return mail_root; }
+               }
+               // account name for this folder
+               private string account_name;
+               public string AccountName {
+                       get { return account_name; }
+               }
+               // mail folders not to scan
+               private ArrayList excludes;
+               // list of maildir directories which store mails in cur/, new/, tmp/ subdirs
+               private ArrayList mail_directories;
+               // list of directories which contain mbox files and other mail folders
+               private ArrayList folder_directories;
+               // list of mbox files
+               private ArrayList mbox_files;
+               // also store the queryable
+               private MaildirQueryable queryable;
+               public MaildirQueryable Queryable {
+                   get { return queryable; }
+               }
+
+               private string lastGoodDirPath = ""; // cache last successful directory
+
+               /**
+                * queryable: queryable this indexer schedules its tasks on
+                * account: account name attached to the indexed mails
+                * root: top level mail directory
+                *
+                * Starts with empty directory and mbox lists and a fixed list
+                * of folder names to exclude.
+                */
+               public MaildirIndexer (MaildirQueryable queryable, string account, string root)
+               {
+                       this.queryable = queryable;
+                       account_name = account;
+                       mail_root = root;
+                       mail_directories = new ArrayList ();
+                       Logger.Log.Debug ("mail_directories created for:" + mail_root + " (" + mail_directories.Count + ")");
+                       folder_directories = new ArrayList ();
+                       mbox_files = new ArrayList ();
+
+                       excludes = new ArrayList ();
+                       excludes.Add ("spam");
+                       excludes.Add ("outbox");
+                       excludes.Add ("trash");
+                       excludes.Add ("drafts");
+               }
+
+               /**
+                * inotify callback
+                *
+                * watch: the watch that delivered the event
+                * path: directory the event was reported for
+                * subitem: name of the affected entry inside path
+                * srcpath: source path of a move; checked for null before use
+                * type: event flags
+                *
+                * Each "Case" below handles one kind of event and returns.
+                */
+               private void OnInotifyEvent (Inotify.Watch watch,
+                                            string path,
+                                            string subitem,
+                                            string srcpath,
+                                            Inotify.EventType type)
+               {
+                       //FIXME this case should NEVER occur, still it does
+                       if (mail_directories == null) {
+                               Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
+                               Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
+                               return;
+                       }
+
+                       // nothing to do without an entry name; a null name would
+                       // also make Path.Combine throw
+                       if (subitem == null || subitem == "")
+                               return;
+                       string fullPath = Path.Combine (path, subitem);
+                       bool is_directory = (type & Inotify.EventType.IsDirectory) != 0;
+
+                       // we need to watch for all kinds of events - this is tricky
+
+                       // Case: new file is created
+                       // - if its directory passes IsMailDir, index it as a maildir message
+                       if ((type & Inotify.EventType.Create) != 0 && !is_directory) {
+                               if (IsMailDir (path)) {
+                                       Indexable indexable = MaildirMessageToIndexable (fullPath);
+                                       AddIndexableTask (indexable, fullPath);
+                               }
+                               return;
+                       }
+
+                       // Case: file is deleted
+                       // - if it is a mail file, we might like it to be deleted
+                       // (any MovedFrom event is handled here as well)
+                       if ((type & Inotify.EventType.MovedFrom) != 0 ||
+                           ((type & Inotify.EventType.Delete) != 0 && !is_directory)) {
+                               if (IsMailDir (path))
+                                       RemoveMail (fullPath);
+                               else if (mbox_files.Contains (fullPath)) {
+                                       RemoveMbox (fullPath);
+                                       mbox_files.Remove (fullPath);
+                               }
+                               return;
+                       }
+
+                       // Case: file is moved
+                       // - files are moved from tmp/new to cur
+                       // - need to delete from the source
+                       if ((type & Inotify.EventType.MovedTo) != 0 && !is_directory) {
+                               if (IsMailDir (path)) {
+                                       Indexable indexable = MaildirMessageToIndexable (fullPath);
+                                       AddIndexableTask (indexable, fullPath);
+                               }
+                               // srcpath may be missing (the compaction check below
+                               // tests it for null too), so guard before using it
+                               if (srcpath != null && IsMailDir (srcpath))
+                                       RemoveMail (srcpath);
+                               if (mbox_files.Contains (fullPath)) {
+                                       // check if this because of compaction, in which case need to delete previous mbox
+                                       if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
+                                               RemoveMbox (fullPath);
+                                       // FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
+                                       // RemoveMbox creates a job with immediate priority while
+                                       // IndexMbox creates a job with the default priority of a generator
+                                       // Is there a better way to ensure the order ?
+                                       IndexMbox (fullPath, true);
+                               }
+                               return;
+                       }
+
+                       // Case: file is modified i.e. there was no create event but closewrite event
+                       // - possibly some mbox was changed
+                       // FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
+                       // - which means one has to read the .index files to find deleted messages...
+                       // - need to find the format of the .index/.index.ids etc files and parse them
+                       if ((type & Inotify.EventType.Modify) != 0 && !is_directory) {
+                               if (mbox_files.Contains (fullPath))
+                                       IndexMbox (fullPath, false);
+                               return;
+                       }
+
+                       // Case: a directory is created:
+                       // well watch it anyway but also make sure its a maildir directory
+                       // if it a maildir directory, then add it to maildir_dirs
+                       if ((type & Inotify.EventType.Create) != 0 && is_directory) {
+                               if (!IgnoreFolder (fullPath)) {
+                                       Watch (fullPath);
+                                       UpdateDirectories(fullPath);
+                               }
+                               return;
+                       }
+
+                       // Case: if a directory is deleted:
+                       // remove watch
+                       // NOTE(review): "watch" is the watch that delivered this event;
+                       // confirm it belongs to the deleted directory and not to its parent
+                       if ((type & Inotify.EventType.Delete) != 0 && is_directory) {
+                               watch.Unsubscribe ();
+                               mail_directories.Remove (fullPath);
+                               folder_directories.Remove (fullPath);
+                               return;
+                       }
+
+                       // Case: directory is moved
+                       // FIXME: implement renaming of mail folders
+
+               }
+
+               /**
+                * Add watch to the parameter directory and its subdirs, recursively
+                *
+                * Does nothing when path does not exist. Every directory is
+                * subscribed for Create, Delete, MovedFrom and MovedTo events.
+                */
+               public void Watch (string path)
+               {
+                       DirectoryInfo root = new DirectoryInfo (path);
+                       if (! root.Exists)
+                               return;
+
+                       Inotify.EventType mask = Inotify.EventType.Create
+                               | Inotify.EventType.Delete
+                               | Inotify.EventType.MovedFrom
+                               | Inotify.EventType.MovedTo;
+
+                       // breadth-first walk over the directory tree
+                       Queue queue = new Queue ();
+                       queue.Enqueue (root);
+
+                       while (queue.Count > 0) {
+                               // only DirectoryInfo objects are ever enqueued
+                               DirectoryInfo dir = (DirectoryInfo) queue.Dequeue ();
+
+                               if (! dir.Exists)
+                                       continue;
+
+                               //log.Debug ("Adding inotify watch to " + dir.FullName);
+                               Inotify.Subscribe (dir.FullName, OnInotifyEvent, mask);
+
+                               foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir))
+                                       queue.Enqueue (subdir);
+                       }
+               }
+               
+               /**
+                * Look at the directories under mail_root to find the maildir
+                * folders that need to be indexed and the directories that
+                * need to be watched for changes, then schedule the indexing.
+                *
+                * NOTE(review): only mail_root is ever put on the pending queue,
+                * so only its direct subdirectories are added to mail_directories;
+                * Watch () still subscribes to their subdirectories recursively.
+                */
+               public void Crawl ()
+               {
+                       if (!Directory.Exists (mail_root))
+                               return;
+
+                       mail_directories.Clear ();
+                       folder_directories.Clear ();
+                       mbox_files.Clear();
+
+                       Queue pending = new Queue ();
+                       pending.Enqueue (mail_root);
+                       folder_directories.Add (mail_root);
+                       // add inotify watch to root folder
+                       if (Inotify.Enabled)
+                               Inotify.Subscribe (mail_root, OnInotifyEvent,
+                                       Inotify.EventType.Create
+                                       | Inotify.EventType.Delete
+                                       | Inotify.EventType.MovedFrom
+                                       | Inotify.EventType.MovedTo
+                                       | Inotify.EventType.Modify);
+
+                       while (pending.Count > 0) {
+                               string dir = (string) pending.Dequeue ();
+                               Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);
+
+                               foreach (string d in DirectoryWalker.GetDirectoryNames(dir)) {
+                                       // cur/new/tmp hold the messages of dir itself,
+                                       // they are not mail folders
+                                       if (d == "cur" || d == "new" || d == "tmp") {
+                                               continue;
+                                       }
+                                       string fullpath = Path.Combine(dir, d);
+                                       mail_directories.Add (fullpath);
+                                       if (Inotify.Enabled) {
+                                               Watch (fullpath);
+                                       }
+                               }
+                       }
+
+                       // copy the contents as mail_directories, mbox_files might change due to async events
+                       ArrayList _mail_directories = new ArrayList (mail_directories);
+                       ArrayList _mbox_files = new ArrayList (mbox_files);
+
+                       if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
+                               Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
+                               return;
+                       } else {
+                               MaildirdirIndexableGenerator generator = new MaildirdirIndexableGenerator (this, _mail_directories);
+                               AddIIndexableTask (generator, mail_root);
+                       }
+
+                       foreach (string mbox_file in _mbox_files) {
+                               IndexMbox (mbox_file, true);
+                       }
+               }
+
+               // Schedule an immediate-priority add task, tagged with tag, for a
+               // single indexable; a null indexable is silently ignored.
+               private void AddIndexableTask (Indexable indexable, string tag)
+               {
+                       if (indexable != null) {
+                               Scheduler.Task add_task = queryable.NewAddTask (indexable);
+                               add_task.Priority = Scheduler.Priority.Immediate;
+                               add_task.Tag = tag;
+                               queryable.ThisScheduler.Add (add_task);
+                       }
+               }
+
+               // Schedule an add task, tagged with tag, for a generator;
+               // a null generator is silently ignored. The task priority is
+               // left at whatever NewAddTask assigns.
+               private void AddIIndexableTask (IIndexableGenerator generator, string tag)
+               {
+                       if (generator == null)
+                               return;
+
+                       Scheduler.Task task = queryable.NewAddTask (generator);
+                       task.Tag = tag;
+                       queryable.ThisScheduler.Add (task);
+               }
+
+               /**
+                * Start a task for indexing an mbox file
+                *
+                * mbox_file: path of the mbox file, also used as the task tag
+                * initial_scan: passed on to the generator
+                *
+                * Nothing is scheduled when the scheduler already holds a task
+                * with this tag.
+                */
+               public void IndexMbox (string mbox_file, bool initial_scan)
+               {
+                       if (queryable.ThisScheduler.ContainsByTag (mbox_file)) {
+                               Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
+                               return;
+                       }
+
+                       //Logger.Log.Debug ("Creating task to index mbox {0}", mbox_file);
+                       MaildirMboxIndexableGenerator generator = new MaildirMboxIndexableGenerator (this, mbox_file, initial_scan);
+                       AddIIndexableTask (generator, mbox_file);
+               }
+
+               /**
+                * Schedule the removal of a maildir mail file from the index.
+                * The removal task gets immediate priority and sub-priority 0.
+                */
+               private void RemoveMail (string file)
+               {
+                       Logger.Log.Debug ("Removing mail:" + file);
+
+                       Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
+                       removal.Priority = Scheduler.Priority.Immediate;
+                       removal.SubPriority = 0;
+                       queryable.ThisScheduler.Add (removal);
+               }
+
+               /**
+                * Create an indexable from a maildir message
+                *
+                * filename: path of the message file
+                *
+                * The file uri is used both as the uri of the indexable and as
+                * its content uri; client, account and folder are attached as
+                * unsearched properties.
+                */
+               public Indexable MaildirMessageToIndexable (string filename)
+               {
+                       Logger.Log.Debug ("+ indexing maildir mail:" + filename);
+                       String folder = GetFolderMaildir(filename);
+                       Uri file_uri = UriFu.PathToFileUri (filename);
+
+                       Indexable indexable = new Indexable (file_uri);
+                       indexable.HitType = "MailMessage";
+                       indexable.MimeType = "message/rfc822";
+                       indexable.CacheContent = false;
+
+                       indexable.AddProperty (Property.NewUnsearched ("fixme:client", "maildir"));
+                       indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
+                       indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder));
+                       indexable.ContentUri = file_uri;
+
+                       return indexable;
+               }
+       
+               /**
+                * Create an indexable from an mbox message.
+                * Most of the code here is from the Evo backend.
+                *
+                * file_name   - path of the mbox file; its file uri becomes the parent uri
+                * uri         - uri of the message itself
+                * message     - the parsed message; its stream is handed to the indexable
+                * folder_name - folder the message lives in, compared against
+                *               Queryable.SentMailFolderName to tell sent from received mail
+                *
+                * For mail in the sent folder the To and Cc addresses are stored as
+                * fixme:sentTo keywords; for all other mail the sender addresses are
+                * stored as fixme:gotFrom keywords.  Group addresses are skipped.
+                */
+               public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
+               {
+                       Indexable indexable = new Indexable (uri);
+                       // set parent uri to the filename so that when an mbox file
+                       // is deleted, all the messages in that file can be deleted
+                       indexable.ParentUri = UriFu.PathToFileUri (file_name);
+
+                       // The date is stored once, as the timestamp; the isSent flag
+                       // tells whether it is a sent date or a received date.
+                       indexable.Timestamp = message.Date.ToUniversalTime ();
+                       indexable.HitType = "MailMessage";
+                       indexable.MimeType = "message/rfc822";
+                       indexable.CacheContent = false;
+
+                       // NOTE(review): the client is reported as "kmail" here while
+                       // MaildirMessageToIndexable reports "maildir" - confirm this is intended.
+                       indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
+                       indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
+                       indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));
+
+                       // Same answer for every address, so work it out once.
+                       bool in_sent_folder = (folder_name == Queryable.SentMailFolderName);
+
+                       if (in_sent_folder) {
+                               AddAddressKeywords (indexable, "fixme:sentTo",
+                                                   message.GetRecipients (GMime.Message.RecipientType.To));
+                               AddAddressKeywords (indexable, "fixme:sentTo",
+                                                   message.GetRecipients (GMime.Message.RecipientType.Cc));
+                               indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
+                       } else {
+                               // A message without a sender header has nothing to record.
+                               string sender = message.Sender;
+                               if (sender != null)
+                                       AddAddressKeywords (indexable, "fixme:gotFrom",
+                                                           GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (sender)));
+
+                               string kmail_msg_sent = message.GetHeader ("X-KMail-Link-Type");
+                               if (kmail_msg_sent == "reply")
+                                       indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
+                       }
+
+                       indexable.SetBinaryStream (message.Stream);
+
+                       return indexable;
+               }
+
+               /**
+                * Add one keyword property named key for every non-group address in
+                * addrs, then dispose the list (also when adding a property throws).
+                * A null list is ignored.
+                */
+               private static void AddAddressKeywords (Indexable indexable, string key, GMime.InternetAddressList addrs)
+               {
+                       if (addrs == null)
+                               return;
+
+                       try {
+                               foreach (GMime.InternetAddress ia in addrs) {
+                                       if (ia.AddressType != GMime.InternetAddressType.Group)
+                                               indexable.AddProperty (Property.NewKeyword (key, ia.Addr));
+                               }
+                       } finally {
+                               addrs.Dispose ();
+                       }
+               }
+               
+               /**
+                * Deleting an mbox means deleting all the mails which were in it.
+                * Messages are indexed with the uri of their mbox file as parent uri,
+                * so a remove task for the mbox file uri is what gets scheduled here,
+                * with Immediate priority.
+                */
+               public void RemoveMbox (string file)
+               {
+                       Logger.Log.Debug ("Removing mbox:" + file);
+
+                       Uri mbox_uri = UriFu.PathToFileUri (file);
+                       Scheduler.Task removal = queryable.NewRemoveTask (mbox_uri);
+                       removal.Priority = Scheduler.Priority.Immediate;
+                       removal.SubPriority = 0;
+                       queryable.ThisScheduler.Add (removal);
+               }
+
+               ///////////////////////////////////////////////////////////
+
+               // Helpers
+
+               /**
+                * a maildir is of format:
+                * some_dir_in_currently_watched_directories/{cur,new,tmp}
+                * again we ignore tmp - no point trying to watch it - it will be moved anyway
+                * should we check with the kmail directory structure ?
+                * presence of files like directory.index, directory.index.ids ?
+                *
+                * Returns true when dirPath is a directory named exactly "cur" or "new"
+                * whose parent is listed in mail_directories.  The last parent that
+                * matched is remembered in lastGoodDirPath so that repeated queries
+                * for the same maildir skip the list lookup.
+                */
+               public bool IsMailDir (string dirPath)
+               {
+                       if (dirPath == null)
+                               return false;
+
+                       // Compare the whole last path component: a plain EndsWith would
+                       // also accept directories such as "recur" or "renew".
+                       string dirName = Path.GetFileName (dirPath);
+                       if (dirName != "cur" && dirName != "new")
+                               return false;
+
+                       // GetParent returns null for a root directory.
+                       DirectoryInfo parent = Directory.GetParent (dirPath);
+                       if (parent == null)
+                               return false;
+
+                       string possibleMaildir = parent.FullName;
+                       if (lastGoodDirPath == possibleMaildir)
+                               return true;
+                       Logger.Log.Debug ("checking if " + possibleMaildir + " is a maildir ?");
+                       if (! mail_directories.Contains (possibleMaildir))
+                               return false;
+
+                       lastGoodDirPath = possibleMaildir;
+                       return true;
+               }
+
+               /**
+                * Called when a new directory is created
+                * Decide what to do with this new directory:
+                *   cur, new, tmp    - the parent directory is recorded in mail_directories
+                *   *.directory      - the directory is recorded in folder_directories
+                *   anything else    - the directory itself is recorded in mail_directories
+                * mail_directories never receives the same path twice.
+                */
+               public void UpdateDirectories (string dirPath)
+               {
+                       string parentDir = (Directory.GetParent (dirPath)).FullName;
+                       DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
+                       string dirName = dirinfo.Name;
+
+                       if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
+                               // check and add the parentdir to mail_directories
+                               if (!mail_directories.Contains (parentDir))
+                                       mail_directories.Add (parentDir);
+                               return;
+                       }
+
+                       // format .name.directory - in which case add it to folder_dir
+                       // format name  - in which case add it to mail_dir
+                       if (dirName.EndsWith (".directory")) {
+                               // NOTE(review): no duplicate check here because the type of
+                               // folder_directories is not visible from this method - confirm
+                               // whether it needs the same guard as mail_directories.
+                               folder_directories.Add (dirPath);
+                       } else if (!mail_directories.Contains (dirPath)) {
+                               // Same guard as the cur/new/tmp branch, so that seeing a
+                               // directory more than once does not grow the list.
+                               mail_directories.Add (dirPath);
+                       }
+               }
+
+               /**
+                * FIXME: if we can parse the kmailrc file, then we might be
+                * able to deduce the mail folder name.
+                * Currently the folder name is taken from the file name (mbox) or
+                * from the parent.parent directory name (maildir).
+                */
+
+               /**
+                * Folder name of an mbox file: the file name without its directory.
+                */
+               public string GetFolderMbox (string mbox_file)
+               {
+                       return new FileInfo (mbox_file).Name;
+               }
+               
+               /**
+                * Folder name of a maildir message: the name of the directory two
+                * levels above the message file.
+                */
+               public string GetFolderMaildir (string mailFile)
+               {
+                       DirectoryInfo containing_dir = Directory.GetParent (mailFile);
+                       DirectoryInfo folder_dir = Directory.GetParent (containing_dir.FullName);
+                       return folder_dir.Name;
+               }
+
+               /**
+                * Returns true when the lower-cased path ends with one of the
+                * entries in excludes.
+                * The path is lower-cased once, with the invariant culture, so the
+                * result does not depend on the user's locale (a Turkish locale
+                * would otherwise turn "I" into a dotless i).
+                * NOTE(review): assumes the entries in excludes are already
+                * lower case - confirm where the list is filled.
+                */
+               private bool IgnoreFolder (string path)
+               {
+                       string lowered = path.ToLower (System.Globalization.CultureInfo.InvariantCulture);
+
+                       foreach (string exclude in excludes) {
+                               if (lowered.EndsWith (exclude))
+                                       return true;
+                       }
+                       return false;
+               }
+       }
+}
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs 
beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs  1969-12-31 
19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs  2006-09-28 
12:54:28.000000000 -0400
@@ -0,0 +1,234 @@
+//
+// MaildirQueryable.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Threading;
+
+using Beagle.Util;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+       /**
+        * Backend that indexes the mail found below a local mail directory
+        * (~/Mail or ~/.Mail).  The actual crawling is delegated to a
+        * MaildirIndexer; this class finds the directory, starts the indexer,
+        * re-crawls periodically when inotify is not available and produces
+        * snippets for hits.
+        */
+       [QueryableFlavor (Name="Maildir", Domain=QueryDomain.Local, RequireInotify=false)]
+       public class MaildirQueryable : LuceneFileQueryable {
+
+               // for non-inotify case, poll after this number of seconds
+               public const int polling_interval_in_seconds = 300;
+               // mail folder path; null until a mail directory has been found
+               private string local_path;
+               // indexer for local_path; created by StartWorker
+               private MaildirIndexer local_indexer;
+
+               // true once GMime.Global.Init has been called through InitializeGMime
+               public static bool gmime_initialized = false;
+
+               /**
+                * Initialize GMime on the first call; later calls do nothing.
+                * NOTE(review): the flag is tested without locking - confirm that
+                * callers cannot race here.
+                */
+               public static void InitializeGMime ()
+               {
+                       if (!gmime_initialized) {
+                               GMime.Global.Init ();
+                               gmime_initialized = true;
+                       }
+               }
+
+               // name of the sentmail folder - should be parsed from kmailrc
+               private string sentmail_foldername;
+               public string SentMailFolderName {
+                       get { return sentmail_foldername; }
+               }
+
+               /**
+                * Looks for the mail directory once; when none exists yet,
+                * StartWorker arranges for the search to be repeated.
+                */
+               public MaildirQueryable () : base ("MaildirIndex")
+               {
+                       // the local mail path is different for different distributions
+                       local_path = GuessLocalFolderPath ();
+                       if (local_path == null) {
+                               Logger.Log.Info ("Maildir folders not found. Will keep trying ");
+                       } else
+                               Logger.Log.Info ("Guessing for location of Maildir folders ... found at " + local_path);
+
+                       local_indexer = null;
+                       sentmail_foldername = "sent-mail";
+               }
+
+               /////////////////////////////////////////////////////////////////////////////
+
+               /**
+                * initial method called by the daemon
+                * the real work is done by StartWorker on a separate thread
+                */
+               public override void Start ()
+               {
+                       base.Start ();
+                       ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
+               }
+
+               /**
+                * for non-inotify case, this method is invoked repeatedly
+                * crawls again and reschedules itself polling_interval_in_seconds later
+                */
+               private void CrawlHook (Scheduler.Task task)
+               {
+                       if (local_indexer != null)
+                               local_indexer.Crawl ();
+                       task.Reschedule = true;
+                       task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
+               }
+
+               /**
+                * called by Start(), starts actual work
+                * create indexers
+                * ask indexers to crawl the mails
+                * for non-inotify case, ask to poll
+                *
+                * When no mail directory is known yet, nothing is crawled; instead
+                * CheckForExistence is registered to run every 60 seconds.
+                */
+               private void StartWorker ()
+               {
+                       Logger.Log.Info ("Starting Maildir backend");
+
+                       Stopwatch stopwatch = new Stopwatch ();
+                       stopwatch.Start ();
+
+                       // check if there is at all anything to crawl
+                       if (local_path == null) {
+                               GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
+                               Logger.Log.Debug ("Maildir directories (local mail) " + " not found, will repoll.");
+                               return;
+                       }
+
+                       // local_path is known to be non-null from here on
+                       Logger.Log.Debug ("Starting mail crawl");
+                       State = QueryableState.Crawling;
+                       local_indexer = new MaildirIndexer (this, "local", local_path);
+                       local_indexer.Crawl ();
+                       State = QueryableState.Idle;
+                       Logger.Log.Debug ("Mail crawl done");
+
+                       if (! Inotify.Enabled) {
+                               Scheduler.Task task = Scheduler.TaskFromHook (new Scheduler.TaskHook (CrawlHook));
+                               task.Tag = "Crawling Maildir directories";
+                               task.Source = this;
+                               task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
+                               ThisScheduler.Add (task);
+                       }
+
+                       stopwatch.Stop ();
+                       Logger.Log.Info ("Maildir driver worker thread done in {0}", stopwatch);
+               }
+
+               /**
+                * use this method to determine if we have anything to crawl and index
+                * Timeout callback: returns true to be called again while no mail
+                * directory exists, and false once one was found and StartWorker has run.
+                * NOTE(review): StartWorker runs directly inside this timeout
+                * callback rather than on its own thread - confirm that is acceptable.
+                */
+               private bool CheckForExistence ()
+               {
+                       local_path = GuessLocalFolderPath ();
+                       if (local_path == null)
+                               return true;
+
+                       StartWorker ();
+                       return false;
+               }
+
+               /////////////////////////////////////////////////////////////////////////////
+
+               /**
+                * Return a snippet of the text body of the mail behind the hit,
+                * or null when no snippet can be produced: the hit is not a file,
+                * it has a parent uri, the file cannot be opened or parsed, or the
+                * mail has no plain text body.
+                */
+               override public string GetSnippet (string[] query_terms, Hit hit)
+               {
+                       Logger.Log.Debug ("Fetching snippet for " + hit.Uri.LocalPath);
+                       // FIXME: Also handle mbox emails
+                       if (! hit.Uri.IsFile)
+                               return null;
+
+                       // Dont get snippets from attachments, they arent even indexed currently
+                       if (hit.ParentUri != null)
+                               return null;
+
+                       int mail_fd = Mono.Unix.Native.Syscall.open (hit.Uri.LocalPath, Mono.Unix.Native.OpenFlags.O_RDONLY);
+                       if (mail_fd == -1)
+                               return null;
+
+                       InitializeGMime ();
+                       GMime.StreamFs stream = new GMime.StreamFs (mail_fd);
+                       GMime.Parser parser = new GMime.Parser (stream);
+                       GMime.Message message;
+                       try {
+                               message = parser.ConstructMessage ();
+                       } finally {
+                               // release stream and parser even when parsing throws
+                               stream.Dispose ();
+                               parser.Dispose ();
+                       }
+
+                       // a file that could not be parsed as a mail yields no message
+                       if (message == null)
+                               return null;
+
+                       try {
+                               bool html = false;
+                               string body = message.GetBody (true, out html);
+                               // FIXME: Also handle snippets from html message parts - involves invoking html filter
+                               if (html) {
+                                       Logger.Log.Debug ("No text/plain message part in " + hit.Uri);
+                                       return null;
+                               }
+
+                               // StringReader does not accept null
+                               if (body == null)
+                                       return null;
+
+                               StringReader reader = new StringReader (body);
+                               return SnippetFu.GetSnippet (query_terms, reader);
+                       } finally {
+                               message.Dispose ();
+                       }
+               }
+
+               /////////////////////////////////////////////////////////////////////////////
+               // FIXME: How to determine if an mbox hit is valid without scanning the whole file
+
+               public string Name {
+                       get { return "Maildir"; }
+               }
+
+               /**
+                * path of local maildir - mine is in ~/.Mail
+                * This is distribution specific. Mandrake puts kmail mails in
+                * ~/.Mail whereas default kmail folder location is ~/Mail
+                * I guess each distribution can fix this path as they know what is
+                * the path.
+                * Till then, using a guesser to find out which of ~/Mail and ~/.Mail
+                * is valid.
+                * Guesses the local folder path:
+                * first try ~/Mail, then try ~/.Mail; null when neither exists
+                */
+               private string GuessLocalFolderPath ()
+               {
+                       string location1 = Path.Combine (PathFinder.HomeDir, "Mail");
+                       string location2 = Path.Combine (PathFinder.HomeDir, ".Mail");
+
+                       if (GuessLocalFolder (location1))
+                               return location1;
+                       else if (GuessLocalFolder (location2))
+                               return location2;
+                       else
+                               return null;
+               }
+
+               /**
+                * to check if the path can be used as the mail directory:
+                * currently this only tests that the directory exists, the
+                * maildir layout (cur, new, tmp) is not verified
+                */
+               private bool GuessLocalFolder (string path)
+               {
+                       return Directory.Exists (path);
+               }
+
+       }
+
+}
diff -urN beagle-0.2.10/beagled/Makefile.am beagle-0.2.10/beagled/Makefile.am
--- beagle-0.2.10/beagled/Makefile.am   2006-09-18 18:24:29.000000000 -0400
+++ beagle-0.2.10/beagled/Makefile.am   2006-09-28 12:53:38.000000000 -0400
@@ -291,6 +291,12 @@
        $(kmailqueryable)/KMailIndexableGenerator.cs    \
        $(kmailqueryable)/KMailIndexer.cs
 
+maildirqueryable = $(srcdir)/MaildirQueryable
+MAILDIR_QUERYABLE_CSFILES = \
+       $(maildirqueryable)/MaildirQueryable.cs         \
+       $(maildirqueryable)/MaildirIndexableGenerator.cs        \
+       $(maildirqueryable)/MaildirIndexer.cs
+
 blamqueryable = $(srcdir)/BlamQueryable
 BLAM_QUERYABLE_CSFILES =                               \
        $(blamqueryable)/BlamQueryable.cs
@@ -319,6 +325,7 @@
 DAEMON_DLL_CSFILES =                                           \
        $(LUCENE_CSFILES)                                       \
        $(KMAIL_QUERYABLE_CSFILES)                              \
+       $(MAILDIR_QUERYABLE_CSFILES)                            \
        $(FILE_SYSTEM_QUERYABLE_CSFILES)                        \
        $(GAIM_LOG_QUERYABLE_CSFILES)                           \
        $(INDEXING_SERVICE_QUERYABLE_CSFILES)                   \
_______________________________________________
Dashboard-hackers mailing list
Dashboard-hackers@gnome.org
http://mail.gnome.org/mailman/listinfo/dashboard-hackers

Reply via email to