Like other things posted here without notices to the contrary, this
code is in the public domain.

So lately I've been using this program to read my email.  It's
intended as a quick-and-dirty prototype of query-based mailreading.
Apparently there's a program called 'mboxgrep' that does something
similar, only it's in C.

#!/usr/bin/perl -w
use strict;
use IO::File;
use lib '/home/kragen/devel';
use MboxBackward;

{
  package MessageFilter;
  sub new { my ($class, %args) = @_; bless \%args, $class }
  sub fromline { $_[0]{message} = $_[0]{fromline} = $_[1] }
  sub split_hdr {
    my ($name, $value) = split /:\s+/ , $_[1], 2;
    return (lc($name), $value);
  }
  sub save_hdr {
    my ($self, $name, $value) = @_;
    $self->{hdrs}{$name} = $value;
    push @{$self->{allhdrs}{$name}}, $value;
  }
  sub header {
    my ($self, $hdr) = @_;
    $self->save_hdr($self->split_hdr($hdr));
    $_[0]{message} .= $hdr;
  }
  sub to { ($_[0]{hdrs}{to} || '') . ($_[0]{hdrs}{cc} || '') }
  sub bodyline { $_[0]{body} .= $_[1]; $_[0]{message} .= $_[1] }
  sub clear {
    my ($self) = @_;
    $self->{hdrs} = {};
    $self->{body} = '';
    $self->{allhdrs} = {};
    $self->{fromline} = undef;
    $self->{message} = '';
  }
  sub wanted { $_[0]{wanted}(@_) }
  sub match { ($_[0]{hdrs}{$_[1]} || '') =~ $_[2] }
  sub message { $_[0]{message} }
}

{
  package AllMsgs;
  use base qw(MessageFilter);
  sub wanted { 1 }
}

{
  package BriefMsgs;
  use base qw(MessageFilter);
  sub message { $_[0]->{fromline} . $_[0]->heading . $_[0]{brief_msg} }
  sub clear {
    my ($self) = @_;
    $self->SUPER::clear; 
    $self->{brief_msg} = '';
    $self->{topline} = '';
  }
  sub bodyline { $_[0]->SUPER::bodyline($_[1]); $_[0]{brief_msg} .= $_[1] }
  my %omit_hdrs = map { $_ => 1 } qw(return-path delivered-to received
    message-id date subject from references in-reply-to x-priority
    mime-version content-type content-transfer-encoding x-msmail-priority
    importance x-virus-scanned x-beenthere x-mailman-version precedence
    list-id list-post list-help list-subscribe x-list-received-date
    x-original-to x-x-sender x-originalarrivaltime
    x-spam-checker-version errors-to content-disposition list-unsubscribe);
  sub header {
    my ($self, $hdr) = @_;
    my ($name, $value) = $self->split_hdr($hdr);
    $self->save_hdr($name, $value);
    $self->{brief_msg} .= $hdr unless $omit_hdrs{$name};
  }
  sub heading {
    my ($self) = @_;
    my $from = $self->{hdrs}{from};
    my $subj = $self->{hdrs}{subject};
    chomp ($from, $subj);
    if (length($from . $subj) > 70) {
      return "From: $from\nSubject: $subj\n";
    } else {
      return "From: $from  $subj\n";
    }
  }
}

my ($queryfile, $mailfile) = @ARGV;
sub open_or_die {
  my ($file) = @_;

  my $rv = IO::File->new("<$file");
  die "opening $file: $!" unless $rv;
  return $rv;
}
my $queryh = open_or_die($queryfile);
my $query_obj = eval do { local $/; <$queryh> };
die $@ if $@;
$queryh->close();

my $inhdr = 0;
my $curhdr;
sub emit_hdr {
  $query_obj->header($curhdr) if $curhdr;
  undef $curhdr;
}
my $mboxr = MboxBackward->new(open_or_die($mailfile));
while (my $msg = $mboxr->read()) {
  $query_obj->clear();
  $inhdr = 1;
  for (split /\n/, $msg, -1) { # -1 to not drop trailing empty lines
    $_ .= "\n";
    if (/^From /) { $query_obj->fromline($_) }
    elsif ($inhdr and /^$/) { emit_hdr; $inhdr = 0; $query_obj->bodyline($_) }
    elsif ($inhdr and /^\s+/) { $curhdr .= $_ }
    elsif ($inhdr) { emit_hdr; $curhdr = $_ }
    else { $query_obj->bodyline($_) }
  }
  print $query_obj->message if $query_obj->wanted;
}
__END__

It uses the MboxBackward.pm module, which follows:
package MboxBackward;
use strict;

sub new {
  my ($class, $file, $size) = @_;
  seek $file, 0, 2 or die "Can't seek to EOF: $!";
  bless { file => $file, size => $size || 4096, buf => '' }, $class;
}

sub read {
  my ($self) = @_;
  my $f = $self->{file};
  my $size = $self->{size};
  for (;;) {
    my $where = tell $self->{file};
    if ($self->{buf} =~ /^From /m) {
      my $pos = length($self->{buf});
      while (-1 != ($pos = rindex($self->{buf}, 'From ', $pos-5))) {
        if (($pos == 0) ? ($where == 0) :
            (substr($self->{buf}, $pos-1, 1) eq "\n")) {
          return substr($self->{buf}, $pos, length($self->{buf}), '');
        }
      }
    }
    return undef if $where == 0;

    $size = $where if $where < $size;
    seek $f, -$size, 1 or die "Can't seek backwards: $!";
    my $data;
    my $count = read $f, $data, $size;
    die "Can't read: $!" unless defined $count;
    $self->{buf} = $data . $self->{buf};
    defined(seek $f, -$count, 1) or die "Can't seek: $!";
  }
}
1;
__END__

You could probably do MboxBackward more easily with the existing
read-a-file-backward-line-by-line module, but I was disconnected from
the Net the afternoon I wrote it, and I can't find it right now on the
Web.  Maybe MJD hasn't published it yet.

Here are some sample filter files:

BriefMsgs->new(wanted => sub {$_[0]->match('list-archive' => qr/irregulars/i)})

MessageFilter->new(wanted => sub {$_[0]->to =~ /[EMAIL PROTECTED]/})

BriefMsgs->new(wanted => sub {$_[0]->match('delivered-to' => qr/fork\@/)})

BriefMsgs->new(wanted => sub { $_[0]->match(from => qr/rohit/i) })

{
  package ToMe;
  use base qw(BriefMsgs);
  sub wanted { 
    my ($self) = @_;
    return ($self->to =~ /kragen\@/i
            and ($self->{hdrs}{'content-transfer-encoding'} || '') !~ /base64/
            and $self->{hdrs}{from} !~ /priceline|marinedigital/i
            and ($self->{body} !~ /<html>/i )
            and $self->{body} !~ m(Content-Type: text/html)i);
  }
}
bless {}, 'ToMe';

The main script itself doesn't have any regression tests, but the
MboxBackward module does, because it was tricky.  Here they are:

#!/usr/bin/perl -w
use strict;
use Test;
BEGIN { plan tests => 13 }
use MboxBackward;

my $msg1 = <<EOF;
>From foo
nurgle From bax
From: kragen
wumple
EOF

my $msg2 = <<EOF;
>From bar
zibble
gab
EOF

my $fn = 'mbox.tmp';

sub write_tmp_file {
  my ($mbox) = @_;
  open TMP, ">$fn" or die "Can't open $fn: $!";
  print TMP $mbox;
  close TMP;
}
sub open_or_die {
  my ($fn) = @_;
  local *MBOX;
  open MBOX, '<', $fn or die "Can't open $fn: $!";
  return *MBOX;
}

write_tmp_file("baz\n$msg1$msg2");

my $r = MboxBackward->new(open_or_die($fn), 1);
ok($r->read(), $msg2);
ok($r->read(), $msg1);
ok($r->read(), undef);
ok($r->read(), undef);
$r = MboxBackward->new(open_or_die($fn), 30);
ok($r->read(), $msg2);
ok($r->read(), $msg1);
ok($r->read(), undef);
$r = MboxBackward->new(open_or_die($fn), 1024);
ok($r->read(), $msg2);
ok($r->read(), $msg1);
ok($r->read(), undef);

write_tmp_file("");
ok( MboxBackward->new(open_or_die($fn))->read(), undef );

write_tmp_file($msg1);
$r = MboxBackward->new(open_or_die($fn));
ok($r->read(), $msg1);
ok($r->read(), undef);

Reply via email to