The MH format is widely supported and used by various MUAs such
as mutt and sylpheed, and an MH-like format is used by mlmmj for
archives, as well.  Locking implementations for writes are
inconsistent, so this commit doesn't support writes, yet.

inotify|EVFILT_VNODE watches aren't supported, yet, either.
---
 MANIFEST                       |   3 +
 lib/PublicInbox/LEI.pm         |  13 ++--
 lib/PublicInbox/LeiConvert.pm  |   5 ++
 lib/PublicInbox/LeiImport.pm   |  23 +++++++
 lib/PublicInbox/LeiImportKw.pm |   2 +-
 lib/PublicInbox/LeiIndex.pm    |   2 +-
 lib/PublicInbox/LeiInput.pm    |  52 +++++++++++++---
 lib/PublicInbox/LeiMailSync.pm |  39 ++++++++----
 lib/PublicInbox/LeiToMail.pm   |   5 ++
 lib/PublicInbox/MHreader.pm    | 103 +++++++++++++++++++++++++++++++
 lib/PublicInbox/MdirReader.pm  |   2 +-
 lib/PublicInbox/MdirSort.pm    |  46 ++++++++++++++
 lib/PublicInbox/TestCommon.pm  |  22 ++++---
 t/mh_reader.t                  | 108 +++++++++++++++++++++++++++++++++
 14 files changed, 392 insertions(+), 33 deletions(-)
 create mode 100644 lib/PublicInbox/MHreader.pm
 create mode 100644 lib/PublicInbox/MdirSort.pm
 create mode 100644 t/mh_reader.t

diff --git a/MANIFEST b/MANIFEST
index e22674b7..8bcc3179 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -293,6 +293,7 @@ lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
 lib/PublicInbox/MDA.pm
+lib/PublicInbox/MHreader.pm
 lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
 lib/PublicInbox/MailDiff.pm
@@ -302,6 +303,7 @@ lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MboxLock.pm
 lib/PublicInbox/MboxReader.pm
 lib/PublicInbox/MdirReader.pm
+lib/PublicInbox/MdirSort.pm
 lib/PublicInbox/MiscIdx.pm
 lib/PublicInbox/MiscSearch.pm
 lib/PublicInbox/MsgIter.pm
@@ -543,6 +545,7 @@ t/mda-mime.eml
 t/mda.t
 t/mda_filter_rubylang.t
 t/mdir_reader.t
+t/mh_reader.t
 t/mid.t
 t/mime.t
 t/miscsearch.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 17431518..e0cfd55a 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -267,7 +267,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
        'one-time import/update from URL or filesystem',
        qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
        lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
-       commit-delay=i),
+       commit-delay=i sort|s:s@),
        @net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
        'forget sync information for a mail folder', @c_opt ],
@@ -280,7 +280,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 'convert' => [ 'LOCATION...|--stdin',
        'one-time conversion from URL or filesystem to another format',
        qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!
-               rsyncable),
+               rsyncable sort|s:s@),
        @net_opt, @c_opt ],
 'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
        "use a patch to generate a query for `lei q --stdin'",
@@ -321,6 +321,9 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo',
                        'specify message input format' ];
 my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
+my $sort_out = [ 'VAL|received|relevance|docid',
+               "order of results is `--output'-dependent"];
+my $sort_in = [ 'sequence|mtime|size', 'sort input (format-dependent)' ];
 
 # we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
 my %OPTDESC = (
@@ -428,8 +431,10 @@ my %OPTDESC = (
 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
 'offset=i' => ['OFF', 'search result offset (default: 0)'],
 
-'sort|s=s' => [ 'VAL|received|relevance|docid',
-               "order of results is `--output'-dependent"],
+'sort|s=s      q' => $sort_out,
+'sort|s=s      lcat' => $sort_out,
+'sort|s:s@     convert' => $sort_in,
+'sort|s:s@     import' => $sort_in,
 'reverse|r' => 'reverse search results', # like sort(1)
 
 'boost=i' => 'increase/decrease priority of results (default: 0)',
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8f628562..17a952f2 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -28,6 +28,11 @@ sub input_maildir_cb {
        $self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
+sub input_mh_cb {
+       my ($dn, $bn, $kw, $eml, $self) = @_;
+       $self->{wcb}->(undef, { kw => $kw }, $eml);
+}
+
 sub process_inputs { # via wq_do
        my ($self) = @_;
        local $PublicInbox::DS::in_loop = 0; # force synchronous awaitpid
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index c2552bf0..5521188c 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -53,6 +53,29 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
        }
 }
 
+sub input_mh_cb {
+       my ($mhdir, $n, $kw, $eml, $self) = @_;
+       substr($mhdir, 0, 0) = 'mh:'; # add prefix
+       my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
+       my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
+       my @oidbin = $lms ? $lms->num_oidbin($mhdir, $n) : ();
+       @oidbin > 1 and warn("W: $mhdir/$n not unique:\n",
+                               map { "\t".unpack('H*', $_)."\n" } @oidbin);
+       my @docids = sort { $a <=> $b } uniqstr
+                       map { $lse->over->oidbin_exists($_) } @oidbin;
+       if (scalar @docids) {
+               $lse->kw_changed(undef, $kw, \@docids) or return;
+       }
+       if (defined $eml) {
+               my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+               $vmd->{sync_info} = [ $mhdir, $n + 0 ] if $self->{-mail_sync};
+               $self->input_eml_cb($eml, $vmd);
+       }
+       # TODO:
+       # elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
+       #       $ikw->wq_io_do('ck_update_kw', [], "mh:$dir", $uid, $kw);
+}
+
 sub input_net_cb { # imap_each / nntp_each
        my ($uri, $uid, $kw, $eml, $self) = @_;
        if (defined $eml) {
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
index 4b8e69fb..765e23cd 100644
--- a/lib/PublicInbox/LeiImportKw.pm
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -36,7 +36,7 @@ sub ipc_atfork_child {
 sub ck_update_kw { # via wq_io_do
        my ($self, $url, $uid, $kw) = @_;
        my @oidbin = $self->{-lms_rw}->num_oidbin($url, $uid);
-       my $uid_url = "$url/;UID=$uid";
+       my $uid_url = index($url, 'mh:') == 0 ? $url.$uid : "$url/;UID=$uid";
        @oidbin > 1 and warn("W: $uid_url not unique:\n",
                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
        my @docids = sort { $a <=> $b } uniqstr
diff --git a/lib/PublicInbox/LeiIndex.pm b/lib/PublicInbox/LeiIndex.pm
index b3f3e1a0..0e329e58 100644
--- a/lib/PublicInbox/LeiIndex.pm
+++ b/lib/PublicInbox/LeiIndex.pm
@@ -35,7 +35,7 @@ sub lei_index {
 
 no warnings 'once';
 no strict 'refs';
-for my $m (qw(pmdir_cb input_net_cb)) {
+for my $m (qw(pmdir_cb input_net_cb input_mh_cb)) {
        *$m = PublicInbox::LeiImport->can($m);
 }
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index daba9a8e..947a7a79 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -69,6 +69,11 @@ sub input_maildir_cb {
        $self->input_eml_cb($eml);
 }
 
+sub input_mh_cb {
+       my ($dn, $n, $kw, $eml, $self) = @_;
+       $self->input_eml_cb($eml);
+}
+
 sub input_net_cb { # imap_each, nntp_each cb
        my ($url, $uid, $kw, $eml, $self) = @_;
        $self->input_eml_cb($eml);
@@ -190,7 +195,7 @@ sub input_path_url {
                $ifmt = lc($1);
        } elsif ($input =~ /\.(?:patch|eml)\z/i) {
                $ifmt = 'eml';
-       } elsif (-f $input && $input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z}) {
+       } elsif ($input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z} && -f $input) {
                my $bn = $1;
                my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
                return if index($fl, 'T') >= 0;
@@ -204,6 +209,10 @@ sub input_path_url {
        my $devfd = $lei->path_to_fd($input) // return;
        if ($devfd >= 0) {
                $self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
+       } elsif ($devfd < 0 && $input =~ m{\A(.+/)([0-9]+)\z} && -f $input) {
+               my ($dn, $n) = ($1, $2);
+               my $mhr = PublicInbox::MHreader->new($dn, $lei->{3});
+               $mhr->mh_read_one($n, $self->can('input_mh_cb'), $self);
        } elsif (-f $input && $ifmt eq 'eml') {
                open my $fh, '<', $input or
                                        return $lei->fail("open($input): $!");
@@ -231,6 +240,10 @@ sub input_path_url {
                                                $self->can('input_maildir_cb'),
                                                $self, @args);
                }
+       } elsif (-d _ && $ifmt eq 'mh') {
+               my $mhr = PublicInbox::MHreader->new($input.'/', $lei->{3});
+               $mhr->{sort} = $lei->{opt}->{sort};
+               $mhr->mh_each_eml($self->can('input_mh_cb'), $self, @args);
        } elsif (-d _ && $ifmt =~ /\A(?:v1|v2)\z/) {
                my $ibx = PublicInbox::Inbox->new({inboxdir => $input});
                each_ibx_eml($self, $ibx, @args);
@@ -354,13 +367,15 @@ sub prepare_inputs { # returns undef on error
                                PublicInbox::MboxReader->reads($ifmt) or return
                                        $lei->fail("$ifmt not supported");
                        } elsif (-d $input_path) { # TODO extindex
-                               $ifmt =~ /\A(?:maildir|v1|v2|extindex)\z/ or
+                               $ifmt =~ /\A(?:maildir|mh|v1|v2|extindex)\z/ or
                                        return$lei->fail("$ifmt not supported");
                                $input = $input_path;
                                add_dir $lei, $istate, $ifmt, \$input;
-                       } elsif ($self->{missing_ok} && !-e _) {
+                       } elsif ($self->{missing_ok} &&
+                                       $ifmt =~ /\A(?:maildir|mh)\z/ &&
+                                       !-e $input_path) {
                                # for "lei rm-watch" on missing Maildir
-                               $may_sync and $input = 'maildir:'.
+                               $may_sync and $input = "$ifmt:".
                                                $lei->abs_path($input_path);
                        } else {
                                my $m = "Unable to handle $input";
@@ -373,7 +388,7 @@ sub prepare_inputs { # returns undef on error
 $input is `eml', not --in-format=$in_fmt
 
                        push @{$sync->{no}}, $input if $sync;
-               } elsif (-f $input && $input =~ m{\A(.+)/(new|cur)/([^/]+)\z}) {
+               } elsif ($input =~ m{\A(.+)/(new|cur)/([^/]+)\z} && -f $input) {
                        # single file in a Maildir
                        my ($mdir, $nc, $bn) = ($1, $2, $3);
                        my $other = $mdir . ($nc eq 'new' ? '/cur' : '/new');
@@ -385,12 +400,24 @@ $input is `eml', not --in-format=$in_fmt
 
                        if ($sync) {
                                $input = $lei->abs_path($mdir) . "/$nc/$bn";
-                               push @{$sync->{ok}}, $input if $sync;
+                               push @{$sync->{ok}}, $input;
                        }
                        require PublicInbox::MdirReader;
                } else {
                        my $devfd = $lei->path_to_fd($input) // return;
-                       if ($devfd >= 0 || -f $input || -p _) {
+                       if ($devfd < 0 && $input =~ m{\A(.+)/([0-9]+)\z} &&
+                                       -f $input) { # single file in MH dir
+                               my ($mh, $n) = ($1, $2);
+                               lc($in_fmt//'eml') eq 'eml' or
+                                               return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+                               if ($sync) {
+                                       $input = $lei->abs_path($mh)."/$n";
+                                       push @{$sync->{ok}}, $input;
+                               }
+                               require PublicInbox::MHreader;
+                       } elsif ($devfd >= 0 || -f $input || -p _) {
                                push @{$sync->{no}}, $input if $sync;
                                push @f, $input;
                        } elsif (-d "$input/new" && -d "$input/cur") {
@@ -401,10 +428,13 @@ $input is `eml', not --in-format=$in_fmt
                                add_dir $lei, $istate, 'v1', \$input;
                        } elsif (-e "$input/ei.lock") {
                                add_dir $lei, $istate, 'extindex', \$input;
+                       } elsif (-f "$input/.mh_sequences") {
+                               add_dir $lei, $istate, 'mh', \$input;
                        } elsif ($self->{missing_ok} && !-e $input) {
                                if ($lei->{cmd} eq 'p2q') {
                                        # will run "git format-patch"
                                } elsif ($may_sync) { # for lei rm-watch
+                                       # FIXME: support MH, here
                                        $input = 'maildir:'.
                                                $lei->abs_path($input);
                                }
@@ -446,6 +476,14 @@ $input is `eml', not --in-format=$in_fmt
                        $lei->refresh_watches;
                }
        }
+       if (my $mh = $istate->{mh}) {
+               require PublicInbox::MHreader;
+               grep(!m!\Amh:!i, @$mh) and die "BUG: @$mh (no pfx)";
+               if ($may_sync && $lei->{sto}) {
+                       $lei->lms(1)->lms_write_prepare->add_folders(@$mh);
+                       # $lei->refresh_watches; TODO
+               }
+       }
        require PublicInbox::ExtSearch if $istate->{extindex};
        $self->{inputs} = $inputs;
 }
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 17254a82..8d00d1fa 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -435,15 +435,24 @@ sub folders {
        map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
+sub blob_mismatch ($$$) {
+       my ($f, $oidhex, $rawref) = @_;
+       my $sha = $HEXLEN2SHA{length($oidhex)};
+       my $got = git_sha($sha, $rawref)->hexdigest;
+       $got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
+}
+
 sub local_blob {
        my ($self, $oidhex, $vrfy) = @_;
        my $dbh = $self->{dbh} //= dbh_new($self);
+       my $oidbin = pack('H*', $oidhex);
+
        my $b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-       $b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+       $b2n->bind_param(1, $oidbin, SQL_BLOB);
        $b2n->execute;
        while (my ($d, $n) = $b2n->fetchrow_array) {
                substr($d, 0, length('maildir:')) = '';
@@ -456,19 +465,27 @@ WHERE b.oidbin = ?
                        my $f = "$d/$x/$n";
                        open my $fh, '<', $f or next;
                        # some (buggy) Maildir writers are non-atomic:
-                       next unless -s $fh;
-                       my $raw = read_all($fh, -s _);
-                       if ($vrfy) {
-                               my $sha = $HEXLEN2SHA{length($oidhex)};
-                               my $got = git_sha($sha, \$raw)->hexdigest;
-                               if ($got ne $oidhex) {
-                                       warn "$f changed $oidhex => $got\n";
-                                       next;
-                               }
-                       }
+                       my $raw = read_all($fh, -s $fh // next);
+                       next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
                        return \$raw;
                }
        }
+
+       $b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? /* AND f.loc LIKE 'mh:/%' */
+
+       $b2n->bind_param(1, $oidbin, SQL_BLOB);
+       $b2n->execute;
+       while (my ($d, $n) = $b2n->fetchrow_array) {
+               substr($d, 0, length('mh:')) = '';
+               my $f = "$d/$n";
+               open my $fh, '<', $f or next;
+               my $raw = read_all($fh, -s $fh // next);
+               next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
+               return \$raw;
+       }
        undef;
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 071ba113..de75e99e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -400,6 +400,11 @@ sub new {
                                "$dst exists and is not a directory\n";
                $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
                $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+       } elsif ($fmt eq 'mh') {
+               -e $dst && !-d _ and die
+                               "$dst exists and is not a directory\n";
+               $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+               $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
        } elsif (substr($fmt, 0, 4) eq 'mbox') {
                require PublicInbox::MboxReader;
                $self->can("eml2$fmt") or die "bad mbox format: $fmt\n";
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
new file mode 100644
index 00000000..673e3e06
--- /dev/null
+++ b/lib/PublicInbox/MHreader.pm
@@ -0,0 +1,103 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# MH reader, based on Lib/mailbox.py in cpython source
+package PublicInbox::MHreader;
+use v5.12;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::OnDestroy;
+use PublicInbox::IO qw(try_cat);
+use PublicInbox::MdirSort;
+use Carp qw(carp);
+use autodie qw(chdir closedir opendir);
+
+my %FL2OFF = ( # mh_sequences key => our keyword
+       replied => 0,
+       flagged => 1,
+       unseen => 2, # negate
+);
+my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)
+
+sub new {
+       my ($cls, $dir, $cwdfh) = @_;
+       if (substr($dir, -1) ne '/') { # TODO: do this earlier
+               carp "W: appending `/' to `$dir' (fix caller)\n";
+               $dir .= '/';
+       }
+       bless { dir => $dir, cwdfh => $cwdfh }, $cls;
+}
+
+sub read_mh_sequences ($) { # caller must chdir($self->{dir})
+       my ($self) = @_;
+       my ($fl, $off, @n);
+       my @seq = ('', '', ''); # bit vectors, indexed by %FL2OFF values
+       for (split /\n+/s, try_cat('.mh_sequences')) {
+               ($fl, @n) = split /[: \t]+/; # "NAME: N N ..." => NAME, N...
+               $off = $FL2OFF{$fl} // do { warn <<EOM;
+W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
+EOM
+                       next;
+               };
+               @n = grep /\A[0-9]+\z/s, @n; # don't stat, yet; `1-3' ranges dropped
+               if (@n) {
+                       @n = sort { $b <=> $a } @n; # descending, to avoid resize
+                       my $buf = '';
+                       vec($buf, $_, 1) = 1 for @n; # bit N set => msg N in seq
+                       $seq[$off] = $buf;
+               }
+       }
+       \@seq; # [ replied, flagged, unseen ]
+}
+
+sub mh_each_file { # calls $efcb->($dir, $n, $self, @arg) per all-digit name
+       my ($self, $efcb, @arg) = @_;
+       opendir(my $dh, my $dir = $self->{dir});
+       my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+       chdir($dh); # callbacks get bare names, relative to the MH dir
+       if (defined(my $sort = $self->{sort})) {
+               my @sort = map { # each element may be '' or a `,'/` '-joined list
+                       my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
+                       # sorting by name alphabetically makes no sense for MH:
+                       for my $k (@tmp) { # bind to $k; a bare s/// hits map's $_
+                               $k =~ s/\A(\-|\+|)(?:name|)\z/$1sequence/;
+                       }
+                       @tmp;
+               } @$sort;
+               my @n = grep /\A[0-9]+\z/s, readdir $dh;
+               mdir_sort \@n, \@sort; # in-place, also drops non-regular files
+               $efcb->($dir, $_, $self, @arg) for @n;
+       } else { # unsorted: stream entries in readdir order
+               while (readdir $dh) { # perl v5.12+ to set $_ on readdir
+                       $efcb->($dir, $_, $self, @arg) if /\A[0-9]+\z/s;
+               }
+       }
+       closedir $dh; # may die
+}
+
+sub kw_for ($$) { # returns arrayref of keywords for message number $n
+       my ($self, $n) = @_;
+       my $seq = $self->{mh_seq} //= read_mh_sequences($self); # cached
+       my @kw = map { vec($seq->[$_], $n, 1) ? $OFF2KW[$_] : () } (0, 1);
+       vec($seq->[2], $n, 1) or push @kw, 'seen'; # not in `unseen' => seen
+       \@kw;
+}
+
+sub _file2eml { # mh_each_file cb
+       my ($dir, $n, $self, $ucb, @arg) = @_;
+       my $eml = eml_from_path($n);
+       $ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml;
+}
+
+sub mh_each_eml {
+       my ($self, $ucb, @arg) = @_;
+       mh_each_file($self, \&_file2eml, $ucb, @arg);
+}
+
+sub mh_read_one {
+       my ($self, $n, $ucb, @arg) = @_;
+       my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+       chdir(my $dir = $self->{dir});
+       _file2eml($dir, $n, $self, $ucb, @arg);
+}
+
+1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index db5f4545..2981b058 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Maildirs for now, MH eventually
+# Maildirs only (PublicInbox::MHreader exists, now)
 # ref: https://cr.yp.to/proto/maildir.html
 #      https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::MdirReader;
diff --git a/lib/PublicInbox/MdirSort.pm b/lib/PublicInbox/MdirSort.pm
new file mode 100644
index 00000000..6bd9fb6c
--- /dev/null
+++ b/lib/PublicInbox/MdirSort.pm
@@ -0,0 +1,46 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# used for sorting MH (and (TODO) Maildir) names
+# TODO: consider sort(1) to parallelize sorting of gigantic directories
+package PublicInbox::MdirSort;
+use v5.12;
+use Time::HiRes ();
+use parent qw(Exporter);
+use Fcntl qw(S_ISREG);
+our @EXPORT = qw(mdir_sort);
+my %ST = (sequence => 0, size => 1, atime => 2, mtime => 3, ctime => 4);
+
+sub mdir_sort ($$;$) { # sorts @$ent in-place, keeping only regular files
+       my ($ent, $sort, $max) = @_; # names, sort keys, optional max size
+       my @st;
+       my @ent = map {
+               @st = Time::HiRes::stat $_; # names are stat-ed as given
+               # name, size, {a,m,c}time
+               S_ISREG($st[2]) ? [ $_, @st[7..10] ] : ();
+       } @$ent;
+       @ent = grep { $_->[1] <= $max } @ent if $max; # $_->[1] is the size
+       use sort 'stable'; # so the last key in @$sort ends up primary
+       for my $s (@$sort) { # optional `-' prefix reverses, `+' is a no-op
+               if ($s =~ /\A(\-|\+|)name\z/) { # string comparison of names
+                       if ($1 eq '-') {
+                               @ent = sort { $b->[0] cmp $a->[0] } @ent;
+                       } else {
+                               @ent = sort { $a->[0] cmp $b->[0] } @ent;
+                       }
+               } elsif ($s =~ /\A(\-|\+|)
+                               (sequence|size|ctime|mtime|atime)\z/x) {
+                       my $key = $ST{$2}; # index into the tuples built above
+                       if ($1 eq '-') {
+                               @ent = sort { $b->[$key] <=> $a->[$key] } @ent;
+                       } else {
+                               @ent = sort { $a->[$key] <=> $b->[$key] } @ent;
+                       }
+               } else {
+                       die "E: unrecognized sort parameter: `$s'";
+               }
+       }
+       @$ent = map { $_->[0] } @ent; # back to bare names, now ordered
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 22c50675..64fe09fa 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -24,6 +24,7 @@ BEGIN {
        @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
                run_script start_script key2sub xsys xsys_e xqx eml_load tick
                have_xapian_compact json_utf8 setup_public_inboxes create_inbox
+               create_dir
                create_coderepo require_bsd kernel_version check_broken_tmpfs
                quit_waiter_pipe wait_for_eof require_git_http_backend
                tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
@@ -843,26 +844,24 @@ sub my_sum {
        substr PublicInbox::SHA::sha256_hex(join('', @l)), 0, 8;
 }
 
-sub create_coderepo ($$;@) {
-       my $ident = shift;
-       my $cb = pop;
+sub create_dir (@) {
+       my ($ident, $cb) = (shift, pop);
        my %opt = @_;
        require PublicInbox::Lock;
        require PublicInbox::Import;
-       my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
-       my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
        my $tmpdir = delete $opt{tmpdir};
-       my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+       my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
+       my $dir = "t/data-gen/$base.$ident-".my_sum($cb, \%opt);
        require File::Path;
        my $new = File::Path::make_path($dir);
        my $lk = PublicInbox::Lock->new("$dir/creat.lock");
        my $scope = $lk->lock_for_scope;
        if (!-f "$dir/creat.stamp") {
-               opendir(my $dfh, '.');
+               opendir(my $cwd, '.');
                chdir($dir);
                local %ENV = (%ENV, %COMMIT_ENV);
                $cb->($dir);
-               chdir($dfh);
+               chdir($cwd); # some $cb chdir around
                open my $s, '>', "$dir/creat.stamp";
        }
        return $dir if !defined($tmpdir);
@@ -870,6 +869,13 @@ sub create_coderepo ($$;@) {
        $tmpdir;
 }
 
+sub create_coderepo (@) {
+       my $ident = shift;
+       require PublicInbox::Import;
+       my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
+       create_dir "$ident-$db", @_;
+}
+
 sub create_inbox ($;@) {
        my $ident = shift;
        my $cb = pop;
diff --git a/t/mh_reader.t b/t/mh_reader.t
new file mode 100644
index 00000000..4bc77c1e
--- /dev/null
+++ b/t/mh_reader.t
@@ -0,0 +1,108 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use PublicInbox::TestCommon;
+require_ok 'PublicInbox::MHreader';
+use PublicInbox::IO qw(write_file);
+use PublicInbox::Lock;
+use PublicInbox::OnDestroy;
+use PublicInbox::Eml;
+use autodie;
+opendir my $cwdfh, '.';
+
+my $tmpdir = tmpdir;
+my $normal = create_dir 'normal', sub {
+       write_file '>', 3, "Subject: replied a\n\n";
+       write_file '>', 4, "Subject: replied b\n\n";
+       write_file '>', 1, "Subject: unseen\n\n";
+       write_file '>', 2, "Subject: unseen flagged\n\n";
+       write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+flagged: 2
+replied: 3 4
+EOM
+};
+
+my $for_sort = create_dir 'size', sub {
+       for (1..3) {
+               my $name = 10 - $_;
+               write_file '>', $name, "Subject: ".($_ x $_)."\n\n";
+       }
+};
+
+my $stale = create_dir 'stale', sub {
+       write_file '>', 4, "Subject: msg 4\n\n";
+       write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+EOM
+};
+
+{
+       my $mhr = PublicInbox::MHreader->new("$normal/", $cwdfh);
+       $mhr->{sort} = [ '' ];
+       my @res;
+       $mhr->mh_each_eml(sub { push @res, \@_; }, [ 'bogus' ]);
+       is scalar(@res), 4, 'got 4 messages' or diag explain(\@res);
+       is_deeply [map { $_->[1] } @res], [1, 2, 3, 4],
+               'got messages in expected order';
+       is scalar(grep { $_->[4]->[0] eq 'bogus' } @res), scalar(@res),
+               'cb arg passed to all messages' or diag explain(\@res);
+
+       $mhr = PublicInbox::MHreader->new("$stale/", $cwdfh);
+       @res = ();
+       $mhr->mh_each_eml(sub { push @res, \@_; });
+       is scalar(@res), 1, 'ignored stale messages';
+}
+
+test_lei(sub {
+       lei_ok qw(convert -f mboxrd), $normal;
+       my @msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+       my @eml = map { PublicInbox::Eml->new($_) } @msgs;
+       my $h = 'Subject';
+       @eml = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml;
+       my @has = map { scalar $_->header_raw($h) } @eml;
+       is_xdeeply \@has,
+               [ 'replied a', 'replied b', 'unseen', 'unseen flagged' ],
+               'subjects sorted';
+       $h = 'X-Status';
+       @has = map { scalar $_->header_raw($h) } @eml;
+       is_xdeeply \@has, [ 'A', 'A', undef, 'F' ], 'answered and flagged kw';
+       $h = 'Status';
+       @has = map { scalar $_->header_raw($h) } @eml;
+       is_xdeeply \@has, ['RO', 'RO', 'O', 'O'], 'read and old';
+       lei_ok qw(import +L:normal), $normal;
+       lei_ok qw(q L:normal -f mboxrd);
+       @msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+       my @eml2 = map { PublicInbox::Eml->new($_) } @msgs;
+       $h = 'Subject';
+       @eml2 = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml2;
+       is_xdeeply \@eml2, \@eml, 'import preserved kw';
+
+       lei_ok 'ls-mail-sync';
+       is $lei_out, 'mh:'.File::Spec->rel2abs($normal)."\n",
+               'mail sync stored';
+
+       lei_ok qw(convert -s size -f mboxrd), "mh:$for_sort";
+       chomp(my @s = grep /^Subject:/, split(/^/sm, $lei_out));
+       s/^Subject: // for @s;
+       is_xdeeply \@s, [ 1, 22, 333 ], 'sorted by size';
+
+       for my $s ([], [ 'name' ], [ 'sequence' ]) {
+               lei_ok qw(convert -f mboxrd), "mh:$for_sort", '-s', @$s;
+               chomp(@s = grep /^Subject:/, split(/^/sm, $lei_out));
+               s/^Subject: // for @s;
+               my $desc = "@$s" || '(default)';
+               is_xdeeply \@s, [ 333, 22, 1 ], "sorted by: $desc";
+       }
+
+       lei_ok qw(import +L:sorttest), "MH:$for_sort";
+       lei_ok 'ls-mail-sync', $for_sort;
+       is $lei_out, 'mh:'.File::Spec->rel2abs($for_sort)."\n",
+               "mail sync stored with `MH' normalized to `mh'";
+       lei_ok qw(index), 'mh:'.$stale;
+       lei qw(q -f mboxrd), 's:msg 4';
+       like $lei_out, qr/^Subject: msg 4\nStatus: RO\n\n\n/ms,
+               "message retrieved after `lei index'"
+});
+
+done_testing;

Reply via email to