This works with existing inotify/EVFILT_VNODE functionality to
propagate changes made from one Maildir to another Maildir.

I chose the lei/store worker process to handle this since
propagating changes back into lei-daemon on a massive scale
could lead to dead-locking while both processes are attempting
to write to each other.  Eliminating IPC overhead is a nice
side effect, but could hurt performance if Maildirs are slow.

The code for "lei export-kw" is significantly revamped to match
the new code used in the "lei/store" daemon.  It should be more
correct w.r.t. corner-cases and stale entries, but perhaps
better tests need to be written.
---
 MANIFEST                        |   1 +
 lib/PublicInbox/LeiExportKw.pm  |  24 ++++----
 lib/PublicInbox/LeiNoteEvent.pm |  14 ++---
 lib/PublicInbox/LeiStore.pm     | 105 ++++++++++++++++++++++++++++++--
 t/lei-auto-watch.t              |  45 ++++++++++++++
 5 files changed, 165 insertions(+), 24 deletions(-)
 create mode 100644 t/lei-auto-watch.t

diff --git a/MANIFEST b/MANIFEST
index cf7268ed..be6ec927 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -423,6 +423,7 @@ t/init.t
 t/ipc.t
 t/iso-2202-jp.eml
 t/kqnotify.t
+t/lei-auto-watch.t
 t/lei-convert.t
 t/lei-daemon.t
 t/lei-export-kw.t
diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm
index 42a5ff22..78c6c6f9 100644
--- a/lib/PublicInbox/LeiExportKw.pm
+++ b/lib/PublicInbox/LeiExportKw.pm
@@ -25,12 +25,11 @@ sub export_kw_md { # LeiMailSync->each_src callback
        }
        $bn .= ':2,'.
                PublicInbox::LeiToMail::kw2suffix([keys %$sto_kw], @$unknown);
+       return if $bn eq $$id;
        my $dst = "$mdir/cur/$bn";
-       my @fail;
        my $lei = $self->{lei};
        for my $d (@try) {
                my $src = "$mdir/$d/$$id";
-               next if $src eq $dst;
 
                # we use link(2) + unlink(2) since rename(2) may
                # inadvertently clobber if the "uniquefilename" part wasn't
@@ -44,20 +43,19 @@ sub export_kw_md { # LeiMailSync->each_src callback
                        $lei->{sto}->ipc_do('lms_mv_src', "maildir:$mdir",
                                                $oidbin, $id, $bn);
                        return; # success anyways if link(2) worked
-               }
-               if ($! == ENOENT && !-e $src) { # some other process moved it
-                       $lei->{sto}->ipc_do('lms_clear_src',
-                                               "maildir:$mdir", $id);
-                       next;
-               }
-               push @fail, $src if $! != EEXIST;
+               } elsif ($! == EEXIST) { # lost race with lei/store?
+                       return;
+               } elsif ($! != ENOENT) {
+                       $lei->child_error(1, "E: link($src -> $dst): $!");
+               } # else loop @try
        }
-       return unless @fail;
-       # both tries failed
        my $e = $!;
-       my $orig = '['.join('|', @fail).']';
+       # both tries failed
        my $oidhex = unpack('H*', $oidbin);
-       $lei->child_error(1, "link($orig, $dst) ($oidhex): $e");
+       my $src = "$mdir/{".join(',', @try)."}/$$id";
+       $lei->child_error(1, "link($src -> $dst) ($oidhex): $e");
+       for (@try) { return if -e "$mdir/$_/$$id" }
+       $lei->{sto}->ipc_do('lms_clear_src', "maildir:$mdir", $id);
 }
 
 sub export_kw_imap { # LeiMailSync->each_src callback
diff --git a/lib/PublicInbox/LeiNoteEvent.pm b/lib/PublicInbox/LeiNoteEvent.pm
index 6a40ba39..41415346 100644
--- a/lib/PublicInbox/LeiNoteEvent.pm
+++ b/lib/PublicInbox/LeiNoteEvent.pm
@@ -36,32 +36,31 @@ sub note_event_arm_done ($) {
 }
 
 sub eml_event ($$$$) {
-       my ($self, $eml, $kw, $state) = @_;
+       my ($self, $eml, $vmd, $state) = @_;
        my $sto = $self->{lei}->{sto};
        my $lse = $self->{lse} //= $sto->search;
-       my $vmd = { kw => $kw };
        if ($state =~ /\Aimport-(?:rw|ro)\z/) {
                $sto->ipc_do('set_eml', $eml, $vmd);
        } elsif ($state =~ /\Aindex-(?:rw|ro)\z/) {
                my $xoids = $self->{lei}->ale->xoids_for($eml);
                $sto->ipc_do('index_eml_only', $eml, $vmd, $xoids);
        } elsif ($state =~ /\Atag-(?:rw|ro)\z/) {
-               my $c = $lse->kw_changed($eml, $kw, my $docids = []);
+               my $c = $lse->kw_changed($eml, $vmd->{kw}, my $docids = []);
                if (scalar @$docids) { # already in lei/store
                        $sto->ipc_do('set_eml_vmd', undef, $vmd, $docids) if $c;
                } elsif (my $xoids = $self->{lei}->ale->xoids_for($eml)) {
                        # it's in an external, only set kw, here
                        $sto->ipc_do('set_xvmd', $xoids, $eml, $vmd);
-               } # else { totally unknown
+               } # else { totally unknown: ignore
        } else {
                warn "unknown state: $state (in $self->{lei}->{cfg}->{'-f'})\n";
        }
 }
 
 sub maildir_event { # via wq_io_do
-       my ($self, $fn, $kw, $state) = @_;
+       my ($self, $fn, $vmd, $state) = @_;
        my $eml = PublicInbox::InboxWritable::eml_from_path($fn) // return;
-       eml_event($self, $eml, $kw, $state);
+       eml_event($self, $eml, $vmd, $state);
 }
 
 sub lei_note_event {
@@ -98,7 +97,8 @@ sub lei_note_event {
                        // return;
                return if index($fl, 'T') >= 0;
                my $kw = PublicInbox::MdirReader::flags2kw($fl);
-               $self->wq_io_do('maildir_event', [], $fn, $kw, $state);
+               my $vmd = { kw => $kw, sync_info => [ $folder, \$bn ] };
+               $self->wq_io_do('maildir_event', [], $fn, $vmd, $state);
        } # else: TODO: imap
 }
 
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 0fa2d3c0..a91b30f7 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -25,10 +25,14 @@ use PublicInbox::MID qw(mids);
 use PublicInbox::LeiSearch;
 use PublicInbox::MDA;
 use PublicInbox::Spawn qw(spawn);
+use PublicInbox::MdirReader;
+use PublicInbox::LeiToMail;
 use List::Util qw(max);
 use File::Temp ();
 use POSIX ();
 use IO::Handle (); # ->autoflush
+use Sys::Syslog qw(syslog openlog);
+use Errno qw(EEXIST ENOENT);
 
 sub new {
        my (undef, $dir, $opt) = @_;
@@ -165,12 +169,92 @@ sub _docids_for ($$) {
        sort { $a <=> $b } values %docids;
 }
 
+# n.b. similar to LeiExportKw->export_kw_md, but this is for a single eml
+sub export1_kw_md ($$$$$) {
+       my ($self, $mdir, $bn, $oidbin, $vmdish) = @_; # vmd/vmd_mod
+       my $orig = $bn;
+       my (@try, $unkn, $kw);
+       if ($bn =~ s/:2,([a-zA-Z]*)\z//) {
+               ($kw, $unkn) = PublicInbox::MdirReader::flags2kw($1);
+               if (my $set = $vmdish->{kw}) {
+                       $kw = $set;
+               } elsif (my $add = $vmdish->{'+kw'}) {
+                       @$kw{@$add} = ();
+               } elsif (my $del = $vmdish->{-kw}) {
+                       delete @$kw{@$del};
+               } # else no changes...
+               @try = qw(cur new);
+       } else { # no keywords, yet, could be in new/
+               @try = qw(new cur);
+               $unkn = [];
+               if (my $set = $vmdish->{kw}) {
+                       $kw = $set;
+               } elsif (my $add = $vmdish->{'+kw'}) {
+                       @$kw{@$add} = (); # auto-vivify
+               } else { # ignore $vmdish->{-kw}
+                       $kw = [];
+               }
+       }
+       $kw = [ keys %$kw ] if ref($kw) eq 'HASH';
+       $bn .= ':2,'. PublicInbox::LeiToMail::kw2suffix($kw, @$unkn);
+       return if $orig eq $bn; # no change
+
+       # we use link(2) + unlink(2) since rename(2) may
+       # inadvertently clobber if the "uniquefilename" part wasn't
+       # actually unique.
+       my $dst = "$mdir/cur/$bn";
+       for my $d (@try) {
+               my $src = "$mdir/$d/$orig";
+               if (link($src, $dst)) {
+                       if (!unlink($src) and $! != ENOENT) {
+                               syslog('warning', "unlink($src): $!");
+                       }
+                       # TODO: verify oidbin?
+                       lms_mv_src($self, "maildir:$mdir",
+                                       $oidbin, \$orig, $bn);
+                       return;
+               } elsif ($! == EEXIST) { # lost race with "lei export-kw"?
+                       return;
+               } elsif ($! == ENOENT) {
+                       syslog('warning', "link($src -> $dst): $!")
+               } # else loop @try
+       }
+       my $e = $!;
+       my $src = "$mdir/{".join(',', @try)."}/$orig";
+       my $oidhex = unpack('H*', $oidbin);
+       syslog('warning', "link($src -> $dst) ($oidhex): $e");
+       for (@try) { return if -e "$mdir/$_/$orig" };
+       lms_clear_src($self, "maildir:$mdir", \$orig);
+}
+
+sub sto_export_kw ($$$) {
+       my ($self, $docid, $vmdish) = @_; # vmdish (vmd or vmd_mod)
+       my ($eidx, $tl) = eidx_init($self);
+       my $lms = _lms_rw($self) // return;
+       my $xr3 = $eidx->{oidx}->get_xref3($docid, 1);
+       for my $row (@$xr3) {
+               my (undef, undef, $oidbin) = @$row;
+               my $locs = $lms->locations_for($oidbin) // next;
+               while (my ($loc, $ids) = each %$locs) {
+                       if ($loc =~ s!\Amaildir:!!i) {
+                               for my $id (@$ids) {
+                                       export1_kw_md($self, $loc, $id,
+                                                       $oidbin, $vmdish);
+                               }
+                       }
+                       # TODO: IMAP
+               }
+       }
+}
+
+# vmd = { kw => [ qw(seen ...) ], L => [ qw(inbox ...) ] }
 sub set_eml_vmd {
        my ($self, $eml, $vmd, $docids) = @_;
        my ($eidx, $tl) = eidx_init($self);
        $docids //= [ _docids_for($self, $eml) ];
        for my $docid (@$docids) {
                $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
+               sto_export_kw($self, $docid, $vmd);
        }
        $docids;
 }
@@ -284,6 +368,12 @@ EOF
        $docid;
 }
 
+sub _add_vmd ($$$$) {
+       my ($self, $idx, $docid, $vmd) = @_;
+       $idx->ipc_do('add_vmd', $docid, $vmd);
+       sto_export_kw($self, $docid, $vmd);
+}
+
 sub add_eml {
        my ($self, $eml, $vmd, $xoids) = @_;
        my $im = $self->{-fake_im} // $self->importer; # may create new epoch
@@ -310,7 +400,7 @@ sub add_eml {
                        @$vivify_xvmd = sort { $a <=> $b } keys(%docids);
                }
        }
-       if (@$vivify_xvmd) {
+       if (@$vivify_xvmd) { # docids list
                $xoids //= {};
                $xoids->{$smsg->{blob}} = 1;
                for my $docid (@$vivify_xvmd) {
@@ -327,7 +417,7 @@ sub add_eml {
                        for my $oid (keys %$xoids) {
                                $oidx->add_xref3($docid, -1, $oid, '.');
                        }
-                       $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
+                       _add_vmd($self, $idx, $docid, $vmd) if $vmd;
                }
                $vivify_xvmd;
        } elsif (my @docids = _docids_for($self, $eml)) {
@@ -337,7 +427,7 @@ sub add_eml {
                        $oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
                        # add_eidx_info for List-Id
                        $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
-                       $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
+                       _add_vmd($self, $idx, $docid, $vmd) if $vmd;
                }
                \@docids;
        } else { # totally new message
@@ -347,7 +437,7 @@ sub add_eml {
                $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
                my $idx = $eidx->idx_shard($smsg->{num});
                $idx->index_eml($eml, $smsg);
-               $idx->ipc_do('add_vmd', $smsg->{num}, $vmd) if $vmd;
+               _add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd;
                $smsg;
        }
 }
@@ -365,6 +455,7 @@ sub index_eml_only {
        set_eml($self, $eml, $vmd, $xoids);
 }
 
+# store {kw} / {L} info for a message which is only in an external
 sub _external_only ($$$) {
        my ($self, $xoids, $eml) = @_;
        my $eidx = $self->{priv_eidx};
@@ -398,6 +489,7 @@ sub update_xvmd {
                next if $seen{$docid}++;
                my $idx = $eidx->idx_shard($docid);
                $idx->ipc_do('update_vmd', $docid, $vmd_mod);
+               sto_export_kw($self, $docid, $vmd_mod);
        }
        return unless scalar(keys(%$xoids));
 
@@ -410,12 +502,14 @@ sub update_xvmd {
                        }
                        my $idx = $eidx->idx_shard($docid);
                        $idx->ipc_do('update_vmd', $docid, $vmd_mod);
+                       sto_export_kw($self, $docid, $vmd_mod);
                }
                return;
        }
        # totally unseen
        my ($smsg, $idx) = _external_only($self, $xoids, $eml);
        $idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod);
+       sto_export_kw($self, $smsg->{num}, $vmd_mod);
 }
 
 # set or update keywords for external message, called via ipc_do
@@ -433,6 +527,7 @@ sub set_xvmd {
                next if $seen{$docid}++;
                my $idx = $eidx->idx_shard($docid);
                $idx->ipc_do('set_vmd', $docid, $vmd);
+               sto_export_kw($self, $docid, $vmd);
        }
        return unless scalar(keys(%$xoids));
 
@@ -443,6 +538,7 @@ sub set_xvmd {
        # totally unseen:
        my ($smsg, $idx) = _external_only($self, $xoids, $eml);
        $idx->ipc_do('add_vmd', $smsg->{num}, $vmd);
+       sto_export_kw($self, $smsg->{num}, $vmd);
 }
 
 sub checkpoint {
@@ -497,6 +593,7 @@ sub ipc_atfork_child {
        if (my $to_close = delete($self->{to_close})) {
                close($_) for @$to_close;
        }
+       openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user');
        $self->SUPER::ipc_atfork_child;
 }
 
diff --git a/t/lei-auto-watch.t b/t/lei-auto-watch.t
new file mode 100644
index 00000000..321c0ab3
--- /dev/null
+++ b/t/lei-auto-watch.t
@@ -0,0 +1,45 @@
+#!perl -w
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use File::Basename qw(basename);
+my ($ro_home, $cfg_path) = setup_public_inboxes;
+my $have_fast_inotify = eval { require Linux::Inotify2 } ||
+       eval { require IO::KQueue };
+
+$have_fast_inotify or
+       diag("$0 IO::KQueue or Linux::Inotify2 missing, test will be slow");
+
+test_lei(sub {
+       my $x = "$ENV{HOME}/x";
+       my $y = "$ENV{HOME}/y";
+       lei_ok qw(add-external), "$ro_home/t1";
+       lei_ok qw(q mid:testmess...@example.com -o), $x;
+       lei_ok qw(q mid:testmess...@example.com -o), $y;
+       my @x = glob("$x/cur/*");
+       my @y = glob("$y/cur/*");
+       scalar(@x) == 1 or xbail 'expected 1 file', \@x;
+       scalar(@y) == 1 or xbail 'expected 1 file', \@y;
+
+       my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
+       lei_ok qw(inspect), "blob:$oid";
+       my $ins = json_utf8->decode($lei_out);
+       my $exp = { "maildir:$x" => [ map { basename($_) } @x ],
+               "maildir:$y" => [ map { basename($_) } @y ] };
+       is_deeply($ins->{'mail-sync'}, $exp, 'inspect as expected');
+       lei_ok qw(add-watch), $x;
+       my $dst = $x[0] . 'S';
+       rename($x[0], $dst) or xbail "rename($x[0], $dst): $!";
+       tick($have_fast_inotify ? undef : 2.1); # wait for inotify
+       my @y2 = glob("$y/*/*");
+       is_deeply(\@y2, [ "$y[0]S" ], "`seen' kw propagated to `y' dir");
+       lei_ok qw(note-event done);
+       tick; # XXX why is this needed?
+       lei_ok qw(inspect), "blob:$oid";
+       $ins = json_utf8->decode($lei_out);
+       $exp = { "maildir:$x" => [ map { basename($_) } glob("$x/*/*") ],
+               "maildir:$y" => [ map { basename($_) } glob("$y/*/*") ] };
+       is_deeply($ins->{'mail-sync'}, $exp, 'mail_sync matches FS');
+});
+
+done_testing;
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/

Reply via email to