lei/store can get stuck if lei-daemon is blocked, and lei-daemon
can get stuck when a clients stderr is redirected to a pager
that isn't consumed.

So start relying on Time::HiRes::alarm to generate SIGALRM to
break out of the `print' perlop.  Unfortunately, this isn't easy
since Perl auto-restarts all writes, so we dup(2) the
destination FD and close the copy in the SIGALRM handler to
force `print' to return.

Most programs (MUAs, editors, etc.) aren't equipped to deal with
non-blocking STDERR, so we can't make the stderr file description
non-blocking.

Another way to solve this problem would be to have script/lei
send a non-blocking pipe to lei-daemon in the {2} slot and
make script/lei splice messages from the pipe to stderr.
Unfortunately, that requires more work and forces more
complexity into script/lei and slow down normal cases where
stderr doesn't get blocked.
---
 lib/PublicInbox/LEI.pm         |  3 ++-
 lib/PublicInbox/LeiStore.pm    |  8 ++++++--
 lib/PublicInbox/LeiStoreErr.pm | 27 +++++++++++++++++++++++++--
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 1899bf38..06bc7ebd 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -1287,7 +1287,7 @@ sub lazy_start {
        undef $lk;
        my @st = stat($path) or die "stat($path): $!";
        my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
-       local $oldset = PublicInbox::DS::block_signals();
+       local $oldset = PublicInbox::DS::block_signals(POSIX::SIGALRM);
        die "incompatible narg=$narg" if $narg != 5;
        $PublicInbox::IPC::send_cmd or die <<"";
 (Socket::MsgHdr || Inline::C) missing/unconfigured (narg=$narg);
@@ -1369,6 +1369,7 @@ sub lazy_start {
                  strftime('%Y-%m-%dT%H:%M:%SZ', gmtime(time))," $$ ", @_);
        };
        local $SIG{PIPE} = 'IGNORE';
+       local $SIG{ALRM} = 'IGNORE';
        open STDERR, '>&STDIN' or die "redirect stderr failed: $!";
        open STDOUT, '>&STDIN' or die "redirect stdout failed: $!";
        # $daemon pipe to `lei' closed, main loop begins:
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 9923ec3f..0cb78f79 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -33,6 +33,7 @@ use IO::Handle (); # ->autoflush
 use Sys::Syslog qw(syslog openlog);
 use Errno qw(EEXIST ENOENT);
 use PublicInbox::Syscall qw(rename_noreplace);
+use PublicInbox::LeiStoreErr;
 
 sub new {
        my (undef, $dir, $opt) = @_;
@@ -112,7 +113,10 @@ sub _tail_err {
        my ($self) = @_;
        my $err = $self->{-tmp_err} // return;
        $err->clearerr; # clear EOF marker
-       print { $self->{-err_wr} } readline($err);
+       my @msg = readline($err);
+       PublicInbox::LeiStoreErr::emit($self->{-err_wr}, @msg) and return;
+       # syslog is the last resort if lei-daemon broke
+       syslog('warning', '%s', $_) for @msg;
 }
 
 sub eidx_init {
@@ -627,12 +631,12 @@ sub write_prepare {
                # Mail we import into lei are private, so headers filtered out
                # by -mda for public mail are not appropriate
                local @PublicInbox::MDA::BAD_HEADERS = ();
+               local $SIG{ALRM} = 'IGNORE';
                $self->wq_workers_start("lei/store $dir", 1, $lei->oldset, {
                                        lei => $lei,
                                        -err_wr => $w,
                                        to_close => [ $r ],
                                }, \&_sto_atexit);
-               require PublicInbox::LeiStoreErr;
                PublicInbox::LeiStoreErr->new($r, $lei);
        }
        $lei->{sto} = $self;
diff --git a/lib/PublicInbox/LeiStoreErr.pm b/lib/PublicInbox/LeiStoreErr.pm
index 47fa2277..fe4af51e 100644
--- a/lib/PublicInbox/LeiStoreErr.pm
+++ b/lib/PublicInbox/LeiStoreErr.pm
@@ -9,6 +9,30 @@ use parent qw(PublicInbox::DS);
 use PublicInbox::Syscall qw(EPOLLIN);
 use Sys::Syslog qw(openlog syslog closelog);
 use IO::Handle (); # ->blocking
+use Time::HiRes ();
+use autodie qw(open);
+our $err_wr;
+
+# We don't want blocked stderr on clients to block lei/store or lei-daemon.
+# We can't make stderr non-blocking since it can break MUAs or anything
+# lei might spawn.  So we setup a timer to wake us up after a second if
+# printing to a user's stderr hasn't completed, yet.  Unfortunately,
+# EINTR alone isn't enough since Perl auto-restarts writes on signals,
+# so to interrupt writes to clients with blocked stderr, we dup the
+# error output to $err_wr ahead-of-time and close $err_wr in the
+# SIGALRM handler to ensure `print' gets aborted:
+
+sub abort_err_wr { close($err_wr) if $err_wr; undef $err_wr }
+
+sub emit ($@) {
+       my ($efh, @msg) = @_;
+       open(local $err_wr, '>&', $efh); # fdopen(dup(fileno($efh)), "w")
+       local $SIG{ALRM} = \&abort_err_wr;
+       Time::HiRes::alarm(1.0, 0.1);
+       my $ret = print $err_wr @msg;
+       Time::HiRes::alarm(0);
+       $ret;
+}
 
 sub new {
        my ($cls, $rd, $lei) = @_;
@@ -26,8 +50,7 @@ sub event_step {
        for my $lei (values %PublicInbox::DS::DescriptorMap) {
                my $cb = $lei->can('store_path') // next;
                next if $cb->($lei) ne $self->{store_path};
-               my $err = $lei->{2} // next;
-               print $err $buf and $printed = 1;
+               emit($lei->{2} // next, $buf) and $printed = 1;
        }
        if (!$printed) {
                openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user');

Reply via email to