[PATCH 03/13] view: show thread context in the thread-aware flat view

2016-06-30 Thread Eric Wong
This lets users see a small window of context around
the current message relative to other threads.
---
 lib/PublicInbox/Feed.pm   |   3 +-
 lib/PublicInbox/SearchView.pm |   2 +-
 lib/PublicInbox/View.pm   | 160 +-
 3 files changed, 100 insertions(+), 65 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 36802fa..73986e8 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -138,6 +138,7 @@ sub emit_html_index {
my $fh = $res->([200,['Content-Type'=>'text/html; charset=UTF-8']]);
 
my $max = $ctx->{max} || MAX_PER_PAGE;
+   $ctx->{-upfx} = '';
 
my ($footer, $param, $last);
my $state = { ctx => $ctx, seen => {}, anchor_idx => 0, fh => $fh };
@@ -174,7 +175,7 @@ sub emit_index_nosrch {
$state->{first} ||= $commit;
 
my $mime = do_cat_mail($ibx, $path) or return 0;
-   $fh->write(PublicInbox::View::index_entry($mime, $state));
+   $fh->write(PublicInbox::View::index_entry($mime, $state, 1));
1;
});
$last;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 488822e..8771d5d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -169,6 +169,7 @@ sub mset_thread {
}
my $skel = search_nav_bot($mset, $q). "";
my $inbox = $ctx->{-inbox};
+   $ctx->{-upfx} = '';
my $state = {
-inbox => $inbox,
anchor_idx => 1,
@@ -181,7 +182,6 @@ sub mset_thread {
prev_level => 0,
seen => {},
srch => $ctx->{srch},
-   upfx => './',
};
 
PublicInbox::View::walk_thread($th, $state,
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a774feb..eac541d 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -97,7 +97,7 @@ sub nr_to_s ($$$) {
 
 # this is already inside a <pre>
 sub index_entry {
-   my ($mime, $state) = @_;
+   my ($mime, $state, $more) = @_;
my $ctx = $state->{ctx};
my $srch = $ctx->{srch};
my $hdr = $mime->header_obj;
@@ -109,51 +109,76 @@ sub index_entry {
my $mid = PublicInbox::Hval->new_msgid($mid_raw);
 
my $root_anchor = $state->{root_anchor} || '';
-   my $path = $root_anchor ? '../../' : '';
-   my $href = $mid->as_href;
my $irt = in_reply_to($hdr);
 
-   $subj = ''.ascii_html($subj).'';
-   $subj = "$subj" if $root_anchor eq $id_m;
-
-   my $ts = _msg_date($hdr);
-   my $rv = "# ";
-   $rv .= $subj;
-   my $mhref = $path.$href.'/';
-   my $from = _hdr_names($hdr, 'From');
-   $rv .= "\n- $from @ $ts UTC\n";
+   my $rv = ''.ascii_html($subj).'';
+   $rv = "$rv" if $root_anchor eq $id_m;
+   $rv .= "\n";
+   $rv .= _th_index_lite($mid_raw, $irt, $id, $state);
my @tocc;
foreach my $f (qw(To Cc)) {
my $dst = _hdr_names($hdr, $f);
push @tocc, "$f: $dst" if $dst ne '';
}
+   $rv .= "From: "._hdr_names($hdr, 'From').' @ '._msg_date($hdr)." UTC\n";
$rv .= '  '.join('; +', @tocc) . "\n" if @tocc;
$rv .= "\n";
 
# scan through all parts, looking for displayable text
+   my $href = $mid->as_href;
+   my $mhref = $ctx->{-upfx}.$href.'/';
msg_iter($mime, sub { $rv .= add_text_body($mhref, $_[0]) });
-   $rv .= "\npermalink" .
-   " / raw / ";
-   my $mapping = $state->{mapping};
-   my $nr_c = $mapping->{$mid_raw} || 0;
+
+   # add the footer
+   $rv .= "\n^ ".
+   "permalink" .
+   " / raw" .
+   " / reply";
+   if (my $pct = $state->{pct}) { # used by SearchView.pm
+   $rv .= " [relevance $pct->{$mid_raw}%]";
+   }
+   $rv .= $more ? "\n\n" : "\n";
+}
+
+sub _th_index_lite {
+   my ($mid_raw, $irt, $id, $state) = @_;
+   my $rv = '';
+   my $mapping = $state->{mapping} or return $rv;
+   my $pad = '  ';
+   # map = [children, attr, node, idx, level]
+   my $map = $mapping->{$mid_raw};
+   my $nr_c = scalar @{$map->[0]};
my $nr_s = 0;
if (defined $irt) {
-   $nr_s = ($mapping->{$irt} || 0) - 1;
+   my $irt_map = $mapping->{$irt};
+   my $siblings = $irt_map->[0];
+   $nr_s = scalar(@$siblings) - 1;
$nr_s = 0 if $nr_s < 0;
-   $irt = anchor_for($irt);
-   $rv .= "#parent,";
-   } else {
-   $rv .= 'root message:';
+   $rv .= $pad . $irt_map->[1];
+   my $idx = $map->[3];
+   if ($idx > 0) {
+   

[PATCH 09/13] view: fix up some HTML injection via Message-ID vectors

2016-06-30 Thread Eric Wong
Oops, these were only introduced during the hybrid flat thread
view reworking and never deployed.
---
 lib/PublicInbox/View.pm | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 17d6de5..44130b9 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -103,7 +103,7 @@ sub index_entry {
my $subj = $hdr->header('Subject');
 
my $mid_raw = mid_clean(mid_mime($mime));
-   my $id = id_compress($mid_raw);
+   my $id = id_compress($mid_raw, 1);
my $id_m = 'm'.$id;
my $mid = PublicInbox::Hval->new_msgid($mid_raw);
 
@@ -119,20 +119,23 @@ sub index_entry {
my $dst = _hdr_names($hdr, $f);
push @tocc, "$f: $dst" if $dst ne '';
}
-   my $mapping = $ctx->{mapping};
$rv .= "From: "._hdr_names($hdr, 'From').' @ '._msg_date($hdr)." UTC";
my $upfx = $ctx->{-upfx};
-   $rv .= qq{ (permalink / };
-   $rv .= qq{raw)\n};
+   my $mhref = $upfx . $mid->as_href . '/';
+   $rv .= qq{ (permalink / };
+   $rv .= qq{raw)\n};
$rv .= '  '.join('; +', @tocc) . "\n" if @tocc;
+
+   my $mapping = $ctx->{mapping};
if (!$mapping && $irt) {
-   $rv .= qq(In-Reply-To: $irt\n)
+   my $mirt = PublicInbox::Hval->msgid($irt);
+   my $href = $upfx . $mirt->as_href . '/';
+   my $html = $mirt->as_html;
+   $rv .= qq(In-Reply-To: $html\n)
}
$rv .= "\n";
 
# scan through all parts, looking for displayable text
-   my $href = $mid->as_href;
-   my $mhref = $ctx->{-upfx}.$href.'/';
msg_iter($mime, sub { $rv .= add_text_body($mhref, $_[0]) });
 
# add the footer
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/13] www: hybrid flat+thread conversation view

2016-06-30 Thread Eric Wong
I've been long-dreaming of this and finally it's at least
publishable (I hope :x).  This flat view with thread skeletons
is 100% more usable than the dumb old one, but a little slower
(naturally :<)

I was originally hoping to remove the threaded /t/ endpoint
conversation view entirely to reduce server/caching overheads
but I still find it more usable in some situations.

What I still enjoy is being able to toggle between
[flat|threaded] views.

Eric Wong (13):
  www: implement hybrid flat+thread conversation view
  www: use WwwStream for dumping thread and search views
  view: show thread context in the thread-aware flat view
  view: merge $state hash with existing $ctx
  feed: add $INBOX/new.html endpoint
  view: tweak thread/index header slightly
  view: show more nearby messages in flat thread view
  www: reinstate old thread view as an option
  view: fix up some HTML injection via Message-ID vectors
  view: default to flat/hybrid thread display
  view: show thread size when linking to summary
  view: fixup bad reference to new_msgid
  www_stream: add response wrapper sub

 TODO  |   2 -
 lib/PublicInbox/Feed.pm   |  51 +++-
 lib/PublicInbox/SearchView.pm | 141 +-
 lib/PublicInbox/View.pm   | 590 +++---
 lib/PublicInbox/WWW.pm|  22 +-
 lib/PublicInbox/WwwStream.pm  |  20 +-
 t/view.t  |   3 +-
 7 files changed, 464 insertions(+), 365 deletions(-)

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 1/2] view: improve readability of msg_reply with here-doc

2016-06-30 Thread Eric Wong
This should make formatting more apparent since we can rely
on <pre> semantics.
---
 lib/PublicInbox/View.pm | 36 ++--
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 27dd155..9d48dfc 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -54,21 +54,29 @@ sub msg_reply {
 
my ($arg, $link) = mailto_arg_link($hdr);
push @$arg, '/path/to/YOUR_REPLY';
+   $arg = join(" \\\n", '', @$arg);
+   ".
-   "You may reply publically to this message via\n".
-   "plain-text email using any one of the following methods:\n\n" .
-   "* Save the following mbox file, import it into your mail client,\n" .
-   "  and reply-to-all from there: mbox\n\n" .
-   "* Reply to all the recipients using the --to, --cc,\n" .
-   "  and --in-reply-to switches of git-send-email(1):\n\n" .
-   "  git send-email \\\n" .
-   join(" \\\n", @$arg ). "\n\n" .
-   qq(  $se_url\n\n) .
-   "* If your mail client supports setting the In-Reply-To" .
-   " header\n  via mailto: links, try the " .
-   qq(mailto: link\n) .
-   '';
+* Save the following mbox file, import it into your mail client,
+  and reply-to-all from there: mbox
+
+* Reply to all the recipients using the --to, --cc,
+  and --in-reply-to switches of git-send-email(1):
+
+  git send-email$arg
+
+  $se_url
+
+* If your mail client supports setting the In-Reply-To header
+  via mailto: links, try the mailto: link
+EOF
 }
 
 sub in_reply_to {
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/2] view: reply instruction tweaks

2016-06-30 Thread Eric Wong
It may be 2016, but top-posting still makes me cringe :<

Eric Wong (2):
  view: improve readability of msg_reply with here-doc
  view: reference posting style article on Wikipedia

 lib/PublicInbox/View.pm | 42 --
 1 file changed, 28 insertions(+), 14 deletions(-)

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] searchview: show result count in thread index, for now

2016-06-30 Thread Eric Wong
I'm not sure what to show here, actually; but it's better
than triggering an uninitialized variable warning.
---
 lib/PublicInbox/SearchView.pm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 30a310c..d019a0f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -176,6 +176,7 @@ sub mset_thread {
$ctx->{prev_attr} = '';
$ctx->{prev_level} = 0;
$ctx->{seen} = {};
+   $ctx->{s_nr} = scalar(@m).'+ results';
 
PublicInbox::View::walk_thread($th, $ctx,
*PublicInbox::View::pre_thread);
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 12/13] view: fixup bad reference to new_msgid

2016-06-30 Thread Eric Wong
Oops, this endpoint needs testing :x
---
 lib/PublicInbox/View.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index db2bd20..b4f80d1 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -129,7 +129,7 @@ sub index_entry {
 
my $mapping = $ctx->{mapping};
if (!$mapping && $irt) {
-   my $mirt = PublicInbox::Hval->msgid($irt);
+   my $mirt = PublicInbox::Hval->new_msgid($irt);
my $href = $upfx . $mirt->as_href . '/';
my $html = $mirt->as_html;
$rv .= qq(In-Reply-To: $html\n)
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 05/13] feed: add $INBOX/new.html endpoint

2016-06-30 Thread Eric Wong
This acts like the Atom feed; but should be viewable directly
from browsers.
---
 lib/PublicInbox/Feed.pm | 27 +++
 lib/PublicInbox/WWW.pm  | 10 +-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index ddc1e3c..c16c417 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -34,6 +34,33 @@ sub generate_html_index {
sub { emit_html_index($_[0], $ctx) };
 }
 
+sub new_html {
+   my ($ctx) = @_;
+   my @paths;
+   my (undef, $last) = each_recent_blob($ctx, sub {
+   my ($path, $commit, $ts, $u, $subj) = @_;
+   $ctx->{first} ||= $commit;
+   push @paths, $path;
+   });
+   if (!@paths) {
+   return [404, ['Content-Type', 'text/plain'],
+   ["No messages, yet\n"] ];
+   }
+   $ctx->{-html_tip} = '';
+   $ctx->{-upfx} = '';
+   my $res = PublicInbox::WwwStream->new($ctx, sub {
+   while (my $path = shift @paths) {
+   my $m = do_cat_mail($ctx->{-inbox}, $path) or next;
+   my $more = scalar @paths;
+   my $s = PublicInbox::View::index_entry($m, $ctx, $more);
+   $s .= '' unless $more;
+   return $s;
+   }
+   undef;
+   });
+   [ 200, ['Content-Type', 'text/html; charset=UTF-8'], $res ]
+}
+
 # private subs
 
 sub title_tag {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 196486f..da5c1d3 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -75,7 +75,8 @@ sub call {
invalid_inbox($self, $ctx, $1) || get_index($ctx);
} elsif ($path_info =~ m!$INBOX_RE/(?:atom\.xml|new\.atom)\z!o) {
invalid_inbox($self, $ctx, $1) || get_atom($ctx);
-
+   } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
+   invalid_inbox($self, $ctx, $1) || get_new($ctx);
} elsif ($path_info =~ m!$INBOX_RE/
($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
my $path = $2;
@@ -189,6 +190,13 @@ sub get_atom {
PublicInbox::Feed::generate($ctx);
 }
 
+# /$INBOX/new.html -> HTML only
+sub get_new {
+   my ($ctx) = @_;
+   require PublicInbox::Feed;
+   PublicInbox::Feed::new_html($ctx);
+}
+
 # /$INBOX/?r=$GIT_COMMIT -> HTML only
 sub get_index {
my ($ctx) = @_;
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 01/13] www: implement hybrid flat+thread conversation view

2016-06-30 Thread Eric Wong
This should be more accessible to readers on narrow terminals
(or giant fonts) while providing a chronological view which
is also aware of message threading relationships.
---
 TODO  |   2 -
 lib/PublicInbox/SearchView.pm |  42 +++
 lib/PublicInbox/View.pm   | 248 --
 lib/PublicInbox/WWW.pm|   9 +-
 t/plack.t |   2 +-
 5 files changed, 115 insertions(+), 188 deletions(-)

diff --git a/TODO b/TODO
index f29f2f0..3b6401f 100644
--- a/TODO
+++ b/TODO
@@ -4,8 +4,6 @@ TODO items for public-inbox
 
 * mailmap support (same as git) for remapping expired email addresses
 
-* WWW: Hybrid flat view + thread skeleton (requires Xapian)
-
 * POP3 server, since some webmail providers support external POP3:
   https://public-inbox.org/meta/20160411034104.ga7...@dcvr.yhbt.net/
 
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index ae875bf..fbef411 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -163,44 +163,34 @@ sub tdump {
} else { # order by time (default for threaded view)
$th->order(*PublicInbox::View::sort_ts);
}
+   my $skel = '';
my $state = {
+   -inbox => $ctx->{-inbox},
+   anchor_idx => 1,
ctx => $ctx,
-   anchor_idx => 0,
-   pct => \%pct,
cur_level => 0,
-   -inbox => $ctx->{-inbox},
+   dst => \$skel,
fh => $fh,
+   mapping => {},
+   pct => \%pct,
+   prev_attr => '',
+   prev_level => 0,
+   seen => {},
+   srch => $ctx->{srch},
+   upfx => './',
};
$ctx->{searchview} = 1;
-   PublicInbox::View::walk_thread($th, $state, *tdump_ent);
-   PublicInbox::View::thread_adj_level($state, 0);
+   PublicInbox::View::walk_thread($th, $state,
+   *PublicInbox::View::pre_thread);
+
+   PublicInbox::View::thread_entry($state, $_, 0) for @m;
 
-   $fh->write(search_nav_bot($mset, $q). "\n\n" .
+   $fh->write(search_nav_bot($mset, $q). "\n\n" . $skel . "\n" .
foot($ctx). '');
 
$fh->close;
 }
 
-sub tdump_ent {
-   my ($state, $level, $node) = @_;
-   my $mime = $node->message;
-
-   if ($mime) {
-   # lazy load the full message from mini_mime:
-   my $mid = mid_mime($mime);
-   $mime = eval { $state->{-inbox}->msg_by_mid($mid) } and
-   $mime = Email::MIME->new($mime);
-   }
-   if ($mime) {
-   my $end = PublicInbox::View::thread_adj_level($state, $level);
-   PublicInbox::View::index_entry($mime, $level, $state);
-   $state->{fh}->write($end) if $end;
-   } else {
-   my $mid = $node->messageid;
-   PublicInbox::View::ghost_flush($state, '', $mid, $level);
-   }
-}
-
 sub foot {
my ($ctx) = @_;
my $foot = $ctx->{footer} || '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 30339cd..65788db 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -89,77 +89,72 @@ sub _hdr_names ($$) {
ascii_html(join(', ', PublicInbox::Address::names($val)));
 }
 
+sub nr_to_s ($$$) {
+   my ($nr, $singular, $plural) = @_;
+   return "0 $plural" if $nr == 0;
+   $nr == 1 ? "$nr $singular" : "$nr $plural";
+}
+
 # this is already inside a <pre>
 sub index_entry {
my ($mime, $level, $state) = @_;
-   my $midx = $state->{anchor_idx}++;
+   $state->{anchor_idx}++;
my $ctx = $state->{ctx};
my $srch = $ctx->{srch};
my $hdr = $mime->header_obj;
my $subj = $hdr->header('Subject');
 
my $mid_raw = mid_clean(mid_mime($mime));
-   my $id = anchor_for($mid_raw);
-   my $seen = $state->{seen};
-   $seen->{$id} = "#$id"; # save the anchor for children, later
-
+   my $id = id_compress($mid_raw);
+   my $id_m = 'm'.$id;
my $mid = PublicInbox::Hval->new_msgid($mid_raw);
 
my $root_anchor = $state->{root_anchor} || '';
my $path = $root_anchor ? '../../' : '';
my $href = $mid->as_href;
my $irt = in_reply_to($hdr);
-   my $parent_anchor = $seen->{anchor_for($irt)} if defined $irt;
 
-   $subj = ascii_html($subj);
-   $subj = "$subj";
-   $subj = "$subj" if $root_anchor eq $id;
+   $subj = ''.ascii_html($subj).'';
+   $subj = "$subj" if $root_anchor eq $id_m;
 
my $ts = _msg_date($hdr);
-   my $rv = "";
-   $rv .= "$subj\n";
-   my $txt = "${path}$href/raw";
-   my $fh = $state->{fh};
+   my $rv = "# ";
+   $rv .= $subj;
+   my $mhref = $path.$href.'/';
my $from = _hdr_names($hdr, 

[PATCH 04/13] view: merge $state hash with existing $ctx

2016-06-30 Thread Eric Wong
This reduces the level of indirection to reach certain objects
within the hash and there are no namespace or lifetime conflicts
anyways.
---
 lib/PublicInbox/Feed.pm   |  25 +++
 lib/PublicInbox/SearchView.pm |  25 +++
 lib/PublicInbox/View.pm   | 149 --
 3 files changed, 93 insertions(+), 106 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 73986e8..ddc1e3c 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -141,7 +141,9 @@ sub emit_html_index {
$ctx->{-upfx} = '';
 
my ($footer, $param, $last);
-   my $state = { ctx => $ctx, seen => {}, anchor_idx => 0, fh => $fh };
+   $ctx->{seen} = {};
+   $ctx->{anchor_idx} = 0;
+   $ctx->{fh} = $fh;
my $srch = $ctx->{srch};
$fh->write(_html_index_top($feed_opts, $srch));
 
@@ -149,14 +151,13 @@ sub emit_html_index {
# which we must continue supporting:
my $qp = $ctx->{qp};
if ($qp && !$qp->{r} && $srch) {
-   $state->{srch} = $srch;
-   $last = PublicInbox::View::emit_index_topics($state);
+   $last = PublicInbox::View::emit_index_topics($ctx);
$param = 'o';
} else {
-   $last = emit_index_nosrch($ctx, $state);
+   $last = emit_index_nosrch($ctx);
$param = 'r';
}
-   $footer = nav_footer($ctx, $last, $feed_opts, $state, $param);
+   $footer = nav_footer($ctx, $last, $feed_opts, $param);
if ($footer) {
my $list_footer = $ctx->{footer};
$footer .= "\n\n" . $list_footer if $list_footer;
@@ -167,28 +168,28 @@ sub emit_html_index {
 }
 
 sub emit_index_nosrch {
-   my ($ctx, $state) = @_;
+   my ($ctx) = @_;
my $ibx = $ctx->{-inbox};
-   my $fh = $state->{fh};
+   my $fh = $ctx->{fh};
my (undef, $last) = each_recent_blob($ctx, sub {
my ($path, $commit, $ts, $u, $subj) = @_;
-   $state->{first} ||= $commit;
+   $ctx->{first} ||= $commit;
 
my $mime = do_cat_mail($ibx, $path) or return 0;
-   $fh->write(PublicInbox::View::index_entry($mime, $state, 1));
+   $fh->write(PublicInbox::View::index_entry($mime, $ctx, 1));
1;
});
$last;
 }
 
 sub nav_footer {
-   my ($ctx, $last, $feed_opts, $state, $param) = @_;
+   my ($ctx, $last, $feed_opts, $param) = @_;
my $qp = $ctx->{qp} or return '';
my $old_r = $qp->{$param};
my $head = '';
my $next = '';
-   my $first = $state->{first};
-   my $anchor = $state->{anchor_idx};
+   my $first = $ctx->{first};
+   my $anchor = $ctx->{anchor_idx};
 
if ($last) {
$next = qq!next!;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 8771d5d..4af6cad 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -170,21 +170,16 @@ sub mset_thread {
my $skel = search_nav_bot($mset, $q). "";
my $inbox = $ctx->{-inbox};
$ctx->{-upfx} = '';
-   my $state = {
-   -inbox => $inbox,
-   anchor_idx => 1,
-   ctx => $ctx,
-   cur_level => 0,
-   dst => \$skel,
-   mapping => {},
-   pct => \%pct,
-   prev_attr => '',
-   prev_level => 0,
-   seen => {},
-   srch => $ctx->{srch},
-   };
+   $ctx->{anchor_idx} = 1;
+   $ctx->{cur_level} = 0;
+   $ctx->{dst} = \$skel;
+   $ctx->{mapping} = {};
+   $ctx->{pct} = \%pct;
+   $ctx->{prev_attr} = '';
+   $ctx->{prev_level} = 0;
+   $ctx->{seen} = {};
 
-   PublicInbox::View::walk_thread($th, $state,
+   PublicInbox::View::walk_thread($th, $ctx,
*PublicInbox::View::pre_thread);
 
my $msgs = \@m;
@@ -197,7 +192,7 @@ sub mset_thread {
}
if ($mime) {
$mime = Email::MIME->new($mime);
-   return PublicInbox::View::index_entry($mime, $state);
+   return PublicInbox::View::index_entry($mime, $ctx);
}
$msgs = undef;
$skel .= "\n";
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index eac541d..0b47c89 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -97,8 +97,7 @@ sub nr_to_s ($$$) {
 
 # this is already inside a <pre>
 sub index_entry {
-   my ($mime, $state, $more) = @_;
-   my $ctx = $state->{ctx};
+   my ($mime, $ctx, $more) = @_;
my $srch = $ctx->{srch};
my $hdr = $mime->header_obj;
my $subj = $hdr->header('Subject');
@@ -108,13 +107,13 @@ sub index_entry {
my $id_m = 'm'.$id;
my $mid = PublicInbox::Hval->new_msgid($mid_raw);
 
- 

[PATCH 06/13] view: tweak thread/index header slightly

2016-06-30 Thread Eric Wong
This makes the top permalink/raw as well as the In-Reply-To
show up without search.  While we're at it, try to make
the links on the thread index from the "X siblings, Y replies"
more obvious.
---
 lib/PublicInbox/View.pm | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0b47c89..9393d44 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -119,8 +119,15 @@ sub index_entry {
my $dst = _hdr_names($hdr, $f);
push @tocc, "$f: $dst" if $dst ne '';
}
-   $rv .= "From: "._hdr_names($hdr, 'From').' @ '._msg_date($hdr)." UTC\n";
+   my $mapping = $ctx->{mapping};
+   $rv .= "From: "._hdr_names($hdr, 'From').' @ '._msg_date($hdr)." UTC";
+   my $upfx = $ctx->{-upfx};
+   $rv .= qq{ (permalink / };
+   $rv .= qq{raw)\n};
$rv .= '  '.join('; +', @tocc) . "\n" if @tocc;
+   if (!$mapping && $irt) {
+   $rv .= qq(In-Reply-To: $irt\n)
+   }
$rv .= "\n";
 
# scan through all parts, looking for displayable text
@@ -173,11 +180,7 @@ sub _th_index_lite {
if (my $next = $node->next) {
$rv .= $pad .  $mapping->{$next->messageid}->[1];
}
-   $rv .= ".\t\t\t";
-   $rv .= "($s_s, $s_c / ";
-   my $upfx = $ctx->{-upfx};
-   $rv .= qq{permalink / };
-   $rv .= qq{raw)\n};
+   $rv .= "_ $s_s, $s_c\n";
 }
 
 sub walk_thread {
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 10/13] view: default to flat/hybrid thread display

2016-06-30 Thread Eric Wong
This is friendlier for people on small screens and usually
eliminates the need to scroll horizontally.
---
 lib/PublicInbox/View.pm | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 44130b9..22d7250 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -154,8 +154,9 @@ sub index_entry {
} else {
$threaded = "$threaded";
}
-   $rv .= " [$threaded";
-   $rv .= "|$flat]";
+   $rv .= " / [$flat";
+   $rv .= "|$threaded]";
+   $rv .= " / thread overview";
}
 
$rv .= $more ? "\n\n" : "\n";
@@ -498,7 +499,7 @@ sub thread_skel {
my $mid = mid_clean($hdr->header_raw('Message-ID'));
my $sres = $srch->get_thread($mid);
my $nr = $sres->{total};
-   my $expand = qq(expand ) .
+   my $expand = qq(expand ) .
qq(/ mbox.gz ) .
qq(/ Atom feed);
 
@@ -869,7 +870,7 @@ sub emit_topics {
}
 
$subj = PublicInbox::Hval->new($subj)->as_html;
-   $cur->[1] .= "$subj\n";
+   $cur->[1] .= "$subj\n";
$ts = fmt_ts($ts);
my $attr = " $ts UTC";
 
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 13/13] www_stream: add response wrapper sub

2016-06-30 Thread Eric Wong
This encapsulates an entire PSGI response array, hopefully
making it easier to generate responses and avoid typos when
setting the Content-Type.
---
 lib/PublicInbox/Feed.pm   | 3 +--
 lib/PublicInbox/SearchView.pm | 4 +---
 lib/PublicInbox/View.pm   | 8 +++-
 lib/PublicInbox/WWW.pm| 3 +--
 lib/PublicInbox/WwwStream.pm  | 6 ++
 t/view.t  | 3 ++-
 6 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index c16c417..2f141c4 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -48,7 +48,7 @@ sub new_html {
}
$ctx->{-html_tip} = '';
$ctx->{-upfx} = '';
-   my $res = PublicInbox::WwwStream->new($ctx, sub {
+   PublicInbox::WwwStream->response($ctx, 200, sub {
while (my $path = shift @paths) {
my $m = do_cat_mail($ctx->{-inbox}, $path) or next;
my $more = scalar @paths;
@@ -58,7 +58,6 @@ sub new_html {
}
undef;
});
-   [ 200, ['Content-Type', 'text/html; charset=UTF-8'], $res ]
 }
 
 # private subs
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 4af6cad..30a310c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -55,9 +55,7 @@ sub sres_top_html {
$cb = mset_summary($ctx, $mset, $q);
}
}
-
-   [ $code, ['Content-Type', 'text/html; charset=UTF-8'],
-   PublicInbox::WwwStream->new($ctx, $cb) ];
+   PublicInbox::WwwStream->response($ctx, $code, $cb);
 }
 
 # display non-threaded search results similar to what users expect from
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b4f80d1..27dd155 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -27,7 +27,7 @@ sub msg_html {
my ($ctx, $mime, $footer) = @_;
my $hdr = $mime->header_obj;
my $tip = _msg_html_prepare($hdr, $ctx);
-   PublicInbox::WwwStream->new($ctx, sub {
+   PublicInbox::WwwStream->response($ctx, 200, sub {
my ($nr, undef) = @_;
if ($nr == 1) {
$tip . multipart_text_as_html($mime, '') .
@@ -278,7 +278,7 @@ sub stream_thread ($$) {
$mime = Email::MIME->new($mime);
$ctx->{-title_html} = ascii_html($mime->header('Subject'));
$ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
-   my $body = PublicInbox::WwwStream->new($ctx, sub {
+   PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $ctx;
while (@q) {
$level = shift @q;
@@ -297,7 +297,6 @@ sub stream_thread ($$) {
$ctx = undef;
$ret;
});
-   [ 200, ['Content-Type', 'text/html; charset=UTF-8'], $body ];
 }
 
 sub thread_html {
@@ -339,7 +338,7 @@ sub thread_html {
$ctx->{-title_html} = ascii_html($mime->header('Subject'));
$ctx->{-html_tip} = ''.index_entry($mime, $ctx, scalar @$msgs);
$mime = undef;
-   my $body = PublicInbox::WwwStream->new($ctx, sub {
+   PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $msgs;
while ($mime = shift @$msgs) {
$mid = mid_clean(mid_mime($mime));
@@ -352,7 +351,6 @@ sub thread_html {
$msgs = undef;
''.$skel;
});
-   [ 200, ['Content-Type', 'text/html; charset=UTF-8'], $body ];
 }
 
 sub multipart_text_as_html {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index cbd3142..c4509bd 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -236,8 +236,7 @@ sub get_mid_html {
require Email::MIME;
my $mime = Email::MIME->new($x);
searcher($ctx);
-   [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- PublicInbox::View::msg_html($ctx, $mime, $foot) ];
+   PublicInbox::View::msg_html($ctx, $mime, $foot);
 }
 
 # /$INBOX/$MESSAGE_ID/t/
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index d2bf318..6de1b31 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -14,6 +14,12 @@ sub new {
bless { nr => 0, cb => $cb, ctx => $ctx }, $class;
 }
 
+sub response {
+   my ($class, $ctx, $code, $cb) = @_;
+   [ $code, [ 'Content-Type', 'text/html; charset=UTF-8' ],
+ $class->new($ctx, $cb) ]
+}
+
 sub _html_top ($) {
my ($self) = @_;
my $ctx = $self->{ctx};
diff --git a/t/view.t b/t/view.t
index 8a898fe..4fdd151 100644
--- a/t/view.t
+++ b/t/view.t
@@ -34,7 +34,8 @@ sub msg_html ($) {
my ($mime) = @_;
 
my $s = '';
-   my $body = PublicInbox::View::msg_html($ctx, $mime);
+   my $r = PublicInbox::View::msg_html($ctx, $mime);
+   my $body = $r->[2];
while (defined(my $buf = $body->getline)) {
  

[PATCH 11/13] view: show thread size when linking to summary

2016-06-30 Thread Eric Wong
This should give readers a better idea of what to expect.
---
 lib/PublicInbox/View.pm | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 22d7250..fac53eb 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -156,7 +156,7 @@ sub index_entry {
}
$rv .= " / [$flat";
$rv .= "|$threaded]";
-   $rv .= " / thread overview";
+   $rv .= " / $ctx->{s_nr}";
}
 
$rv .= $more ? "\n\n" : "\n";
@@ -226,7 +226,8 @@ sub _th_index_lite {
$rv .= $pad . $mapping->{$nn->messageid}->[1];
}
}
-   $rv .= "_ $s_s, $s_c\n";
+   $rv .= "_ ";
+   $rv .= "$s_s, $s_c; $ctx->{s_nr}\n";
 }
 
 sub walk_thread {
@@ -319,6 +320,7 @@ sub thread_html {
$ctx->{root_anchor} = anchor_for($mid);
$ctx->{seen} = {};
$ctx->{mapping} = {};
+   $ctx->{s_nr} = "$nr+ messages in thread";
 
my $th = thread_results($msgs);
walk_thread($th, $ctx, *pre_thread);
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 14/13] view: fix permalink and raw links at the top

2016-06-30 Thread Eric Wong
Oops :x  I really need to whip check-inbox.perl into
shape or at least start running it again.

Fixes: e29518088b3f ("view: fix up some HTML injection via Message-ID vectors")
---
 lib/PublicInbox/View.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 11d8dd5..140cfee 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -138,8 +138,8 @@ sub index_entry {
$rv .= "From: "._hdr_names($hdr, 'From').' @ '._msg_date($hdr)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . $mid->as_href . '/';
-   $rv .= qq{ (permalink / };
-   $rv .= qq{raw)\n};
+   $rv .= qq{ (permalink / };
+   $rv .= qq{raw)\n};
$rv .= '  '.join('; +', @tocc) . "\n" if @tocc;
 
my $mapping = $ctx->{mapping};
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] view: move per-message anchor to top in conversation view

2016-06-30 Thread Eric Wong
This fixes the '^' (up) link in the $INBOX/new.html endpoint
for search-less displays.
---
 lib/PublicInbox/View.pm | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 8487c2a..11d8dd5 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -125,9 +125,10 @@ sub index_entry {
my $root_anchor = $ctx->{root_anchor} || '';
my $irt = in_reply_to($hdr);
 
-   my $rv = ''.ascii_html($subj).'';
-   $rv = "$rv" if $root_anchor eq $id_m;
-   $rv .= "\n";
+   my $rv = "* ";
+   $subj = ''.ascii_html($subj).'';
+   $subj = "$subj" if $root_anchor eq $id_m;
+   $rv .= $subj . "\n";
$rv .= _th_index_lite($mid_raw, $irt, $id, $ctx);
my @tocc;
foreach my $f (qw(To Cc)) {
@@ -241,8 +242,7 @@ sub _th_index_lite {
$rv .= $pad . $mapping->{$nn->messageid}->[1];
}
}
-   $rv .= "_ ";
-   $rv .= "$s_s, $s_c; $ctx->{s_nr}\n";
+   $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n";
 }
 
 sub walk_thread {
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/2] HTTPS smart git repository for our code

2016-06-30 Thread Eric Wong
Eating our own dogfood, git://80x24.org/public-inbox remains
available for those without curl or up-to-date TLS stack/certs.

Eric Wong (2):
  githttpbackend: allow git to be a regular scalar string
  update git repo location to https:// using GitHTTPBackend.pm

 Documentation/dc-dlvr-spam-flow.txt |  2 +-
 README  |  2 +-
 examples/public-inbox.psgi  | 14 +-
 examples/unsubscribe.psgi   |  3 ++-
 lib/PublicInbox/GitHTTPBackend.pm   |  4 ++--
 lib/PublicInbox/Unsubscribe.pm  |  2 +-
 lib/PublicInbox/WwwStream.pm|  6 +++---
 7 files changed, 23 insertions(+), 10 deletions(-)

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 1/2] githttpbackend: allow git to be a regular scalar string

2016-06-30 Thread Eric Wong
No point in forcing users to pass a hashref/object to
get a single git directory.
---
 lib/PublicInbox/GitHTTPBackend.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm 
b/lib/PublicInbox/GitHTTPBackend.pm
index 7267a1d..4f58c6b 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -80,7 +80,7 @@ sub serve_dumb {
return r(404);
}
 
-   my $f = "$git->{git_dir}/$path";
+   my $f = (ref $git ? $git->{git_dir} : $git) . '/' . $path;
return r(404) unless -f $f && -r _; # just in case it's a FIFO :P
my @st = stat(_);
my $size = $st[7];
@@ -179,7 +179,7 @@ sub serve_smart {
my $val = $env->{$name};
$env{$name} = $val if defined $val;
}
-   my $git_dir = $git->{git_dir};
+   my $git_dir = ref $git ? $git->{git_dir} : $git;
$env{GIT_HTTP_EXPORT_ALL} = '1';
$env{PATH_TRANSLATED} = "$git_dir/$path";
my %rdr = ( 0 => fileno($in) );
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] www_stream: fix search for new.html endpoint

2016-06-30 Thread Eric Wong
We want to avoid the bare './' wherever possible, but it
doesn't seem possible here.
---
 lib/PublicInbox/WwwStream.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 6de1b31..d9abb5a 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -39,7 +39,8 @@ sub _html_top ($) {
}
# XXX gross, for SearchView.pm
my $extra = $ctx->{-extra_form_html} || '';
-   $top = qq{$top} .
+   my $action = $upfx eq '' ? './' : $upfx;
+   $top = qq{$top} .
  qq{ } .
  $extra .
  qq{} .
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] searchview: add missing newline in search results

2016-06-30 Thread Eric Wong
Hrm... is there a more obvious way to do an internal API for
this while still being streamable?
---
 lib/PublicInbox/SearchView.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index d019a0f..ce1eff1 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -191,7 +191,8 @@ sub mset_thread {
}
if ($mime) {
$mime = Email::MIME->new($mime);
-   return PublicInbox::View::index_entry($mime, $ctx);
+   return PublicInbox::View::index_entry($mime, $ctx,
+   scalar @$msgs);
}
$msgs = undef;
$skel .= "\n";
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 3/2] git: allow cloning from the URL root, too

2016-06-30 Thread Eric Wong
This means we can still show non-git users a somewhat browseable
URL with a link to the README.html file while allowing git users
to type less when cloning.

All of the following are supported:

git clone https://public-inbox.org/ public-inbox
git clone https://public-inbox.org/public-inbox
git clone https://public-inbox.org/public-inbox.git
torsocks git clone http://ou63pmih66umazou.onion/public-inbox
---
 Documentation/dc-dlvr-spam-flow.txt | 2 +-
 README  | 2 +-
 examples/public-inbox.psgi  | 4 +++-
 lib/PublicInbox/GitHTTPBackend.pm   | 2 +-
 lib/PublicInbox/WwwStream.pm| 4 ++--
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/Documentation/dc-dlvr-spam-flow.txt 
b/Documentation/dc-dlvr-spam-flow.txt
index d151d27..81aba76 100644
--- a/Documentation/dc-dlvr-spam-flow.txt
+++ b/Documentation/dc-dlvr-spam-flow.txt
@@ -45,4 +45,4 @@ delivery path as well as removing the message from the git 
tree.
 * spamc / spamd - SpamAssassin: http://spamassassin.apache.org/
 
 * report-spam / dc-dlvr - distributed with public-inbox in the scripts/
-  directory: git clone https://public-inbox.org/public-inbox.git
+  directory: git clone https://public-inbox.org/ public-inbox
diff --git a/README b/README
index ca4e2a8..f56d68d 100644
--- a/README
+++ b/README
@@ -87,7 +87,7 @@ Hacking
 
 Source code is available via git:
 
-   git clone https://public-inbox.org/public-inbox.git
+   git clone https://public-inbox.org/ public-inbox
 
 See below for contact info.
 
diff --git a/examples/public-inbox.psgi b/examples/public-inbox.psgi
index a90a2bc..11e2a6e 100644
--- a/examples/public-inbox.psgi
+++ b/examples/public-inbox.psgi
@@ -47,7 +47,9 @@ builder {
sub {
my ($env) = @_;
# share public-inbox.git code!
-   if ($src && $env->{PATH_INFO} =~ m!\A/public-inbox\.git/(.*)!) {
+   if ($src && $env->{PATH_INFO} =~
+   m!\A/(?:public-inbox(?:\.git)?/)?
+   ($PublicInbox::GitHTTPBackend::ANY)\z!xo) {
PublicInbox::GitHTTPBackend::serve($env, $src, $1);
} else {
$www->call($env);
diff --git a/lib/PublicInbox/GitHTTPBackend.pm 
b/lib/PublicInbox/GitHTTPBackend.pm
index 4f58c6b..b485192 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -23,7 +23,7 @@ my @binary = qw!
objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
!;
 
-our $ANY = join('|', @binary, @text);
+our $ANY = join('|', @binary, @text, 'git-upload-pack');
 my $BIN = join('|', @binary);
 my $TEXT = join('|', @text);
 
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 97a6dc7..87a461e 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -7,7 +7,7 @@ use strict;
 use warnings;
 use PublicInbox::Hval qw(ascii_html);
 use URI;
-use constant PI_URL => 'https://public-inbox.org/public-inbox.git';
+use constant PI_URL => 'https://public-inbox.org/';
 
 sub new {
my ($class, $ctx, $cb) = @_;
@@ -87,7 +87,7 @@ sub _html_end {
'- ' . $desc,
$urls,
'Archived served using code from public-inbox:',
-   "\tgit clone $url",
+   "\tgit clone $url public-inbox",
).'';
 }
 
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 4/2] www_stream: fix stupid typo :x

2016-06-30 Thread Eric Wong
Note to self: remember to run tests

Fixes: 52052329aced ("git: allow cloning from the URL root, too")
---
 lib/PublicInbox/WwwStream.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 87a461e..fdab4da 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -87,7 +87,7 @@ sub _html_end {
'- ' . $desc,
$urls,
'Archived served using code from public-inbox:',
-   "\tgit clone $url public-inbox",
+   qq(\tgit clone $url public-inbox),
).'';
 }
 
-- 
EW
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] t/watch_maildir: quiet down spam check warning

2016-06-30 Thread Eric Wong
Probably better than bloating our own API with configurable
warning streams and such...
---
 t/watch_maildir.t | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/t/watch_maildir.t b/t/watch_maildir.t
index 2138963..3969c80 100644
--- a/t/watch_maildir.t
+++ b/t/watch_maildir.t
@@ -94,7 +94,10 @@ More majordomo info at  
http://vger.kernel.org/majordomo-info.html\n);
local $ENV{PATH} = $fail_path;
PublicInbox::Emergency->new($maildir)->prepare(\$msg);
$config->{'publicinboxwatch.spamcheck'} = 'spamc';
-   PublicInbox::WatchMaildir->new($config)->scan;
+   {
+   local $SIG{__WARN__} = sub {}; # quiet spam check warning
+   PublicInbox::WatchMaildir->new($config)->scan;
+   }
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
is(scalar @list, 0, 'tree has no files spamc checked');
is(unlink(glob("$maildir/new/*")), 1);
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 2/2] examples: add varnish-4.vcl

2016-07-01 Thread Eric Wong
Well, I'm fumbling along with this config.  Might as well
fumble along with it publicly :)
---
 examples/varnish-4.vcl | 74 ++
 1 file changed, 74 insertions(+)
 create mode 100644 examples/varnish-4.vcl

diff --git a/examples/varnish-4.vcl b/examples/varnish-4.vcl
new file mode 100644
index 000..7439679
--- /dev/null
+++ b/examples/varnish-4.vcl
@@ -0,0 +1,74 @@
+# Example VCL for Varnish 4.0 with public-inbox WWW code
+# This is based on what shipped for 3.x a long time ago (I think)
+# and I'm hardly an expert in VCL (nor should we expect anybody
+# who maintains a public-inbox HTTP interface to be).
+#
+# It seems to work for providing some protection from traffic
+# bursts; but perhaps the public-inbox WWW interface can someday
+# provide enough out-of-the-box performance that configuration
+# of an extra component is pointless.
+
+vcl 4.0;
+backend default {
+   .host = "127.0.0.1";
+   .port = "280";
+}
+
+sub vcl_recv {
+   if (req.restarts == 0) {
+   if (req.http.x-forwarded-for) {
+   set req.http.X-Forwarded-For =
+   req.http.X-Forwarded-For + ", " + client.ip;
+   } else {
+   set req.http.X-Forwarded-For = client.ip;
+   }
+   }
+   if (req.method != "GET" &&
+   req.method != "HEAD" &&
+   req.method != "PUT" &&
+   req.method != "POST" &&
+   req.method != "TRACE" &&
+   req.method != "OPTIONS" &&
+   req.method != "DELETE") {
+   /* Non-RFC2616 or CONNECT which is weird. */
+   return (pipe);
+   }
+   if (req.method != "GET" && req.method != "HEAD") {
+   /* We only deal with GET and HEAD by default */
+   return (pass);
+   }
+   if (req.http.Authorization || req.http.Cookie) {
+   /* Not cacheable by default */
+   return (pass);
+   }
+   return (hash);
+}
+
+sub vcl_hash {
+   hash_data(req.url);
+   if (req.http.host) {
+   hash_data(req.http.host);
+   } else {
+   hash_data(server.ip);
+   }
+   if (req.http.X-Forwarded-Proto) {
+   hash_data(req.http.X-Forwarded-Proto);
+   }
+   return (lookup);
+}
+
+sub vcl_backend_response {
+   set beresp.grace = 60s;
+   set beresp.do_stream = true;
+   if (beresp.ttl <= 0s ||
+   beresp.http.Set-Cookie ||
+   beresp.http.Vary == "*") {
+   /* Mark as "Hit-For-Pass" for the next 2 minutes */
+   set beresp.ttl = 120 s;
+   set beresp.uncacheable = true;
+   return (deliver);
+   } else {
+   set beresp.ttl = 10s;
+   }
+   return (deliver);
+}
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/2] performance-related notes+docs

2016-07-01 Thread Eric Wong
Opposing goals here, yes, I'm hell bent on sticking to a
scripting language for hackability and eliminating the need to
provide reproducible builds.  On the other hand, this should
be able to scale to handle LKML.

Eric Wong (2):
  TODO: update documentation for performance items
  examples: add varnish-4.vcl

 TODO   | 11 +++-
 examples/varnish-4.vcl | 74 ++
 2 files changed, 84 insertions(+), 1 deletion(-)
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 4/6] www: use PSGI env directly

2016-07-02 Thread Eric Wong
More work on the Plack::Request/CGI.pm removal front.
No need to access the PSGI env through an extra hash lookup.
---
 lib/PublicInbox/SearchView.pm | 2 +-
 lib/PublicInbox/WWW.pm| 9 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index ce1eff1..15bb823 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -86,7 +86,7 @@ sub mset_summary {
 sub err_txt {
my ($ctx, $err) = @_;
my $u = '//xapian.org/docs/queryparser.html';
-   $u = PublicInbox::Hval::prurl($ctx->{cgi}->{env}, $u);
+   $u = PublicInbox::Hval::prurl($ctx->{env}, $u);
$err =~ s/^\s*Exception:\s*//; # bad word to show users :P
$err = ascii_html($err);
"\nBad query: $err\n" .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 1e23c43..940e1c5 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -152,7 +152,7 @@ sub invalid_inbox {
$ctx->{git_dir} = $obj->{mainrepo};
$ctx->{git} = $obj->git;
# for PublicInbox::HTTP::weaken_task:
-   $ctx->{cgi}->{env}->{'pi-httpd.inbox'} = $obj;
+   $ctx->{env}->{'pi-httpd.inbox'} = $obj;
$ctx->{-inbox} = $obj;
$ctx->{inbox} = $inbox;
return;
@@ -162,7 +162,7 @@ sub invalid_inbox {
# generation and link things intended for nntp:// to https?://,
# so try to infer links and redirect them to the appropriate
# list URL.
-   $self->news_www->call($ctx->{cgi}->{env});
+   $self->news_www->call($ctx->{env});
 }
 
 # returns undef if valid, array ref response if invalid
@@ -284,7 +284,7 @@ sub footer {
$ctx->{footer} = join("\n",
'- ' . $desc,
"A {cgi}->{env}, PI_URL) .
+   PublicInbox::Hval::prurl($ctx->{env}, PI_URL) .
'">public-inbox, ' .
'anybody may post in plain-text (not HTML):',
$addr,
@@ -388,13 +388,12 @@ sub legacy_redirects {
} elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) {
r301($ctx, $1, $2);
} else {
-   $self->news_www->call($ctx->{cgi}->{env});
+   $self->news_www->call($ctx->{env});
}
 }
 
 sub r301 {
my ($ctx, $inbox, $mid, $suffix) = @_;
-   my $cgi = $ctx->{cgi};
my $obj = $ctx->{-inbox};
unless ($obj) {
my $r404 = invalid_inbox($ctx->{www}, $ctx, $inbox);
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 5/6] view: rely on internal query parser for 'o' param

2016-07-02 Thread Eric Wong
Plack::Request will check the request body when we merely
call "param", which is totally unnecessary and sneaky.
---
 lib/PublicInbox/View.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index fd882aa..1527959 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -908,7 +908,7 @@ sub emit_topics {
 
 sub emit_index_topics {
my ($ctx) = @_;
-   my ($off) = (($ctx->{cgi}->param('o') || '0') =~ /(\d+)/);
+   my ($off) = (($ctx->{qp}->{o} || '0') =~ /(\d+)/);
$ctx->{order} = [];
$ctx->{subjs} = {};
$ctx->{latest} = {};
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 3/6] extmsg: rework to use Inbox objects

2016-07-02 Thread Eric Wong
This is less code and hopefully easier-to-understand.
---
 lib/PublicInbox/ExtMsg.pm | 102 --
 lib/PublicInbox/Inbox.pm  |   5 +++
 2 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index e15abab..4b9e025 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -24,62 +24,53 @@ our @EXT_URL = (
 sub ext_msg {
my ($ctx) = @_;
my $pi_config = $ctx->{pi_config};
-   my $inbox = $ctx->{inbox};
+   my $cur = $ctx->{-inbox};
my $mid = $ctx->{mid};
-   my $cgi = $ctx->{cgi};
-   my $env = $cgi->{env};
+   my $env = $ctx->{env};
 
eval { require PublicInbox::Search };
my $have_xap = $@ ? 0 : 1;
-   my (@nox, @pfx);
+   my (@nox, @ibx);
 
foreach my $k (keys %$pi_config) {
$k =~ /\Apublicinbox\.([A-Z0-9a-z-]+)\.url\z/ or next;
my $name = $1;
-   next if $name eq $inbox;
-
-   my $git_dir = $pi_config->{"publicinbox.$name.mainrepo"};
-   defined $git_dir or next;
-
-   my $url = $pi_config->{"publicinbox.$name.url"};
-   defined $url or next;
-
-   $url =~ s!/+\z!!;
-   $url = PublicInbox::Hval::prurl($env, $url);
+   next if $name eq $cur->{name};
+   my $other = $pi_config->lookup_name($name) or next;
+   next unless $other->base_url;
+
+   my $s = $other->search;
+   if (!$s) {
+   push @nox, $other;
+   next;
+   }
 
# try to find the URL with Xapian to avoid forking
-   if ($have_xap) {
-   my $s;
-   my $doc_id = eval {
-   $s = PublicInbox::Search->new($git_dir);
-   $s->find_unique_doc_id('mid', $mid);
-   };
-   if ($@) {
-   # xapian not configured for this repo
-   } else {
-   # maybe we found it!
-   return r302($url, $mid) if (defined $doc_id);
-
-   # no point in trying the fork fallback if we
-   # know Xapian is up-to-date but missing the
-   # message in the current repo
-   push @pfx, { git_dir => $git_dir, url => $url };
-   next;
-   }
+   my $doc_id = eval { $s->find_unique_doc_id('mid', $mid) };
+   if ($@) {
+   # xapian not configured properly for this repo
+   push @nox, $other;
+   next;
}
 
-   # queue up for forking after we've tried Xapian on all of them
-   push @nox, { git_dir => $git_dir, url => $url };
+   # maybe we found it!
+   return r302($other, $mid) if defined $doc_id;
+
+   # no point in trying the fork fallback if we
+   # know Xapian is up-to-date but missing the
+   # message in the current repo
+   push @ibx, $other;
}
 
-   # Xapian not installed or configured for some repos
-   my $path = "HEAD:" . mid2path($mid);
+   # Xapian not installed or configured for some repos,
+   # do a full MID check:
+   if (@nox) {
+   my $path = mid2path($mid);
+   foreach my $other (@nox) {
+   my (undef, $type, undef) = $other->path_check($path);
 
-   foreach my $n (@nox) {
-   # TODO: reuse existing PublicInbox::Git objects to save forks
-   my $git = PublicInbox::Git->new($n->{git_dir});
-   my (undef, $type, undef) = $git->check($path);
-   return r302($n->{url}, $mid) if ($type && $type eq 'blob');
+   return r302($other, $mid) if $type && $type eq 'blob';
+   }
}
 
# fall back to partial MID matching
@@ -88,22 +79,15 @@ sub ext_msg {
 
eval { require PublicInbox::Msgmap };
my $have_mm = $@ ? 0 : 1;
-   my $base_url = $cgi->base->as_string;
if ($have_mm) {
my $tmp_mid = $mid;
-   my $url;
 again:
-   $url = $base_url . $inbox;
-   unshift @pfx, { git_dir => $ctx->{git_dir}, url => $url };
-   foreach my $pfx (@pfx) {
-   my $git_dir = delete $pfx->{git_dir} or next;
-   my $mm = eval { PublicInbox::Msgmap->new($git_dir) };
-
-   $mm or next;
+   unshift @ibx, $cur;
+   foreach my $ibx (@ibx) {
+   my $mm = $ibx->mm or next;
if (my $res = 

[PATCH 2/6] inbox: base_url method takes PSGI env hashref instead

2016-07-02 Thread Eric Wong
This is lighter and we can work further towards eliminating
our Plack::Request dependency entirely.
---
 lib/PublicInbox/Feed.pm  |  4 +---
 lib/PublicInbox/Inbox.pm | 12 
 lib/PublicInbox/Mbox.pm  |  2 +-
 lib/PublicInbox/WWW.pm   |  2 +-
 lib/PublicInbox/WwwStream.pm |  9 ++---
 t/view.t |  1 +
 6 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 2f141c4..ffbf5c8 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -297,13 +297,11 @@ sub get_feedopts {
my $pi_config = $ctx->{pi_config};
my $inbox = $ctx->{inbox};
my $obj = $ctx->{-inbox};
-   my $cgi = $ctx->{cgi};
my %rv = ( description => $obj->description );
 
$rv{address} = $obj->{address};
$rv{id_addr} = $obj->{-primary_address};
-   my $url_base;
-   $url_base = $obj->base_url($cgi); # CGI may be undef
+   my $url_base = $obj->base_url($ctx->{env});
if (my $mid = $ctx->{mid}) { # per-thread feed:
$rv{atomurl} = "$url_base$mid/t.atom";
} else {
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index ada713c..96c9265 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -71,10 +71,14 @@ sub cloneurl {
 }
 
 sub base_url {
-   my ($self, $prq) = @_; # Plack::Request
-   if (defined $prq) {
-   my $url = $prq->base->as_string;
-   $url .= '/' if $url !~ m!/\z!; # for mount in Plack::Builder
+   my ($self, $env) = @_;
+   if ($env) { # PSGI env
+   my $scheme = $env->{'psgi.url_scheme'};
+   my $host_port = $env->{HTTP_HOST} ||
+   "$env->{SERVER_NAME}:$env->{SERVER_PORT}";
+   my $url = "$scheme://$host_port". ($env->{SCRIPT_NAME} || '/');
+   # for mount in Plack::Builder
+   $url .= '/' if $url !~ m!/\z!;
$url .= $self->{name} . '/';
} else {
# either called from a non-PSGI environment (e.g. NNTP/POP3)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 1c97f95..9dad0f6 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -28,7 +28,7 @@ sub msg_str {
$header_obj->header_set($d);
}
my $ibx = $ctx->{-inbox};
-   my $base = $ibx->base_url($ctx->{cgi});
+   my $base = $ibx->base_url($ctx->{env});
my $mid = mid_clean($header_obj->header('Message-ID'));
$mid = uri_escape_utf8($mid);
my @append = (
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index c4509bd..1e23c43 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -401,7 +401,7 @@ sub r301 {
return $r404 if $r404;
$obj = $ctx->{-inbox};
}
-   my $url = $obj->base_url($cgi);
+   my $url = $obj->base_url($ctx->{env});
my $qs = $ctx->{env}->{QUERY_STRING};
$url .= (uri_escape_utf8($mid) . '/') if (defined $mid);
$url .= $suffix if (defined $suffix);
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index fdab4da..be6ce2e 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -62,13 +62,8 @@ sub _html_end {
my $obj = $ctx->{-inbox};
my $desc = ascii_html($obj->description);
 
-   # FIXME: cleanup
-   my $env = $ctx->{env};
-   my $scheme = $env->{'psgi.url_scheme'};
-   my $host_port = $env->{HTTP_HOST} ||
-   "$env->{SERVER_NAME}:$env->{SERVER_PORT}";
-   my $http = "$scheme://$host_port".($env->{SCRIPT_NAME} || '/');
-   $http = URI->new($http . $obj->{name})->canonical->as_string;
+   my $http = $obj->base_url($ctx->{env});
+   chop $http;
my %seen = ( $http => 1 );
my @urls = ($http);
foreach my $u (@{$obj->cloneurl}) {
diff --git a/t/view.t b/t/view.t
index 4fdd151..4cee439 100644
--- a/t/view.t
+++ b/t/view.t
@@ -25,6 +25,7 @@ my $ctx = {
-inbox => Plack::Util::inline_object(
name => 'test',
search => sub { undef },
+   base_url => sub { 'http://example.com/' },
cloneurl => sub {[]},
description => sub { '' }),
 };
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/6] misc cleanups

2016-07-02 Thread Eric Wong
Should be pretty obvious, and the diffstat looks good :)

Eric Wong (6):
  TODO: clarify streaming Email::MIME replacement
  inbox: base_url method takes PSGI env hashref instead
  extmsg: rework to use Inbox objects
  www: use PSGI env directly
  view: rely on internal query parser for 'o' param
  www: remove Plack::Request dependency entirely

 TODO  |   1 +
 lib/PublicInbox/ExtMsg.pm | 102 ++
 lib/PublicInbox/Feed.pm   |  13 +++---
 lib/PublicInbox/Inbox.pm  |  17 +--
 lib/PublicInbox/Mbox.pm   |   2 +-
 lib/PublicInbox/SearchView.pm |   2 +-
 lib/PublicInbox/View.pm   |   2 +-
 lib/PublicInbox/WWW.pm|  24 +-
 lib/PublicInbox/WwwStream.pm  |   9 +---
 script/public-inbox-httpd |   1 -
 t/httpd-corner.t  |   2 +-
 t/httpd-unix.t|   2 +-
 t/httpd.t |   2 +-
 t/plack.t |   2 +-
 t/psgi_attach.t   |   2 +-
 t/psgi_mount.t|   2 +-
 t/view.t  |   1 +
 17 files changed, 86 insertions(+), 100 deletions(-)
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 1/6] TODO: clarify streaming Email::MIME replacement

2016-07-02 Thread Eric Wong
I bet there's a billion other improvements to be made elsewhere.
---
 TODO | 1 +
 1 file changed, 1 insertion(+)

diff --git a/TODO b/TODO
index 30ba38b..644fa03 100644
--- a/TODO
+++ b/TODO
@@ -53,6 +53,7 @@ all need to be considered for everything we introduce)
 * streaming Email::MIME replacement: currently we generate many
   allocations/strings for headers we never look at and slurp
   entire message bodies into memory.
+  (this is pie-in-the-sky territory...)
 
 * Allow in-place Xapian updates without clobbering the whole
   index (versioning each doc data entry?) for big archives
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 6/6] www: remove Plack::Request dependency entirely

2016-07-02 Thread Eric Wong
Lighter and ever-so-slightly faster!

Most importantly, this won't do non-obvious stuff behind our
backs like trying to parse a POST request body for a query
string param.
---
 lib/PublicInbox/Feed.pm   |  9 +
 lib/PublicInbox/WWW.pm| 13 +
 script/public-inbox-httpd |  1 -
 t/httpd-corner.t  |  2 +-
 t/httpd-unix.t|  2 +-
 t/httpd.t |  2 +-
 t/plack.t |  2 +-
 t/psgi_attach.t   |  2 +-
 t/psgi_mount.t|  2 +-
 9 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index ffbf5c8..2983514 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -119,17 +119,18 @@ sub end_feed {
 
 sub emit_atom_thread {
my ($cb, $ctx) = @_;
-   my $res = $ctx->{srch}->get_thread($ctx->{mid});
+   my $mid = $ctx->{mid};
+   my $res = $ctx->{srch}->get_thread($mid);
return _no_thread($cb) unless $res->{total};
my $feed_opts = get_feedopts($ctx);
my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]);
+   my $ibx = $ctx->{-inbox};
+   my $html_url = $ibx->base_url($ctx->{env});
+   $html_url .= PublicInbox::Hval->new_msgid($mid)->as_href;
 
-   my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
-   $html_url =~ s!/t\.atom\z!/!;
$feed_opts->{url} = $html_url;
$feed_opts->{emit_header} = 1;
 
-   my $ibx = $ctx->{-inbox};
foreach my $msg (@{$res->{msgs}}) {
my $s = feed_entry($feed_opts, mid2path($msg->mid), $ibx);
$fh->write($s) if defined $s;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 940e1c5..5425308 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -13,7 +13,6 @@ package PublicInbox::WWW;
 use 5.008;
 use strict;
 use warnings;
-use Plack::Request;
 use PublicInbox::Config;
 use PublicInbox::Hval;
 use URI::Escape qw(uri_escape_utf8 uri_unescape);
@@ -40,9 +39,7 @@ sub run {
 
 sub call {
my ($self, $env) = @_;
-   my $cgi = Plack::Request->new($env);
-   my $ctx = { cgi => $cgi, env => $env, www => $self,
-   pi_config => $self->{pi_config} };
+   my $ctx = { env => $env, www => $self, pi_config => $self->{pi_config} 
};
 
# we don't care about multi-value
my %qp = map {
@@ -267,10 +264,11 @@ sub footer {
my $urls;
my @urls = @{$obj->cloneurl};
my %seen = map { $_ => 1 } @urls;
-   my $cgi = $ctx->{cgi};
-   my $http = $cgi->base->as_string . $obj->{name};
+   my $env = $ctx->{env};
+   my $http = $obj->base_url($env);
+   chop $http;
$seen{$http} or unshift @urls, $http;
-   my $ssoma_url = PublicInbox::Hval::prurl($ctx->{env}, SSOMA_URL);
+   my $ssoma_url = PublicInbox::Hval::prurl($env, SSOMA_URL);
if (scalar(@urls) == 1) {
$urls = "URL for ssoma or git clone --mirror $urls[0]);
@@ -329,7 +327,6 @@ sub get_thread_mbox {
 sub get_thread_atom {
my ($ctx) = @_;
searcher($ctx) or return need_search($ctx);
-   $ctx->{self_url} = $ctx->{cgi}->uri->as_string;
require PublicInbox::Feed;
PublicInbox::Feed::generate_thread_atom($ctx);
 }
diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd
index f19582f..8ba42c2 100755
--- a/script/public-inbox-httpd
+++ b/script/public-inbox-httpd
@@ -9,7 +9,6 @@ use Plack::Util;
 use PublicInbox::Daemon;
 use PublicInbox::HTTP;
 use PublicInbox::HTTPD;
-use Plack::Request;
 use Plack::Builder;
 my %httpds;
 my $app;
diff --git a/t/httpd-corner.t b/t/httpd-corner.t
index b9eaa6f..5ecc69b 100644
--- a/t/httpd-corner.t
+++ b/t/httpd-corner.t
@@ -7,7 +7,7 @@ use warnings;
 use Test::More;
 use Time::HiRes qw(gettimeofday tv_interval);
 
-foreach my $mod (qw(Plack::Util Plack::Request Plack::Builder Danga::Socket
+foreach my $mod (qw(Plack::Util Plack::Builder Danga::Socket
HTTP::Date HTTP::Status)) {
eval "require $mod";
plan skip_all => "$mod missing for httpd-corner.t" if $@;
diff --git a/t/httpd-unix.t b/t/httpd-unix.t
index 16f7bdd..ef827fc 100644
--- a/t/httpd-unix.t
+++ b/t/httpd-unix.t
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use Test::More;
 
-foreach my $mod (qw(Plack::Util Plack::Request Plack::Builder Danga::Socket
+foreach my $mod (qw(Plack::Util Plack::Builder Danga::Socket
HTTP::Date HTTP::Status)) {
eval "require $mod";
plan skip_all => "$mod missing for httpd-unix.t" if $@;
diff --git a/t/httpd.t b/t/httpd.t
index 0e19b56..c2e7360 100644
--- a/t/httpd.t
+++ b/t/httpd.t
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 use Test::More;
 
-foreach my $mod (qw(Plack::Util Plack::Request Plack::Builder Danga::Socket
+foreach my $mod (qw(Plack::Util Plack::Builder Danga::Socket
HTTP::Date HTTP::Status)) {
  

[PATCH] linkify: allow '!' in URLs

2016-07-02 Thread Eric Wong
GoogleGroups URLs often contain '!' in them
---
 lib/PublicInbox/Linkify.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 25f0b48..d4df689 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -15,9 +15,9 @@ use warnings;
 use Digest::SHA qw/sha1_hex/;
 
 my $SALT = rand;
-my $LINK_RE = qr!\b((?:ftps?|https?|nntps?|gopher)://
+my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
 [\@:\w\.-]+/
-?[,:~\$\@\w\+\&\?\.\%\;/#=-]*)!x;
+?[!,:~\$\@\w\+\&\?\.\%\;/#=-]*)}x;
 
 sub new { bless {}, shift }
 
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/3] misc NNTP improvements

2016-07-02 Thread Eric Wong
A couple of minor cleanups and shutdown improvement for NNTP.

Eric Wong (3):
  config: introduce each_inbox for iteration
  nntp: simplify update_idle_time
  nntp: respect 3 minute idle time for shutdown

 lib/PublicInbox/Config.pm | 18 ++
 lib/PublicInbox/ExtMsg.pm | 32 +---
 lib/PublicInbox/NNTP.pm   | 20 ++--
 lib/PublicInbox/NNTPD.pm  | 21 +
 t/nntpd.t |  1 +
 5 files changed, 55 insertions(+), 37 deletions(-)

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 2/3] nntp: simplify update_idle_time

2016-07-02 Thread Eric Wong
This ought to make things easier when we add TLS support.
---
 lib/PublicInbox/NNTP.pm | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 4b116a7..b07e184 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -60,9 +60,8 @@ sub next_tick () {
 
 sub update_idle_time ($) {
my ($self) = @_;
-   my $tmp = $self->{sock} or return;
-   $tmp = fileno($tmp);
-   defined $tmp and $EXPMAP->{$tmp} = [ now(), $self ];
+   my $fd = $self->{fd};
+   defined $fd and $EXPMAP->{$fd} = [ now(), $self ];
 }
 
 # reduce FD pressure by closing some "git cat-file --batch" processes
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 3/3] nntp: respect 3 minute idle time for shutdown

2016-07-02 Thread Eric Wong
This avoids breaking clients on graceful shutdown since
NNTP responses should usually be quick.
---
 lib/PublicInbox/NNTP.pm | 15 ---
 t/nntpd.t   |  1 +
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index b07e184..56d0838 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -998,10 +998,19 @@ sub watch_read {
$rv;
 }
 
+sub not_idle_long ($$) {
+   my ($self, $now) = @_;
+   defined(my $fd = $self->{fd}) or return;
+   my $ary = $EXPMAP->{$fd} or return;
+   my $exp_at = $ary->[0] + $EXPTIME;
+   $exp_at > $now;
+}
+
 # for graceful shutdown in PublicInbox::Daemon:
-sub busy () {
-   my ($self) = @_;
-   ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size});
+sub busy {
+   my ($self, $now) = @_;
+   ($self->{rbuf} ne '' || $self->{long_res} || $self->{write_buf_size} ||
+not_idle_long($self, $now));
 }
 
 1;
diff --git a/t/nntpd.t b/t/nntpd.t
index 5875b73..7192d78 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -219,6 +219,7 @@ EOF
is(scalar @r, 1, 'only one response line');
}
 
+   $n = $s = undef;
is($pid, waitpid($pid, 0), 'nntpd exited successfully');
my $eout = eval {
local $/;
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] view: remove self-linkification in thread views

2016-07-02 Thread Eric Wong
There is no point in diverting readers' attention with
an unnecessary link here.
---
 lib/PublicInbox/View.pm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1527959..e8395ae 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -219,6 +219,7 @@ sub _th_index_lite {
my $this = $map->[1];
$this =~ s!\n\z!\n!s;
$this =~ s!]+>([^<]+)!$1!s; # no point linking to self
$rv .= "@ $this";
my $node = $map->[2];
if (my $child = $node->child) {
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/2] varnish tweaks and such

2016-07-02 Thread Eric Wong
Less configuration and more explanation is better :)

Eric Wong (2):
  examples: remove X-Forwarded-For mentions
  examples/varnish-4.vcl: comments and tweaks

 examples/public-inbox.psgi |  2 +-
 examples/varnish-4.vcl | 34 ++
 script/public-inbox.cgi|  2 +-
 3 files changed, 16 insertions(+), 22 deletions(-)
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] githttpbackend: match Content-Type of git-http-backend(1)

2016-07-02 Thread Eric Wong
This will allow cache proxies such as Varnish to avoid
caching data sent by us.
---
 lib/PublicInbox/GitHTTPBackend.pm | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm 
b/lib/PublicInbox/GitHTTPBackend.pm
index b485192..a9c0e9c 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -64,15 +64,29 @@ sub drop_client ($) {
}
 }
 
+my $prev = 0;
+my $exp;
+sub cache_one_year {
+   my ($h) = @_;
+   my $t = time + 31536000;
+   push @$h, 'Expires', $t == $prev ? $exp : ($exp = time2str($prev = $t)),
+   'Cache-Control', 'public, max-age=31536000';
+}
+
 sub serve_dumb {
my ($env, $git, $path) = @_;
 
my @h;
my $type;
-   if ($path =~ /\A(?:$BIN)\z/o) {
-   $type = 'application/octet-stream';
-   push @h, 'Expires', time2str(time + 31536000);
-   push @h, 'Cache-Control', 'public, max-age=31536000';
+   if ($path =~ m!\Aobjects/[a-f0-9]{2}/[a-f0-9]{38}\z!) {
+   $type = 'application/x-git-loose-object';
+   cache_one_year(\@h);
+   } elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.pack\z!) {
+   $type = 'application/x-git-packed-objects';
+   cache_one_year(\@h);
+   } elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.idx\z!) {
+   $type = 'application/x-git-packed-objects-toc';
+   cache_one_year(\@h);
} elsif ($path =~ /\A(?:$TEXT)\z/o) {
$type = 'text/plain';
push @h, @no_cache;
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] wwwstream: wording/grammar tweaks in trailer

2016-07-02 Thread Eric Wong
git.git documentation uses "clonable" so that's probably
a better term than "clone-able".  Also, shorten the section
for retrieving our code and remove an obvious typo.
---
 lib/PublicInbox/WwwStream.pm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index be6ce2e..285416d 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -57,7 +57,7 @@ sub _html_top ($) {
 
 sub _html_end {
my ($self) = @_;
-   my $urls = 'Archives are clone-able:';
+   my $urls = 'Archives are clonable:';
my $ctx = $self->{ctx};
my $obj = $ctx->{-inbox};
my $desc = ascii_html($obj->description);
@@ -81,8 +81,8 @@ sub _html_end {
''.join("\n",
'- ' . $desc,
$urls,
-   'Archived served using code from public-inbox:',
-   qq(\tgit clone $url public-inbox),
+   'Served with public-inbox: '.
+   qq(git clone $url public-inbox),
).'';
 }
 
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 2/2] examples/varnish-4.vcl: comments and tweaks

2016-07-02 Thread Eric Wong
Document and simplify things a bit.  The major functional change
is we no longer waste space caching objects from dumb HTTP
clones.
---
 examples/varnish-4.vcl | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/examples/varnish-4.vcl b/examples/varnish-4.vcl
index 999f954..2429603 100644
--- a/examples/varnish-4.vcl
+++ b/examples/varnish-4.vcl
@@ -10,24 +10,15 @@
 
 vcl 4.0;
 backend default {
+   # this is where public-inbox-http listens
.host = "127.0.0.1";
.port = "280";
 }
 
 sub vcl_recv {
-   if (req.method != "GET" &&
-   req.method != "HEAD" &&
-   req.method != "PUT" &&
-   req.method != "POST" &&
-   req.method != "TRACE" &&
-   req.method != "OPTIONS" &&
-   req.method != "DELETE") {
-   /* Non-RFC2616 or CONNECT which is weird. */
-   return (pipe);
-   }
+   /* pipe POST and any other weird methods directly to backend */
if (req.method != "GET" && req.method != "HEAD") {
-   /* We only deal with GET and HEAD by default */
-   return (pass);
+   return (pipe);
}
if (req.http.Authorization || req.http.Cookie) {
/* Not cacheable by default */
@@ -36,6 +27,13 @@ sub vcl_recv {
return (hash);
 }
 
+sub vcl_pipe {
+   # By default Connection: close is set on all piped requests by varnish,
+   # but public-inbox-httpd supports persistent connections well :)
+   unset bereq.http.connection;
+   return (pipe);
+}
+
 sub vcl_hash {
hash_data(req.url);
if (req.http.host) {
@@ -43,6 +41,7 @@ sub vcl_hash {
} else {
hash_data(server.ip);
}
+   /* we generate fully-qualified URLs for Atom feeds and redirects */
if (req.http.X-Forwarded-Proto) {
hash_data(req.http.X-Forwarded-Proto);
}
@@ -53,6 +52,8 @@ sub vcl_backend_response {
set beresp.grace = 60s;
set beresp.do_stream = true;
if (beresp.ttl <= 0s ||
+   /* no point in caching stuff git already stores on disk */
+   beresp.http.Content-Type ~ "application/x-git" ||
beresp.http.Set-Cookie ||
beresp.http.Vary == "*") {
/* Mark as "Hit-For-Pass" for the next 2 minutes */
@@ -60,6 +61,7 @@ sub vcl_backend_response {
set beresp.uncacheable = true;
return (deliver);
} else {
+   /* short TTL for up-to-dateness, our PSGI is not that slow */
set beresp.ttl = 10s;
}
return (deliver);
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] address: remove quotes around names

2016-06-19 Thread Eric Wong
They're needless for actual display once outside of email
headers.  But we will still show them when displaying mock
headers in the permalink view.
---
 lib/PublicInbox/Address.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm
index 8b3daf5..772aded 100644
--- a/lib/PublicInbox/Address.pm
+++ b/lib/PublicInbox/Address.pm
@@ -18,7 +18,8 @@ sub from_name {
$name =~ s/\@.*//;
}
$name =~ tr/\r\n\t/ /;
-   $name =~ s/\A\s*//;
+   $name =~ s/\A['"\s]*//;
+   $name =~ s/['"\s]*\z//;
$name;
 }
 
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 2/7] feed: avoid needless method dispatches on 404

2016-06-19 Thread Eric Wong
We overuse streaming, here.  Allow Content-Length to be
calculated in this case.
---
 lib/PublicInbox/Feed.pm | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 045e495..d88421b 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -79,9 +79,8 @@ sub emit_atom {
 
 sub _no_thread {
my ($cb) = @_;
-   my $fh = $cb->([404, ['Content-Type' => 'text/plain']]);
-   $fh->write("No feed found for thread\n");
-   $fh->close;
+   $cb->([404, ['Content-Type', 'text/plain'],
+   ["No feed found for thread\n"]]);
 }
 
 sub end_feed {
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 0/2] search: support YYYYMMDD search ranges

2016-08-16 Thread Eric Wong
Not deployed to clear-net sites yet; I'm reindexing the
http://czquwvybam4bgbro.onion/git/ onion right now.

Eric Wong (2):
  search: drop pointless range processors for Unix timestamp
  search: add YYYYMMDD search range via "d:" prefix

 lib/PublicInbox/Search.pm| 13 +++--
 lib/PublicInbox/SearchIdx.pm |  7 ++-
 t/search.t   |  9 +
 3 files changed, 18 insertions(+), 11 deletions(-)

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



Re: Draft of Git Rev News edition 18

2016-08-16 Thread Eric Wong
Josh Triplett <j...@joshtriplett.org> wrote:
> On Tue, Aug 16, 2016 at 09:30:27AM +0000, Eric Wong wrote:
> > Jakub Narębski <jna...@gmail.com> wrote:
> > > It's a great pity that https://public-inbox.org/ is just
> > > directory index, not a true home page.
> > 
> > +Cc meta@public-inbox.org
> > 
> > I'm not sure one could do better while staying true to the
> > minimalist nature of plain-text email.
> > 
> > In the spirit of decentralization, there may not be /a/
> > homepage, but many.   Everything is meant to be clonable with each
> > public-inbox, so maybe every public-inbox will have a code
> > branch attached to it with the source+docs bundled.
> 
> It'd be nice if it had a prominent list of all lists available; as far
> as I can tell, the main page has no link to /git/.

I'm not sure that's necessary; most of the traffic seems to come
from /git/MESSAGE_ID/ links posted by others.  So it's
probably more inside-out exposure than anything.

As for other projects, I'm not aware of anybody else using it,
yet.  I have some small projects using it, but most of those are
one-off throwaways and I'm not comfortable promoting those along
with public-inbox.  I admit: I'm not comfortable promoting
anything I do, really.

I do wish more people would start using the .onions, though...
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] HACKING: minor updates and add to the website

2016-08-16 Thread Eric Wong
Also, at least add one of the Tor mirrors (the rest will
be discoverable through the mirrors themselves).
---
 Documentation/include.mk |  3 ++-
 HACKING  | 19 +++
 README   |  5 +
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/Documentation/include.mk b/Documentation/include.mk
index 9427887..396a258 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -4,7 +4,8 @@ all::
 
 RSYNC = rsync
 RSYNC_DEST = public-inbox.org:/srv/public-inbox/
-docs := README COPYING INSTALL TODO $(shell git ls-files 'Documentation/*.txt')
+docs := README COPYING INSTALL TODO HACKING
+docs += $(shell git ls-files 'Documentation/*.txt')
 INSTALL = install
 POD2MAN ?= pod2man
 POD2MAN_OPTS = -v --stderr -d 1994-10-02 -c 'public-inbox user manual'
diff --git a/HACKING b/HACKING
index a766820..3c2235a 100644
--- a/HACKING
+++ b/HACKING
@@ -1,8 +1,14 @@
 hacking public-inbox
 
 
-Send all patches via to our self-hosting inbox at meta@public-inbox.org
-It is archived at .
+Send all patches and "git request-pull"-formatted emails to our
+self-hosting inbox at meta@public-inbox.org
+It is archived at: https://public-inbox.org/meta/
+and http://hjrcffqmbrq6wope.onion/meta/ (using Tor)
+
+Contributions are email-driven, just like contributing to git
+itself or the Linux kernel; however anonymous and pseudonymous
+contributions will always be welcome.
 
 Please consider our goals in mind:
 
@@ -37,5 +43,10 @@ in scripting languages (currently Perl 5).
 Performance should be reasonably good for server administrators, too,
 and we will sacrifice features to achieve predictable performance.
 
-See design_www.txt and design_notes.txt in the Documentation/ directory
-for design decisions made during development.
+See design_www.txt and design_notes.txt in the Documentation/
+directory for design decisions made during development.
+
+For now, one may optionally subscribe to the mailing list by
+sending an email to: meta+subscr...@public-inbox.org
+(and confirming).  However, reading over the mailing list is
+the least reliable method of reading a public-inbox.
diff --git a/README b/README
index f56d68d..7618727 100644
--- a/README
+++ b/README
@@ -111,6 +111,11 @@ The archives are readable via NNTP or HTTP:
nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
https://public-inbox.org/meta/
 
+And as Tor hidden services:
+
+   http://hjrcffqmbrq6wope.onion/meta/
+   nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
+
 You may also clone all messages via git:
 
git clone --mirror https://public-inbox.org/meta/
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



Re: Working with public-inbox.org [Was: [PATCH] rev-parse: respect core.hooksPath in --git-path]

2016-08-16 Thread Eric Wong
Junio C Hamano  wrote:
> Stefan Beller  writes:
> > * Should the public-inbox offer another link to patches 1-n, without
> >   the cover letter? Or should it add instructions:
> >
> > If this is a patch series you can apply it locally as:
> > curl  >tmpXXX
> > git am tmpXXX && git am --skip && git am --continue

Currently for web users, I suggest:

curl $URL >tmpXXX

# open tmp and tag+copy to patchesXXX using MUA of choice:
# (also seems to be what Jeff describes):
mutt -f tmpXXX

git am patchesXXX

> I do not think it is sensible for "cover-letter" specific
> instructions.  However, I do not think it is unreasonable to either
> add another mbox.gz link or replace the behaviour of mbox.gz link so
> that you can grab a mbox that contains "this message and everything
> after it in the thread".  That way, I could open the first message,
> see something like this I found in your message:
> 
> >> Thread overview: 4+ messages in thread (expand / mbox.gz / Atom feed / 
> >> [top])
> >> 2016-08-15 23:06 Jacob Keller [this message]
> >> 2016-08-15 23:07 ` [PATCH v6 1/3] diff.c: remove output_prefix_length 
> >> field Jacob Keller
> >> 2016-08-15 23:07 ` [PATCH v6 2/3] graph: add support for --line-prefix on 
> >> all graph-aware output Jacob Keller
> >> 2016-08-15 23:07 ` [PATCH v6 3/3] diff: add SUBMODULE_DIFF format to 
> >> display submodule diff Jacob Keller
> 
> and then go to 1/3 and click that "this and everything that
> follows".

Adding more links might still fall down in cases where
fixup/squash patches are sent for specific patches in a series;
or when a v{N+1} series is posted in-reply-to an existing
series.

Perhaps adding checkbox next to each item might work as a
select-to-include-in-mbox download form.  However, I'm already
finding the lack of horizontal space disconcerting.

Maybe the YYYY-MM-DD could be shortened to YYYYMMDD.  It would
be closer to the date searching syntax used by mairix, as well
as the search enhancement I started working on earlier today:

  https://public-inbox.org/meta/20160816084926.29394-...@80x24.org/T/
  (still will deploy soonish)
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] TODO: several updates

2017-02-07 Thread Eric Wong
Always plenty to do while working on this...
---
 TODO | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/TODO b/TODO
index b85887a..3163b8a 100644
--- a/TODO
+++ b/TODO
@@ -11,15 +11,28 @@ all need to be considered for everything we introduce)
 
 * POP3 server, since some webmail providers support external POP3:
   https://public-inbox.org/meta/20160411034104.ga7...@dcvr.yhbt.net/
+  Perhaps make this depend solely the NNTP server and work as a proxy.
+  Meaning users can run this without needing a full copy of the
+  archives in a git repository.
+
+* HTTP and NNTP proxy support.  Allow us to be a frontend for
+  firewalled off (or Tor-exclusive) instances.  The use case is
+  for offering a publically accessible IP with a cheap VPS,
+  yet storing large amounts of data on computers without a
+  public IP behind a home Internet connection.
 
 * TLS support for various daemons (including STARTTLS for NNTP and POP3)
 
+* NNTP COMPRESS extension (see innd)
+
 * Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead
 
 * Optional reply-to-list support for mirroring lists that want it :<
   Reply-to-list encourages the existing list as a single-point-of-failure,
   but having an extra mirror using public-inbox code is nice regardless.
 
+* Optional reply-to-nobody for dead lists.
+
 * Configurable linkification for per-inbox shorthands:
   "$gmane/123456" could be configured to expand to the
   appropriate link pointing to the gmane.org list archives,
@@ -36,7 +49,7 @@ all need to be considered for everything we introduce)
 
 * configurable constants (index limits, search results)
 
-* handle messages with multiple Message-IDs
+* handle messages with multiple Message-IDs (how?)
 
 * handle broken double-bracketed References properly (maybe)
   and totally broken Message-IDs
-- 
EW




[PATCH] config: do not slurp lines into memory

2017-02-08 Thread Eric Wong
There's no need to hold everything in memory here,
since apparently "foreach" will read everything at
once in list context.

(for some reason, I thought Perl5 was smart enough
 to avoid creating a temporary array, here...)
---
 lib/PublicInbox/Config.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 28b5bdb..f6275cd 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -111,7 +111,7 @@ sub git_config_dump {
my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
my %rv;
local $/ = "\n";
-   foreach my $line (<$fh>) {
+   while (defined(my $line = <$fh>)) {
chomp $line;
my ($k, $v) = split(/=/, $line, 2);
my $cur = $rv{$k};
-- 
EW




[PATCH] searchidx: deal with empty In-Reply-To and References headers

2017-02-06 Thread Eric Wong
In some messages, these headers exist, but have empty values.
Do not let empty values throw off our search indexer to tie
threads together, as it can make non-sensical threads grouped
to a Message-Id of "" (empty string).

See

for an example of such a message.

Thanks-to: Johannes Schindelin 
  
---
 Not fixed on the live sites, yet, but it will be once reindexing
 finishes (eatmydata public-inbox-index --reindex $GIT_DIR)

 lib/PublicInbox/SearchIdx.pm | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index d63dd7c..1142ca7 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -292,11 +292,15 @@ sub link_message {
my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
my $refs = $hdr->header_raw('References');
-   my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
+   my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : ();
my $irt = $hdr->header_raw('In-Reply-To');
if (defined $irt) {
-   $irt = mid_clean($irt);
-   $irt = undef if $mid eq $irt;
+   if ($irt eq '') {
+   $irt = undef;
+   } else {
+   $irt = mid_clean($irt);
+   $irt = undef if $mid eq $irt;
+   }
}
 
my $tid;
-- 
EW



[PATCH 1/3] searchidx: reindex clobbers old thread IDs

2017-02-06 Thread Eric Wong
We cannot always reuse thread IDs since our threading
logic may change as bugs are fixed.
---
 lib/PublicInbox/SearchIdx.pm | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1142ca7..bc003c6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -157,6 +157,10 @@ sub add_message {
# it will also clobber any existing regular message
$doc_id = $smsg->{doc_id};
$old_tid = $smsg->thread_id;
+
+   # no need to remove_term for old_tid, we use a new
+   # doc to replace the old one when reindexing:
+   $old_tid = undef if $self->{reindex};
}
$smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
@@ -464,7 +468,7 @@ sub _git_log {
 sub _index_sync {
my ($self, $opts) = @_;
my $tip = $opts->{ref} || 'HEAD';
-   my $reindex = $opts->{reindex};
+   $self->{reindex} = $opts->{reindex};
my ($mkey, $last_commit, $lx, $xlog);
$self->{git}->batch_prepare;
my $xdb = _xdb_acquire($self);
@@ -474,7 +478,7 @@ sub _index_sync {
$mkey = 'last_commit';
$last_commit = $xdb->get_metadata('last_commit');
$lx = $last_commit;
-   if ($reindex) {
+   if ($self->{reindex}) {
$lx = '';
$mkey = undef if $last_commit ne '';
}
-- 
EW




[PATCH 3/3] search: schema version bump for empty References/In-Reply-To

2017-02-06 Thread Eric Wong
We cannot distinguish between legitimate ghosts and mis-threaded
messages before commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0
("searchidx: deal with empty In-Reply-To and References headers")
so we must rebuild the index in parallel to fix it.
---
 lib/PublicInbox/Search.pm | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index c909424..8c72fa1 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -39,7 +39,9 @@ use constant {
# 10 - optimize doc for NNTP overviews
# 11 - merge threads when vivifying ghosts
# 12 - change MMDD value column to numeric
-   SCHEMA_VERSION => 12,
+   # 13 - fix threading for empty References/In-Reply-To
+   #  (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
+   SCHEMA_VERSION => 13,
 
# n.b. FLAG_PURE_NOT is expensive not suitable for a public website
# as it could become a denial-of-service vector
-- 
EW




[PATCH 0/3] force reindex for threading changes

2017-02-06 Thread Eric Wong
We cannot rely on in-place --reindex to handle thread_id
changes when we fix threading bugs in the search indexer
like in commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0
("searchidx: deal with empty In-Reply-To and References headers")

So, bump the schema version and pay the cost of requiring
extra disk space to create a new index in parallel.




[PATCH 2/3] Revert "searchidx: reindex clobbers old thread IDs"

2017-02-06 Thread Eric Wong
Oops, that's broken, too.  I guess the only way to reindex
after fixing the thread detection is to start from scratch.

This reverts commit 5d91adedf5f33ef1cb87df2a86306ddf370b4f8d.
---
 lib/PublicInbox/SearchIdx.pm | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index bc003c6..1142ca7 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -157,10 +157,6 @@ sub add_message {
# it will also clobber any existing regular message
$doc_id = $smsg->{doc_id};
$old_tid = $smsg->thread_id;
-
-   # no need to remove_term for old_tid, we use a new
-   # doc to replace the old one when reindexing:
-   $old_tid = undef if $self->{reindex};
}
$smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
@@ -468,7 +464,7 @@ sub _git_log {
 sub _index_sync {
my ($self, $opts) = @_;
my $tip = $opts->{ref} || 'HEAD';
-   $self->{reindex} = $opts->{reindex};
+   my $reindex = $opts->{reindex};
my ($mkey, $last_commit, $lx, $xlog);
$self->{git}->batch_prepare;
my $xdb = _xdb_acquire($self);
@@ -478,7 +474,7 @@ sub _index_sync {
$mkey = 'last_commit';
$last_commit = $xdb->get_metadata('last_commit');
$lx = $last_commit;
-   if ($self->{reindex}) {
+   if ($reindex) {
$lx = '';
$mkey = undef if $last_commit ne '';
}
-- 
EW




[PATCH] searchview: clarify numeric summary at bottom

2017-02-05 Thread Eric Wong
Xapian can only give estimated results when a result limit is
given to it, so make clear it is an estimate to avoid showing
non-sensical ranges when no results are returned.
---
 lib/PublicInbox/SearchView.pm | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index ccc53ab..5a95a05 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -145,15 +145,21 @@ sub search_nav_bot {
my $o = $q->{o};
my $end = $o + $nr;
my $beg = $o + 1;
-   my $rv = "Results $beg-$end of $total";
+   my $rv = '';
+   if ($beg <= $end) {
+   $rv .= "Results $beg-$end of $total";
+   $rv .= ' (estimated)' if $end != $total;
+   } else {
+   $rv .= "No more results, only $total";
+   }
my $n = $o + $LIM;
 
if ($n < $total) {
my $qs = $q->qs_html(o => $n);
-   $rv .= qq{, next}
+   $rv .= qq{  next}
}
if ($o > 0) {
-   $rv .= $n < $total ? '/' : ',  ';
+   $rv .= $n < $total ? '/' : '   ';
my $p = $o - $LIM;
my $qs = $q->qs_html(o => ($p > 0 ? $p : 0));
$rv .= qq{prev};
-- 
EW




[PATCH 2/2] add filter for Subject: tags

2017-01-25 Thread Eric Wong
Some mailing lists add annoying tags into the Subject line which
discourages readers from doing proper mail organization on the
client side.  They also waste precious screen space and
attention span.

Remove them from our archives to reduce clutter.
---
 MANIFEST |  2 ++
 lib/PublicInbox/Filter/SubjectTag.pm | 33 +
 t/filter_subjecttag.t| 27 +++
 3 files changed, 62 insertions(+)
 create mode 100644 lib/PublicInbox/Filter/SubjectTag.pm
 create mode 100644 t/filter_subjecttag.t

diff --git a/MANIFEST b/MANIFEST
index 76fd1da..f16843a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
 lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
 lib/PublicInbox/GetlineBody.pm
 lib/PublicInbox/Git.pm
@@ -127,6 +128,7 @@ t/fail-bin/spamc
 t/feed.t
 t/filter_base.t
 t/filter_mirror.t
+t/filter_subjecttag.t
 t/filter_vger.t
 t/git-http-backend.psgi
 t/git-http-backend.t
diff --git a/lib/PublicInbox/Filter/SubjectTag.pm 
b/lib/PublicInbox/Filter/SubjectTag.pm
new file mode 100644
index 000..1d28142
--- /dev/null
+++ b/lib/PublicInbox/Filter/SubjectTag.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2017 all contributors 
+# License: AGPL-3.0+ 
+
+# Filter for various [tags] in subjects
+package PublicInbox::Filter::SubjectTag;
+use strict;
+use warnings;
+use base qw(PublicInbox::Filter::Base);
+
+sub new {
+   my ($class, %opts) = @_;
+   my $tag = delete $opts{-tag};
+   die "tag not defined!\n" unless defined $tag && $tag ne '';
+   my $self = $class->SUPER::new(%opts);
+   $self->{tag_re} = qr/\A\s*(re:\s+|)\Q$tag\E\s*/i;
+   $self;
+}
+
+sub scrub {
+   my ($self, $mime) = @_;
+   my $subj = $mime->header('Subject');
+   $subj =~ s/$self->{tag_re}/$1/; # $1 is "Re: "
+   $mime->header_str_set('Subject', $subj);
+   $self->ACCEPT($mime);
+}
+
+# no suffix/article rejection for mirrors
+sub delivery {
+   my ($self, $mime) = @_;
+   $self->scrub($mime);
+}
+
+1;
diff --git a/t/filter_subjecttag.t b/t/filter_subjecttag.t
new file mode 100644
index 000..54a219e
--- /dev/null
+++ b/t/filter_subjecttag.t
@@ -0,0 +1,27 @@
+# Copyright (C) 2017 all contributors 
+# License: AGPL-3.0+ 
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::SubjectTag';
+
+my $f = eval { PublicInbox::Filter::SubjectTag->new };
+like($@, qr/tag not defined/, 'error without args');
+$f = PublicInbox::Filter::SubjectTag->new('-tag', '[foo]');
+is(ref $f, 'PublicInbox::Filter::SubjectTag', 'new object created');
+
+my $mime = Email::MIME->new(<
+Subject: =?UTF-8?B?UmU6IFtmb29dIEVsw4PCqWFub3I=?=
+
+EOF
+
+$mime = $f->delivery($mime);
+is($mime->header('Subject'), "Re: El\xc3\xa9anor", 'filtered with Re:');
+
+$mime->header_str_set('Subject', '[FOO] bar');
+$mime = $f->delivery($mime);
+is($mime->header('Subject'), 'bar', 'filtered non-reply');
+
+done_testing();
-- 
EW




[PATCH 1/2] watchmaildir: allow arguments for filters

2017-01-25 Thread Eric Wong
We'll want to allow some degree of configuration for
various mailing lists.
---
 lib/PublicInbox/WatchMaildir.pm | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 0b284bd..1823c24 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -238,11 +238,17 @@ sub _scrubber_for {
my ($inbox) = @_;
my $f = $inbox->{filter};
if ($f && $f =~ /::/) {
+   my @args;
+   # basic line splitting, only
+   # Perhaps we can have proper quote splitting one day...
+   ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
+
eval "require $f";
if ($@) {
warn $@;
} else {
-   return $f->new;
+   # e.g: PublicInbox::Filter::Vger->new(@args)
+   return $f->new(@args);
}
}
undef;
-- 
EW




[PATCH 0/2] -watch: support configurable subject filters

2017-01-25 Thread Eric Wong
Unfortunately, some mailing list administrators insist on tagging
Subject: lines with the list name instead of teaching users to
look for list-specific headers and build better user interfaces
for managing their own mail.

So, public-inbox now provides an interface to strip those tags
from Subject lines to reduce clutter when mirroring those
archives.

Eric Wong (2):
  watchmaildir: allow arguments for filters
  add filter for Subject: tags

 MANIFEST |  2 ++
 lib/PublicInbox/Filter/SubjectTag.pm | 33 +
 lib/PublicInbox/WatchMaildir.pm  |  8 +++-
 t/filter_subjecttag.t| 27 +++
 4 files changed, 69 insertions(+), 1 deletion(-)




Re: [PATCH] handle repeated References and In-Reply-To headers

2017-02-11 Thread Eric Wong
Eric Wong <e...@80x24.org> wrote:
> It seems possible for git-send-email(1) to generate
> repeated instances of References and In-Reply-To headers,
> as evidenced in:
> 
> https://public-inbox.org/git/2016124541.8216-17-vascomalme...@sapo.pt/raw

RFC to fix git-send-email here:

   https://public-inbox.org/git/20170212003432.GA19519@starla/T/



[PATCH v2] t/mime: quiet warnings for old versions of Email::Simple

2017-02-11 Thread Eric Wong
This is fixed in the newest versions of Email::Simple,
but not the version in Debian jessie (2.203)
---
 v1 actually broke the intent of the test :x

 t/mime.t | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/mime.t b/t/mime.t
index c4bdcf0..b0e2290 100644
--- a/t/mime.t
+++ b/t/mime.t
@@ -8,6 +8,7 @@ use Test::More;
 use_ok 'PublicInbox::MIME';
 use PublicInbox::MsgIter;
 
+local $SIG{__WARN__} = sub {};
 my $msg = PublicInbox::MIME->new(
 'From:   Richard Hansen 
 To: g...@vger.kernel.org
-- 
EW




[PATCH] handle repeated References and In-Reply-To headers

2017-02-11 Thread Eric Wong
It seems possible for git-send-email(1) to generate
repeated instances of References and In-Reply-To headers,
as evidenced in:

https://public-inbox.org/git/2016124541.8216-17-vascomalme...@sapo.pt/raw

This causes a mismatch between how our search indexer threads
and how our HTML view handles threading.  In the future, View.pm
will use the smsg-parsed {references} field and avoid redoing
Email::MIME header parsing.

We will still need to figure out a way to deal with messages
with repeated Message-IDs, at some point, too.
---
 lib/PublicInbox/SearchIdx.pm| 30 ++
 lib/PublicInbox/SearchThread.pm |  2 +-
 lib/PublicInbox/View.pm | 19 +++
 3 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1142ca7..8a529c6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -291,17 +291,12 @@ sub link_message {
my $mid = $smsg->mid;
my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
-   my $refs = $hdr->header_raw('References');
-   my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : ();
-   my $irt = $hdr->header_raw('In-Reply-To');
-   if (defined $irt) {
-   if ($irt eq '') {
-   $irt = undef;
-   } else {
-   $irt = mid_clean($irt);
-   $irt = undef if $mid eq $irt;
-   }
-   }
+
+   # last References should be IRT, but some mail clients do things
+   # out of order, so trust IRT over References iff IRT exists
+   my @refs = ($hdr->header_raw('References'),
+   $hdr->header_raw('In-Reply-To'));
+   @refs = ((join(' ', @refs)) =~ /<([^>]+)>/g);
 
my $tid;
if (@refs) {
@@ -309,15 +304,6 @@ sub link_message {
my @orig_refs = @refs;
@refs = ();
 
-   if (defined $irt) {
-   # to check MAX_MID_SIZE
-   push @orig_refs, $irt;
-
-   # below, we will ensure IRT (if specified)
-   # is the last References
-   $uniq{$irt} = 1;
-   }
-
# prevent circular references via References: here:
foreach my $ref (@orig_refs) {
if (length($ref) > MAX_MID_SIZE) {
@@ -329,10 +315,6 @@ sub link_message {
}
}
 
-   # last References should be IRT, but some mail clients do things
-   # out of order, so trust IRT over References iff IRT exists
-   push @refs, $irt if defined $irt;
-
if (@refs) {
$smsg->{references} = '<'.join('> <', @refs).'>';
 
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 2cd066d..2966907 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -7,7 +7,7 @@
 # Mail::Thread is unmaintained and unavailable on some distros.
 # We also do not want pruning or subject grouping, since we want
 # to encourage strict threading and hopefully encourage people
-# to use proper In-Reply-To.
+# to use proper In-Reply-To/References.
 #
 # This includes fixes from several open bugs for Mail::Thread
 #
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 2c37cd4..0b1ec75 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -92,13 +92,13 @@ EOF
 
 sub in_reply_to {
my ($hdr) = @_;
-   my $irt = $hdr->header_raw('In-Reply-To');
-
-   return mid_clean($irt) if defined $irt && $irt ne '';
-
-   my $refs = $hdr->header_raw('References');
-   if ($refs && $refs =~ /<([^>]+)>\s*\z/s) {
-   return $1;
+   my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID');
+   my @refs = ($hdr->header_raw('References'),
+   $hdr->header_raw('In-Reply-To'));
+   @refs = ((join(' ', @refs)) =~ /<([^>]+)>/g);
+   while (defined(my $irt = pop @refs)) {
+   next if $mid{"<$irt>"};
+   return $irt;
}
undef;
 }
@@ -201,7 +201,10 @@ sub _th_index_lite {
my $rv = '';
my $mapping = $ctx->{mapping} or return $rv;
my $pad = '  ';
-   my ($attr, $node, $idx, $level) = @{$mapping->{$mid_raw}};
+   my $mid_map = $mapping->{$mid_raw};
+   defined $mid_map or
+   return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped';
+   my ($attr, $node, $idx, $level) = @$mid_map;
my $children = $node->{children};
my $nr_c = scalar @$children;
my $nr_s = 0;
-- 
EW




[PATCH] t/mime.t: quiet warning during test, at least

2017-02-11 Thread Eric Wong
This is fixed in the newest versions of Email::Simple,
but not the version in Debian jessie (2.203)
---
 t/mime.t | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/mime.t b/t/mime.t
index c4bdcf0..3e29dbc 100644
--- a/t/mime.t
+++ b/t/mime.t
@@ -20,6 +20,7 @@ Content-Type: multipart/signed; 
protocol="application/pkcs7-signature"; micalg=s
 boundary="94eb2c0bc864b76ba30545b2bca9"
 
 --94eb2c0bc864b76ba30545b2bca9
+Content-Type: text/plain
 
 Richard Hansen (2):
   diff: document behavior of relative diff.orderFile
-- 
EW




Re: Working with public-inbox.org [Was: [PATCH] rev-parse: respect core.hooksPath in --git-path]

2017-02-13 Thread Eric Wong
Arif Khokar  wrote:
> On 02/13/2017 09:37 AM, Johannes Schindelin wrote:
> >I actually had expected *you* to put in a little bit of an effort, too. In
> >fact, I was very disappointed that you did not even look into porting that
> >script to use public-inbox instead of GMane.
> 
> I wasn't aware of that expectation.  My idea was to use NNTP as a way to
> facilitate the development of a new git utility that would serve as the
> inverse of git-send-email (sort of like the relationship between git
> format-patch and git am), rather than using a

Speaking for myself, I usually don't expect much, especially
from newcomers.  So I am disappointed to see Dscho's disappointment
aimed at you, Arif.  Especially since you're not a regular and
we have no idea how much free time, attention span, or familiarity
with Bourne shell you have.

> IIRC, I had posted some proof-of-concept Perl code to do so back in August
> in 
> 
> 
> Looking at public-inbox now at the archives of this group, it appears that
> several of the messages I sent weren't archived for some reason (and I
> didn't see any more responses to what I posted at the time).  The messages
> are accessible via NNTP when connecting to gmane though.

It looks like it went to gmane via the meta@public-inbox.org to
gmane.mail.public-inbox.general mirror, not via the git@vger mirror.
I can't find it on git@vger's mail-archive.com mirror, either:

https://mail-archive.com/search?q=Arif+Khokar=git%40vger.kernel.org

> Also, looking at the source of the message I referenced, it appears that my
> MUA decided to base64 encode the message for some reason (which may have
> resulted in it getting filtered by those who I sent the message to).

It probably wasn't base64, but maybe it was one of these:
http://vger.kernel.org/majordomo-taboos.txt

Or it was the SPF softfail which you can see in the headers on both
gmane and public-inbox.
It might even be the '_' (underscore) in your other address.
But even Junio gets dropped by vger sometimes:
https://public-inbox.org/git/20170127035753.GA2604@dcvr/

But if I had to guess, vger gets hit by truckloads of spam and
the backscatter volume could become unimaginable, so perhaps
it has good reason to discard silently.



Anyways, the eventual goal of public-inbox is to flip the
mailing list model backwards into "archives first" mode,
so a message needs to make it into public archives before
it goes out to subscribers.  That might prevent or avoid
such problems... *shrug*



[PATCH] www: do not unescape PATH_INFO twice

2017-02-14 Thread Eric Wong
PSGI specs already require PATH_INFO to be unescaped;
so our tests were wrong, too.
---
 lib/PublicInbox/WWW.pm | 2 +-
 t/cgi.t| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 430e6b1..62e4ca4 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -165,7 +165,7 @@ sub invalid_inbox_mid {
my $ret = invalid_inbox($ctx, $inbox);
return $ret if $ret;
 
-   $ctx->{mid} = $mid = uri_unescape($mid);
+   $ctx->{mid} = $mid;
if ($mid =~ /\A[a-f0-9]{40}\z/) {
# this is horiffically wasteful for legacy URLs:
if ($mid = mid2blob($ctx)) {
diff --git a/t/cgi.t b/t/cgi.t
index 092ad8c..7740966 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -148,7 +148,7 @@ EOF
$im->add($reply);
$im->done;
 
-   my $res = cgi_run("/test/slashy%2fasdf\@example.com/raw");
+   my $res = cgi_run("/test/slashy/asdf\@example.com/raw");
like($res->{body}, qr/Message-Id: <\Q$slashy_mid\E>/,
"slashy mid raw hit");
 
-- 
EW




[PATCH] watchmaildir: limit live importer processes

2017-01-18 Thread Eric Wong
We don't want to be triggering OOM or swapping on weaker
systems when we have dozens of inboxes as potential targets.
---
 lib/PublicInbox/WatchMaildir.pm | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index d08f229..0b284bd 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -224,7 +224,14 @@ sub _importer_for {
my $addr = $inbox->{-primary_address};
PublicInbox::Import->new($git, $name, $addr, $inbox);
};
-   $self->{importers}->{"$im"} = $im;
+
+   my $importers = $self->{importers};
+   if (scalar(keys(%$importers)) > 2) {
+   delete $importers->{"$im"};
+   _done_for_now($self);
+   }
+
+   $importers->{"$im"} = $im;
 }
 
 sub _scrubber_for {
-- 
EW




[PATCH] mime: avoid SUPER usage in Email::MIME subclass

2017-01-18 Thread Eric Wong
We must call Email::Simple methods directly in our monkey patch
for Email::MIME to call the intended method.  Using SUPER in our
subclass would instead hit a different, unintended method in
Email::MIME.

Reported-by: Junio C Hamano 

---
 lib/PublicInbox/MIME.pm |  6 ++---
 t/mime.t| 60 +
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm
index 792fffd..54925a8 100644
--- a/lib/PublicInbox/MIME.pm
+++ b/lib/PublicInbox/MIME.pm
@@ -37,7 +37,7 @@ sub parts_multipart {
   return $self->parts_single_part
 unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm;
 
-  $self->{body_raw} = $self->SUPER::body;
+  $self->{body_raw} = Email::Simple::body($self);
 
   # rfc1521 7.2.1
   my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2;
@@ -45,13 +45,13 @@ sub parts_multipart {
   # Split on boundaries, but keep blank lines after them intact
   my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || '');
 
-  $self->SUPER::body_set(undef);
+  Email::Simple::body_set($self, undef);
 
   # If there are no headers in the potential MIME part, it's just part of the
   # body.  This is a horrible hack, although it's debatable whether it was
   # better or worse when it was $self->{body} = shift @bits ... -- rjbs,
   # 2006-11-27
-  $self->SUPER::body_set(shift @bits) if ($bits[0] || '') !~ /.*:.*/;
+  Email::Simple::body_set($self, shift @bits) if ($bits[0] || '') !~ /.*:.*/;
 
   my $bits = @bits;
 
diff --git a/t/mime.t b/t/mime.t
index cd3303d..c4bdcf0 100644
--- a/t/mime.t
+++ b/t/mime.t
@@ -6,6 +6,7 @@ use strict;
 use warnings;
 use Test::More;
 use_ok 'PublicInbox::MIME';
+use PublicInbox::MsgIter;
 
 my $msg = PublicInbox::MIME->new(
 'From:   Richard Hansen 
@@ -54,4 +55,63 @@ my $exp = 'Richard Hansen (2):
 ok($msg->isa('Email::MIME'), 'compatible with Email::MIME');
 is($parts[0]->body, $exp, 'body matches expected');
 
+
+my $raw = q^Date:   Wed, 18 Jan 2017 13:28:32 -0500
+From:   Santiago Torres 
+To: Junio C Hamano 
+Cc: g...@vger.kernel.org, p...@peff.net, sunsh...@sunshineco.com,
+walt...@verbum.org, Lukas Puehringer 
+Subject: Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v
+Message-ID: <20170118182831.pkhqu2np3bh2puei@LykOS.localdomain>
+References: <20170117233723.23897-1-santi...@nyu.edu>
+ <20170117233723.23897-5-santi...@nyu.edu>
+ 
+ 
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha256;
+protocol="application/pgp-signature"; boundary="r24xguofrazenjwe"
+Content-Disposition: inline
+In-Reply-To: 
+
+
+--r24xguofrazenjwe
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+your tree directly?=20
+
+--r24xguofrazenjwe
+Content-Type: application/pgp-signature; name="signature.asc"
+
+-BEGIN PGP SIGNATURE-
+
+=7wIb
+-END PGP SIGNATURE-
+
+--r24xguofrazenjwe--
+
+^;
+
+$msg = PublicInbox::MIME->new($raw);
+my $nr = 0;
+msg_iter($msg, sub {
+   my ($part, $level, @ex) = @{$_[0]};
+   if ($ex[0] == 1) {
+   is($part->body_str, "your tree directly? \r\n", 'body OK');
+   } elsif ($ex[0] == 2) {
+   is($part->body, "-BEGIN PGP SIGNATURE-\n\n" .
+   "=7wIb\n" .
+   "-END PGP SIGNATURE-\n",
+   'sig "matches"');
+   } else {
+   fail "unexpected part\n";
+   }
+   $nr++;
+});
+
+is($nr, 2, 'got 2 parts');
+is($msg->as_string, $raw,
+   'stringified sufficiently close to original');
+
 done_testing();
-- 
EW




[PATCH] learn: implement "rm" only functionality

2017-01-18 Thread Eric Wong
Do not consider this interface stable, but I just needed a
way to remove mis-imported multipart messages so
public-inbox-watch could pick them up again from my Maildir.
---
 script/public-inbox-learn | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index a696d34..38c8324 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -16,7 +16,7 @@ $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is 
imperfect
 use PublicInbox::Address;
 use PublicInbox::Spamcheck::Spamc;
 my $train = shift or die "usage: $usage\n";
-if ($train !~ /\A(?:ham|spam)\z/) {
+if ($train !~ /\A(?:ham|spam|rm)\z/) {
die "`$train' not recognized.\nusage: $usage\n";
 }
 
@@ -27,15 +27,18 @@ my $mime = PublicInbox::MIME->new(eval {
local $/;
my $data = scalar ;
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-   eval {
-   if ($train eq 'ham') {
-   $spamc->hamlearn(\$data);
-   } else {
-   $spamc->spamlearn(\$data);
-   }
-   die "spamc failed with: $?\n" if $?;
-   };
-   $err = $@;
+
+   if ($train ne 'rm') {
+   eval {
+   if ($train eq 'ham') {
+   $spamc->hamlearn(\$data);
+   } elsif ($train eq 'spam') {
+   $spamc->spamlearn(\$data);
+   }
+   die "spamc failed with: $?\n" if $?;
+   };
+   $err = $@;
+   }
$data
 });
 
@@ -61,7 +64,7 @@ foreach my $recipient (keys %dests) {
my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient;
my $im = PublicInbox::Import->new($git, $name, $email);
 
-   if ($train eq "spam") {
+   if ($train eq "spam" || $train eq "rm") {
# This needs to be idempotent, as my inotify trainer
# may train for each cross-posted message, and this
# script already learns for every list in
-- 
EW




Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v

2017-01-18 Thread Eric Wong
Junio C Hamano  wrote:
> Santiago Torres  writes:
> 
> <>???
> 
> Eric, I've noticed that this message
> 
>   
> http://public-inbox.org/git/20170118182831.pkhqu2np3bh2puei@LykOS.localdomain/
> 
> and all messages from Santiago appear empty when they come via
> public-inbox.org; the reason I suspect we haven't heard much
> complaints is because nobody else around here sends multipart/signed
> disposition inline other than Santiago.

Eeep!  This looks like a regression I introduced when working
around Richard Hansen's S/MIME mails the other week on git@vger:

  https://public-inbox.org/meta/2017011035.GB27356@dcvr/T/#u

Worse, they are now corrupted on the way into the git repo
because of search indexing.  Will fix ASAP.  Thanks for the
heads up.



Re: Mail archives in Git using ssoma

2016-08-21 Thread Eric Wong
"W. Trevor King" <wk...@tremily.us> wrote:
> On Sun, Aug 21, 2016 at 06:37:04PM +, Eric Wong wrote:
> > Btw, for public-inbox, I'm using git-fast-import now, so imports are
> > a bit faster and $GIT_DIR/ssoma.index is no longer used.  This was
> > crucial for getting git@vger archives imported in a reasonable time.
> 
> ssoma-mda imports 22k notmuch messages in around 15 minutes (with
> profiling enabled), and:

In contrast, git@vger is around 300K messages.  LKML is well
into the millions, and I hope public-inbox (and git!) can handle
that one day, even on cheap hardware (haven't tried).

One problem I noticed with ssoma-mda is that it gets slower as
more messages get imported, since all those files sit in the
index, and the git index format is bad for incremental updates
with big, flat trees.  Big trees are a general problem with git:

I'm now storing blob IDs directly in Xapian and will be
using them more to avoid tree lookups.  tree creation
lookups degrade the same way the index does as they
get bigger.

Currently it's using 2/38 of the SHA-1 like git loose
objects; a goal might be to move towards supporting 2/2/36
(or deeper) as Jeff noted substantial object traversal
improvements:

https://public-inbox.org/git/20160805092805.w3nwv2l6jkbuw...@sigill.intra.peff.net/

Of course, support for 2/38 will be retained for old
archives/messages.

>   $ python -m cProfile -o profile import.py notmuch.mbox
>   $ python -c "import pstats; p=pstats.Stats('profile'); 
> p.sort_stats('cumulative').print_stats(10)"
>   Sun Aug 21 12:56:49 2016profile
> 
>101823722 function calls (99078415 primitive calls) in 885.069 
> seconds
> 
>  Ordered by: cumulative time
>  List reduced from 1145 to 10 due to restriction <10>
> 
>  ncalls  tottime  percall  cumtime  percall filename:lineno(function)
>70/10.0020.000  885.069  885.069 {built-in method exec}
>   10.1110.111  885.069  885.069 
> /home/wking/src/notmuch/notmuch-archives.git/import.py:9()
>   10.4000.400  884.915  884.915 
> /home/wking/src/notmuch/notmuch-archives.git/import.py:17(import_mbox)
>   228750.6010.000  863.3710.038 
> /home/wking/src/notmuch/notmuch-archives.git/ssoma_mda.py:362(deliver)
>   228758.9430.000  810.4590.035 
> /home/wking/src/notmuch/notmuch-archives.git/ssoma_mda.py:207(append)
>   228750.4180.000  308.3530.013 
> /home/wking/.local/lib64/python3.4/site-packages/pygit2/index.py:146(write_tree)
>   22875  307.8550.013  307.8550.013 {built-in method 
> git_index_write_tree}
>   228740.5750.000  279.2930.012 
> /home/wking/.local/lib64/python3.4/site-packages/pygit2/index.py:238(diff_to_tree)
>   22874  278.5010.012  278.5010.012 {built-in method 
> git_diff_tree_to_index}

It looks like writing the index is already the slowest, here, in
terms of total time, too.  It might be interesting if you
profiled each *-mda invocation to see the degradation from the
first to last message.

>   228750.0880.000   80.4130.004 
> /home/wking/.local/lib64/python3.4/site-packages/pygit2/index.py:99(read)
> 
> 38 ms per ssoma delivery is probably fast enough, especially if you

Not even close for me :)

> are invoking ssoma-mda once per message, since process setup will take a 
> similar amount of time:
> 
>   $ time python -c 'print("hello")'
>   hello
> 
>   real0m0.016s
>   user0m0.013s
>   sys 0m0.003s
> 
> It's possible that fast-import would shave a few ms off the pygit2
> addition (I'm not sure, and maybe pygit2 is faster than fast-import).
> But I doubt it matters enough either way to be worth changing unless
> you are dealing with a really large corpus.

One key feature is fast-import avoids writing an index entirely.
I think pygit2 would have to learn that, too.
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



Re: Mail archives in Git using ssoma (Docker image)

2016-08-21 Thread Eric Wong
"W. Trevor King" <wk...@tremily.us> wrote:
> On Sun, Aug 21, 2016 at 12:08:52PM +, Eric Wong wrote:
> > "W. Trevor King" <wk...@tremily.us> wrote:
> > > This is the ssoma archive (with the data in it).  I just set up a
> > > basic HTTP archive (following [1]) based on a Docker image [2] (Gentoo
> > > doesn't package all the Perl dependencies public-inbox needs).
> > 
> > Ugh, that sucks (sorry, not a fan of Docker).
> > 
> > What's missing from Gentoo?
> 
> Gentoo doesn't package (or I couldn't find the package for)
> Encode::MIME::Header or Mail::Thread.  I tried installing things from
> CPAN, but ran into a compile-time error from the ‘cpan’ invocation and
> gave up ;).  I can try and reproduce the error if you're curious, but
> I don't have it handy at the moment.

Encode::MIME::Header is distributed with perl itself on Debian and also
the stock upstream install.  Not sure if there's an option you missed or
disabled.

Which perl version do you use?

perl 5.14 on Debian wheezy even seems to have it.  I actually
still want everything to work on 5.8, since that seems to be
the de-facto baseline in the wild.


Mail::Thread is one .pm, and I'll probably replace it with
something (same algorithm) which can use half the memory by
avoiding wrapper object abstractions (it's probably the biggest
memory hog at the moment).

lib/PublicInbox/Thread.pm already has 3 monkey patches to workaround
upstream bugs in Mail::Thread.  It's dead upstream, and not available on
FreeBSD, either.

> > >   $ git config -f srv/notmuch.git/config publicinbox.http 
> > > http://tremily.us
> > >   $ git config -f srv/notmuch.git/config publicinbox.email 
> > > notm...@notmuchmail.org
> > 
> > That should probably be:
> > 
> > ; based on your [3]
> > git config -f srv/notmuch.git/config \
> > publicinbox.notmuch.url http://tremily.us/notmuch
> > 
> > git config -f srv/notmuch.git/config \
> > publicinbox.notmuch.address notm...@notmuchmail.org
> > 
> > ; this is crucial for all the public-inbox-* tools
> > git config -f srv/notmuch.git/config \
> > publicinbox.notmuch.mainrepo /path/to/notmuch.git
> 
> I was using these in the Dockerfile's CMD:
> 
>   (cd /srv;
>for NAME in *;
>do
>  CONF="/srv/${NAME}/config";
>  public-inbox-init "${NAME}" "/srv/${NAME}" $(git config -f "${CONF}" 
> publicinbox.http) $(git config -f "${CONF}" publicinbox.email);
>done) && …
> 
> Are you saying that I can skip the ~/.public-inbox/config entries
> setup by public-inbox-init if I set publicinbox.{name}.* in the ssoma
> repository's config?  That would be nice.

Erm, sorry, no, I mean ~/.public-inbox/config as the "git config -f"
arg in the above commands.  Your original config was
meaningless in the context of public-inbox itself; I don't
recall public-inbox relying on $GIT_DIR/config much (if at all)
outside of standard git things.

Using ~/.public-inbox/config is required for multi-inbox lookups
(since you normally run MDA w/o args)

You can also override ~/.public-inbox/config by setting the
PI_CONFIG env (like GIT_CONFIG).

> I don't see a point to having {name} in ssoma-config settings though,
> since you're already in a single bucket by that point (using
> publicinbox.{name}.* makes sense in the multi-bucket
> ~/.public-inbox/config).
> 
> > > It's not updating automatically yet, but that will probably look
> > > like:
> > > 
> > > 1. Pull new mbox [4].
> > > 2. Import into notmuch-archives [5].
> > > 3. Re-run public-inbox-index (this could probably be via ‘docker exec …’).
> > > 
> > > But I'll have to test that to confirm.  And ideally we'd be using
> > > ssoma-mda or similar directly, instead of going through mbox, but I'd
> > > rather get the official headers on the stored mail than be efficient
> > > ;).
> > 
> > For mirroring existing lists, I started using public-inbox-watch
> > which currently watches Maildirs.
> 
> If I had a Maildir locally, I'd just use procmail and push new
> messages into ssoma-mda.  I'm using the import script because my local
> mail has “how we delivered this to Trevor” headers (which I don't want
> to add) but the downloaded mbox has “how we delivered this to
> notm...@notmuchmail.org” (which seems like a better fit for a shared
> ssoma repo).

I don't mind extra/different headers.   The majority of messages in
public-inbox.org/git/ were delivered to gmane;
recent ones are delivered to me, and some holes were filled in by
Jeff King's archives.

Re: Working with public-inbox.org [Was: [PATCH] rev-parse: respect core.hooksPath in --git-path]

2016-08-19 Thread Eric Wong
Johannes Schindelin <johannes.schinde...@gmx.de> wrote:
> On Thu, 18 Aug 2016, Eric Wong wrote:
> > Johannes Schindelin <johannes.schinde...@gmx.de> wrote:
> >
> > > Old dogs claim the mail list-approach works for them. Nope. Doesn't.
> > > Else you would not have written all those custom scripts.
> > 
> > git and cogito started as a bunch of custom scripts, too.
> 
> The difference is that neither git nor cogito were opinionated. Those
> custom scripts are. They are for one particular workflow, with one
> particular mail client, with a strong bias to a Unix-y environment.
> 
> I work really hard to make Git for Windows as easy and fun to use as
> possible. I just wish that we were working together to make it as easy and
> fun to contribute to Git, too.

I guess this is a fundamental difference between *nix and
Windows culture.

I enjoy using and contributing to git because it interacts well
with generic tools.  *nix kernels are optimized for this with
decent (not great)[*] process spawning and IPC performance.

I know Windows users have major performance problems with
shell scripts; but they are also largely helpless to improve
Windows kernel performance.

So, I guess monolithic tools became popular on Windows, instead.

> > I see a choice of mail client as no different than a choice of
> > text editor.  Neither my mail client or text editor is heavily
> > customized.  The key feature I rely on from both tools is piping
> > data to external commands.
> 
> There you go. That key feature seems to be unavailable in the most
> wide-spread email client: Outlook. So by not restricting the choice you
> should make it possible to use that mail client, too, right?
> 
> We do not even have a section on Outlook in our SubmittingPatches.
> 
> Okay, if not the most popular mail client, then web mail? Nope, nope,
> nope. No piping *at all* to external commands from there.
> 
> So you basically slam the door shut on the vast majority of email users.

Users have a choice to use a more scriptable mail client
(but I guess the OS nudges users towards monolithic tools)

It's unfortunate the world is so full of proprietary things;
but I think it's our responsibility as Free Software developers
to encourage the use of Free (or "Open Source") tools which
users can control.

> That is not leaving much choice to the users in my book.

Users of alpine, gnus, mutt, sylpheed, thunderbird, kmail,
roundcube, squirrelmail, etc. can all download the source, hack,
fix and customize things.  It's easier with smaller software,
of course:  git-send-email does not even require learning
the build process or separate download.



> > Users ought to be able to pick, choose, and replace tools as
> > they wish as long as an interchange format remains stable
> > and widely-supported.
> 
> Right. Let's talk about the interchange format called mails, for the data
> called patches. Is it stable and widely-supported?
> 
> Can users really pick and choose the tools they like most to send patches
> to the Git project? Like, the Outlook client? Or the GMail client?

Personally, I don't mind patches as MIME attachments if that
avoids corruption, MIME seems well-supported at this point.
It's not my call, though.  But as Stephan pointed out, Linus
does it, too.



[*] Guess what: I have performance problems with fork/execve on
Linux, too.  However, Linux developers already provide
mechanisms to improve spawn performance (CLONE_VFORK and
vfork(2)); so the next step is to get userspace like dash,
make, perl, etc to support these.

glibc 2.24 was just released with an improved posix_spawn
for Linux (using CLONE_VFORK), so that's a step forward and
might make sharing code with Windows easier, too.

It's not a high priority for me at the moment, but I intend
to get everything in my toolset which relies on fork+execve
to use posix_spawn or vfork+execve instead.  I have the
source to all of these, so at least I can do something
about it.
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



Re: Working with public-inbox.org [Was: [PATCH] rev-parse: respect core.hooksPath in --git-path]

2016-08-19 Thread Eric Wong
Stefan Beller  wrote:
> Maybe we should invent a patch format that copes with broken whitespace?

No redundant new formats, please.  MIME attachments are already
widely-supported and fine by me.  But it's not my call for git.
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 4/4] doc: avoid conflicting with MakeMaker variable names

2016-08-21 Thread Eric Wong
We want the pod2man(1) executable for handling certain
options.  Also, use the correct year while we're at it :P
---
 Documentation/include.mk | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/include.mk b/Documentation/include.mk
index bd9df8c..5d73028 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -7,12 +7,12 @@ RSYNC_DEST = public-inbox.org:/srv/public-inbox/
 docs := README COPYING INSTALL TODO HACKING
 docs += $(shell git ls-files 'Documentation/*.txt')
 INSTALL = install
-POD2MAN ?= pod2man
-POD2MAN_OPTS = -v --stderr -d 1994-10-02 -c 'public-inbox user manual'
-pod2man = $(POD2MAN) $(POD2MAN_OPTS)
-POD2TEXT = pod2text
-POD2TEXT_OPTS = --stderr
-pod2text = $(POD2TEXT) $(POD2TEXT_OPTS)
+PODMAN = pod2man
+PODMAN_OPTS = -v --stderr -d 1993-10-02 -c 'public-inbox user manual'
+podman = $(PODMAN) $(PODMAN_OPTS)
+PODTEXT = pod2text
+PODTEXT_OPTS = --stderr
+podtext = $(PODTEXT) $(PODTEXT_OPTS)
 
 m1 =
 m1 += public-inbox-mda
@@ -42,7 +42,7 @@ install-man: man
test -z "$(man7)" || $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir)
 
 %.1 : Documentation/%.pod
-   $(pod2man) -s 1 $< $@+ && mv $@+ $@
+   $(podman) -s 1 $< $@+ && mv $@+ $@
 
 mantxt = $(addprefix Documentation/, $(addsuffix .txt, $(m1)))
 docs += $(mantxt)
@@ -50,7 +50,7 @@ docs += $(mantxt)
 all :: $(mantxt)
 
 Documentation/%.txt : Documentation/%.pod
-   $(pod2text) $< $@+ && mv $@+ $@
+   $(podtext) $< $@+ && mv $@+ $@
 
 txt2pre = ./Documentation/txt2pre <$< >$@+ && touch -r $< $@+ && mv $@+ $@
 txt := INSTALL README COPYING TODO
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 1/4] README: add link to source code mirrors

2016-08-21 Thread Eric Wong
Centralization sucks, so we mirror everything.
---
 README | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README b/README
index 7618727..3235275 100644
--- a/README
+++ b/README
@@ -88,6 +88,8 @@ Hacking
 Source code is available via git:
 
git clone https://public-inbox.org/ public-inbox
+   git clone git://repo.or.cz/public-inbox
+   torsocks git clone http://hjrcffqmbrq6wope.onion/public-inbox
 
 See below for contact info.
 
@@ -119,6 +121,7 @@ And as Tor hidden services:
 You may also clone all messages via git:
 
git clone --mirror https://public-inbox.org/meta/
+   torsocks git clone --mirror http://hjrcffqmbrq6wope.onion/meta/
 
 Or pass the same git repository URL for ssoma using the instructions at:
 
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH 3/4] avoid spaces after shell redirection operators

2016-08-21 Thread Eric Wong
This makes us closer to git.git style (though I'm not quite sure
why we do this...)
---
 Documentation/include.mk   | 4 ++--
 Documentation/public-inbox-mda.pod | 2 +-
 scripts/dc-dlvr| 4 ++--
 scripts/dc-dlvr.pre| 6 +++---
 scripts/report-spam| 8 
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/include.mk b/Documentation/include.mk
index 396a258..bd9df8c 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -52,7 +52,7 @@ all :: $(mantxt)
 Documentation/%.txt : Documentation/%.pod
$(pod2text) $< $@+ && mv $@+ $@
 
-txt2pre = ./Documentation/txt2pre < $< > $@+ && touch -r $< $@+ && mv $@+ $@
+txt2pre = ./Documentation/txt2pre <$< >$@+ && touch -r $< $@+ && mv $@+ $@
 txt := INSTALL README COPYING TODO
 dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt
 dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt)
@@ -67,7 +67,7 @@ html: $(docs_html)
 gz_docs := $(addsuffix .gz, $(docs) $(docs_html))
 rsync_docs := $(gz_docs) $(docs) $(txt) $(docs_html)
 %.gz: %
-   gzip -9 --rsyncable < $< > $@+
+   gzip -9 --rsyncable <$< >$@+
touch -r $< $@+
mv $@+ $@
 
diff --git a/Documentation/public-inbox-mda.pod 
b/Documentation/public-inbox-mda.pod
index 62f87d6..18fcdd0 100644
--- a/Documentation/public-inbox-mda.pod
+++ b/Documentation/public-inbox-mda.pod
@@ -4,7 +4,7 @@ public-inbox-mda - mail delivery for public-inbox
 
 =head1 SYNOPSIS
 
-B E message
+B EMESSAGE
 
 =head1 DESCRIPTION
 
diff --git a/scripts/dc-dlvr b/scripts/dc-dlvr
index a38760f..81193b2 100755
--- a/scripts/dc-dlvr
+++ b/scripts/dc-dlvr
@@ -57,14 +57,14 @@ err=$?
 set -e
 
 case $err in
-1) $DELIVER -m INBOX.spam < $TMPMSG ;;
+1) $DELIVER -m INBOX.spam <$TMPMSG ;;
 *)
# users may override normal delivery and have it go elsewhere
if test -r ~/.dc-dlvr.rc
then
. ~/.dc-dlvr.rc
else
-   $DELIVER -m INBOX < $TMPMSG
+   $DELIVER -m INBOX <$TMPMSG
fi
;;
 esac
diff --git a/scripts/dc-dlvr.pre b/scripts/dc-dlvr.pre
index c10e80c..d7bc1b5 100644
--- a/scripts/dc-dlvr.pre
+++ b/scripts/dc-dlvr.pre
@@ -5,7 +5,7 @@
 export PATH=/usr/local/bin:/usr/bin:/bin
 trap 'err=$?; set +e; test $err -eq 0 || rm -f $TMPMSG; exit $err' EXIT
 case $1,$CLIENT_ADDRESS in
-pispam,) exec public-inbox-learn spam < $TMPMSG ;;
-piham,) exec public-inbox-learn ham < $TMPMSG ;;
+pispam,) exec public-inbox-learn spam <$TMPMSG ;;
+piham,) exec public-inbox-learn ham <$TMPMSG ;;
 esac
-exec public-inbox-mda < $TMPMSG
+exec public-inbox-mda <$TMPMSG
diff --git a/scripts/report-spam b/scripts/report-spam
index 0015ef0..325f571 100755
--- a/scripts/report-spam
+++ b/scripts/report-spam
@@ -32,14 +32,14 @@ PI_USER=pi
 
 case $1 in
 *[/.]spam/cur/*) # non-new messages in spam get trained
-   $DO_SENDMAIL $PI_USER+pispam < $1
-   exec $DO_SENDMAIL $USER+trainspam < $1
+   $DO_SENDMAIL $PI_USER+pispam <$1
+   exec $DO_SENDMAIL $USER+trainspam <$1
;;
 *:2,*S*) # otherwise, seen messages only
case $1 in
*:2,*T*) exit 0 ;; # ignore trashed messages
esac
-   $DO_SENDMAIL $PI_USER+piham < $1
-   exec $DO_SENDMAIL $USER+trainham < $1
+   $DO_SENDMAIL $PI_USER+piham <$1
+   exec $DO_SENDMAIL $USER+trainham <$1
;;
 esac
-- 
EW

--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



Re: Mail archives in Git using ssoma (Docker image)

2016-08-21 Thread Eric Wong
+Cc meta@public-inbox.org

"W. Trevor King"  wrote:
> On Sat, Aug 20, 2016 at 09:36:31PM -0700, W. Trevor King wrote:
> > [2]: git://tremily.us/notmuch-archives.git

Cool!

> This is the ssoma archive (with the data in it).  I just set up a
> basic HTTP archive (following [1]) based on a Docker image [2] (Gentoo
> doesn't package all the Perl dependencies public-inbox needs).

Ugh, that sucks (sorry, not a fan of Docker).

What's missing from Gentoo?

It should be easy to copy just the necessary .pm files and use
PERL5LIB environment to point to the correct path (man perlrun).

I'm consciously avoiding XS (compiled) extensions to make
installation/distribution easier.

> Dockerfile for rebuilding the image is in [2].  I'm currently hosting
> the archives (HTTP only) at [3].  Spinning that up from the Docker
> image looks like:
> 
>   $ mkdir srv
>   $ git clone --bare git://tremily.us/notmuch-archives.git srv/notmuch
>   $ echo 'Notmuch -- Just an email system' >srv/notmuch.git/description
>   $ git config -f srv/notmuch.git/config publicinbox.http http://tremily.us
>   $ git config -f srv/notmuch.git/config publicinbox.email 
> notm...@notmuchmail.org

That should probably be:

; based on your [3]
git config -f srv/notmuch.git/config \
publicinbox.notmuch.url http://tremily.us/notmuch

git config -f srv/notmuch.git/config \
publicinbox.notmuch.address notm...@notmuchmail.org

; this is crucial for all the public-inbox-* tools
git config -f srv/notmuch.git/config \
publicinbox.notmuch.mainrepo /path/to/notmuch.git

I'm sorry that most of this is still undocumented at the moment,
but it's my first priority once I'm done sorting out some
non-computing-related stuff.

>   $ docker run --name notmuch-archives -d -p 80:8080 -v ${PWD}/srv/:/srv/ 
> wking/public-inbox
> 
> (although I'm using -p ###:8080 and have an Nginx reverse-proxy in
> front).  It's not updating automatically yet, but that will probably
> look like:
> 
> 1. Pull new mbox [4].
> 2. Import into notmuch-archives [5].
> 3. Re-run public-inbox-index (this could probably be via ‘docker exec …’).
> 
> But I'll have to test that to confirm.  And ideally we'd be using
> ssoma-mda or similar directly, instead of going through mbox, but I'd
> rather get the official headers on the stored mail than be efficient
> ;).

For mirroring existing lists, I started using public-inbox-watch
which currently watches Maildirs.  The config knobs are sorta
documented from my announcement to git@vger:

https://public-inbox.org/git/20160710004813.ga20...@dcvr.yhbt.net/
http://hjrcffqmbrq6wope.onion/git/20160710004813.ga20...@dcvr.yhbt.net/

Initial import (w/o spamassassin) was done with
scripts/import_vger_from_mbox in the source:

torsocks git clone http://hjrcffqmbrq6wope.onion/public-inbox
git clone https://public-inbox.org/ public-inbox
git clone git://repo.or.cz/public-inbox

I recommend public-inbox-watch for mirroring existing lists
(such as what I did with git@vger) but public-inbox-mda for
self-hosted lists (such as meta@public-inbox.org).

> One shift from Gmane's mid.gmane.org/… is that the public-inbox UI
> Message-ID lookup is per-bucket, and public-inbox seems to be
> encouraging per-list buckets.

The public-inbox-nntpd interface supports mid lookups across all
inboxes in that instance; so it should be doable in the WWW
interface, too.  Either way, I think it has to be O(n) where (n)
is the number of Xapian DBs, though.

I already have news.public-inbox.org hooked up to both
NNTP and HTTP(*), so I plan on making

http://news.public-inbox.org/

to work like:

nntp://news.public-inbox.org/

(*) Right now, it just redirects $GROUP to the HTTP interface:
http://news.public-inbox.org/$NEWSGROUP -> http://...


And the WWW interface already has fallbacks to scan + link
across inboxes, so s/git/meta/ the above URLs and you'll get
a link to the message on /git/ instead of /meta/

http://hjrcffqmbrq6wope.onion/meta/20160710004813.ga20...@dcvr.yhbt.net/

> And while I feel like I had a good grasp of the ssoma format two years
> ago, I know very little about Perl and public-inbox.  I'm sure you
> could setup a public-inbox host that is more efficient than what's
> currently in my Docker image.

Feel free to ask me + meta@public-inbox.org if you have any
questions or need help.  Writing documentation doesn't come
naturally to me, so it's easier for me to answer emails.

I try to make it not very Perly.  I don't think I'll bother with
CPAN, for example  (I don't think I successfully got my PAUSE
account activated; not a fan of registrations, either).

But there will definitely be tarball releases for distros
soonish.  (mainly targeting Debian at the moment, but FreeBSD is
on the table).

> Cheers,
> Trevor
> 
> [1]: http://public-inbox.org/INSTALL
> [2]: https://hub.docker.com/r/wking/public-inbox/
> [3]: 

Re: Working with public-inbox.org [Was: [PATCH] rev-parse: respect core.hooksPath in --git-path]

2016-08-22 Thread Eric Wong
Johannes Schindelin <johannes.schinde...@gmx.de> wrote:
> On Fri, 19 Aug 2016, Eric Wong wrote:
> > Johannes Schindelin <johannes.schinde...@gmx.de> wrote:
> > > On Thu, 18 Aug 2016, Eric Wong wrote:
> > > > Johannes Schindelin <johannes.schinde...@gmx.de> wrote:
> > > >
> > > > > Old dogs claim the mail list-approach works for them. Nope.
> > > > > Doesn't.  Else you would not have written all those custom
> > > > > scripts.
> > > > 
> > > > git and cogito started as a bunch of custom scripts, too.
> > > 
> > > The difference is that neither git nor cogito were opinionated. Those
> > > custom scripts are. They are for one particular workflow, with one
> > > particular mail client, with a strong bias to a Unix-y environment.



> > I guess this is a fundamental difference between *nix and Windows
> > culture.
> 
> I do not understand how you get from "I wish to make it fun to contribute
> to Git" to "there is a fundamental difference between *nix and Windows
> culture".

Sorry, I over-quoted by 3 lines.



> > > We do not even have a section on Outlook in our SubmittingPatches.
> > > 
> > > Okay, if not the most popular mail client, then web mail? Nope, nope,
> > > nope. No piping *at all* to external commands from there.
> > > 
> > > So you basically slam the door shut on the vast majority of email users.
> > 
> > Users have a choice to use a more scriptable mail client
> > (but I guess the OS nudges users towards monolithic tools)
> 
> You call that choice. Are you serious?
> 
> > > That is not leaving much choice to the users in my book.
> > 
> > Users of alpine, gnus, mutt, sylpheed, thunderbird, kmail,
> > roundcube, squirrelmail, etc. can all download the source, hack,
> > fix and customize things.  It's easier with smaller software,
> > of course:  git-send-email does not even require learning
> > the build process or separate download.
> 
> Now I am getting upset. This is a BS argument. Sure, I can hack the source
> of these tools.
>
> But why on earth do I *have* to? Why can't we use or create an open
> contribution process *that works without having to work so hard to be able
> to contribute*?

The process we have is already open.  It may be *nix-centric,
and *nix may be picky about its friends, but it is open:

Anybody can still contribute today without any sort of
registration, credentialism, or terms-of-service(*).

I am looking beyond git.

I hate signing up for websites.  For many years, I have used
Debian as a proxy for other projects with less open contribution
processes:

apt-get source ...; ; reportbug ...

Of course, going through Debian maintainers is not always
reliable or efficient.

I foolishly hoped git-svn would put an end to all the
registration-required bug tracker instances so I could just
send my changes directly to upstream maintainers without any
sort of registration.  Did not happen :<

> So unfortunately this thread has devolved. Which is sad. Because all I
> wanted is to have a change in Git's submission process that would not
> exclude *so many* developers. That is really all I care about. Not about
> tools. Not about open vs proprietary, or standards.
> 
> I just want developers who are already familiar with Git, and come up with
> an improvement to Git itself, to be able to contribute it without having
> to pull out their hair in despair.

We want the same thing.  I just want to go farther and get
people familiar with (federated|decentralized) tools instead of
proprietary and centralized ones.



(*) I wish git could get rid of the DCO, even.  But at least
it's far better than the "papers, please" policy for some
GNU projects.
--
unsubscribe: meta+unsubscr...@public-inbox.org
archive: https://public-inbox.org/meta/



[PATCH] config: use "publicinboxlimiter" prefix

2016-09-02 Thread Eric Wong
Just having "limiter" in the prefix may confuse
it with something else.  Use the full prefix to
avoid this confusion.
---
 lib/PublicInbox/Config.pm | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 5adcd0c..8d66cf8 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -90,8 +90,12 @@ sub limiter {
my ($self, $name) = @_;
$self->{-limiters}->{$name} ||= do {
require PublicInbox::Qspawn;
-   my $key = "limiter.$name.max";
-   PublicInbox::Qspawn::Limiter->new($self->{$key});
+   my $max;
+   # XXX "limiter.<name>.max" was a historical mistake
+   foreach my $pfx (qw(publicinboxlimiter limiter)) {
+   $max ||= $self->{"$pfx.$name.max"};
+   }
+   PublicInbox::Qspawn::Limiter->new($max);
};
 }
 
-- 
EW




[PATCH] doc: new docs for user-level commands

2016-09-06 Thread Eric Wong
Hopefully more folks can download and run public-inbox,
nowadays.
---
 .gitignore  |   1 +
 Documentation/.gitignore|   2 +-
 Documentation/include.mk|  25 -
 Documentation/public-inbox-config.pod   | 151 
 Documentation/public-inbox-daemon.pod   | 171 
 Documentation/public-inbox-httpd.pod|  40 
 Documentation/public-inbox-index.pod|  97 ++
 Documentation/public-inbox-mda.pod  |  27 +++--
 Documentation/public-inbox-nntpd.pod|  53 ++
 Documentation/public-inbox-overview.pod | 108 
 Documentation/public-inbox-watch.pod| 121 ++
 HACKING |   4 +-
 MANIFEST|   8 ++
 lib/PublicInbox/WWW.pod |  56 +++
 14 files changed, 849 insertions(+), 15 deletions(-)
 create mode 100644 Documentation/public-inbox-config.pod
 create mode 100644 Documentation/public-inbox-daemon.pod
 create mode 100644 Documentation/public-inbox-httpd.pod
 create mode 100644 Documentation/public-inbox-index.pod
 create mode 100644 Documentation/public-inbox-nntpd.pod
 create mode 100644 Documentation/public-inbox-overview.pod
 create mode 100644 Documentation/public-inbox-watch.pod
 create mode 100644 lib/PublicInbox/WWW.pod

diff --git a/.gitignore b/.gitignore
index 3b333a5..6a44471 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,5 +9,6 @@
 *.1
 *.5
 *.7
+*.8
 *.html
 *.gz
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
index 8ba4186..107ad36 100644
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@@ -1 +1 @@
-/public-inbox-mda.txt
+/public-inbox-*.txt
diff --git a/Documentation/include.mk b/Documentation/include.mk
index 9d2c3b0..5154d4b 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -15,37 +15,58 @@ PODTEXT = pod2text
 PODTEXT_OPTS = --stderr
 podtext = $(PODTEXT) $(PODTEXT_OPTS)
 
+# MakeMaker only seems to support manpage sections 1 and 3...
 m1 =
 m1 += public-inbox-mda
+m1 += public-inbox-httpd
+m1 += public-inbox-nntpd
+m1 += public-inbox-watch
 m5 =
+m5 += public-inbox-config
 m7 =
+m7 += public-inbox-overview
+m8 =
+m8 += public-inbox-daemon
 
 man1 := $(addsuffix .1, $(m1))
 man5 := $(addsuffix .5, $(m5))
 man7 := $(addsuffix .7, $(m7))
+man8 := $(addsuffix .8, $(m8))
 
 all:: man html
 
-man: $(man1) $(man5) $(man7)
+man: $(man1) $(man5) $(man7) $(man8)
 
+prefix ?= $(PREFIX)
 prefix ?= $(HOME)
 mandir ?= $(prefix)/share/man
 man1dir = $(mandir)/man1
 man5dir = $(mandir)/man5
 man7dir = $(mandir)/man7
+man8dir = $(mandir)/man8
 
 install-man: man
test -z "$(man1)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
test -z "$(man5)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
test -z "$(man7)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+   test -z "$(man8)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
test -z "$(man1)" || $(INSTALL) -m 644 $(man1) $(DESTDIR)$(man1dir)
test -z "$(man5)" || $(INSTALL) -m 644 $(man5) $(DESTDIR)$(man5dir)
test -z "$(man7)" || $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir)
+   test -z "$(man8)" || $(INSTALL) -m 644 $(man8) $(DESTDIR)$(man8dir)
+
+doc_install :: install-man
 
 %.1 %.5 %.7 %.8 : Documentation/%.pod
$(podman) -s $(subst .,,$(suffix $@)) $< $@+ && mv $@+ $@
 
-mantxt = $(addprefix Documentation/, $(addsuffix .txt, $(m1)))
+manuals :=
+manuals += $(m1)
+manuals += $(m5)
+manuals += $(m7)
+manuals += $(m8)
+
+mantxt = $(addprefix Documentation/, $(addsuffix .txt, $(manuals)))
 docs += $(mantxt)
 
 all :: $(mantxt)
diff --git a/Documentation/public-inbox-config.pod 
b/Documentation/public-inbox-config.pod
new file mode 100644
index 000..0037645
--- /dev/null
+++ b/Documentation/public-inbox-config.pod
@@ -0,0 +1,151 @@
+=head1 NAME
+
+public-inbox-config - public-inbox config file description
+
+=head1 SYNOPSIS
+
+~/.public-inbox/config
+
+=head1 DESCRIPTION
+
+The public-inbox config file is parseable by L<git-config(1)>.
+This is a global configuration file for mapping/discovering
+all public-inboxes used by a particular user.
+
+=head1 CONFIGURATION FILE
+
+=head2 EXAMPLE
+
+   [publicinbox "test"]
+   mainrepo = /home/user/path/to/test.git
+   ; multiple addresses are supported
+   address = t...@example.com
+   ; address = altern...@example.com
+   url = http://example.com/test
+   newsgroup = inbox.test
+
+=head2 VARIABLES
+
+=over 8
+
+=item publicinbox.<name>.address
+
+The email address of the public-inbox.  May be specified
+more than once for merging multiple mailing lists (or migrating
+to new addresses).  This must be specified at least once,
+the first value will be considered the primary address for
+informational purposes.
+
+Default: none, required
+
+=item publicinbox.<name>.mainrepo
+
+The 

Re: Draft of Git Rev News edition 18

2016-09-06 Thread Eric Wong
Josh Triplett <j...@joshtriplett.org> wrote:
> On Tue, Aug 16, 2016 at 09:27:04PM +0000, Eric Wong wrote:
> > As for other projects, I'm not aware of anybody else using it,
> > yet.  I have some small projects using it, but most of those are
> > one-off throwaways and I'm not comfortable promoting those along
> > with public-inbox.  I admit: I'm not comfortable promoting
> > anything I do, really.
> 
> Please take this as encouragement to do so.  I'd love to see the
> public-inbox equivalent to the main page of https://lists.debian.org/ ,
> as an example.  (And I'd love to have public-inbox archives of Debian
> mailing lists.)

Just pushed out some POD (which should build to manpages),
so maybe early adopters can start hosting mirrors themselves(*).

   https://public-inbox.org/meta/20160907004907.1479-...@80x24.org/

I hope public-inbox-overview(7) is a good starting point
(along with the existing INSTALL) and there'll be more docs
coming at some point...


Writing documentation tends to make my attention span drift all
over the place; so maybe parts don't make sense or were glossed
over, but I'll be glad to help clarify anything.  (Responding
to emails is generally easier for me since I can answer things
specifically, tough to do for generic docs)


I'll try to get a tarball release out soonish,
but my schedule is unpredictable.


(*) None of the code has had any security audit, yet;
and there's no warranty of course.



[PATCH 3/3] doc: set release and section properly for manpages

2016-09-01 Thread Eric Wong
This will be important as we will have more of them.
---
 Documentation/include.mk | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/include.mk b/Documentation/include.mk
index 4583f95..9d2c3b0 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -9,6 +9,7 @@ docs += $(shell git ls-files 'Documentation/*.txt')
 INSTALL = install
 PODMAN = pod2man
 PODMAN_OPTS = -v --stderr -d 1993-10-02 -c 'public-inbox user manual'
+PODMAN_OPTS += -r public-inbox.git
 podman = $(PODMAN) $(PODMAN_OPTS)
 PODTEXT = pod2text
 PODTEXT_OPTS = --stderr
@@ -41,8 +42,8 @@ install-man: man
test -z "$(man5)" || $(INSTALL) -m 644 $(man5) $(DESTDIR)$(man5dir)
test -z "$(man7)" || $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir)
 
-%.1 : Documentation/%.pod
-   $(podman) -s 1 $< $@+ && mv $@+ $@
+%.1 %.5 %.7 %.8 : Documentation/%.pod
+   $(podman) -s $(subst .,,$(suffix $@)) $< $@+ && mv $@+ $@
 
 mantxt = $(addprefix Documentation/, $(addsuffix .txt, $(m1)))
 docs += $(mantxt)
-- 
EW




[PATCH 1/3] txt2pre: use public-inbox internal APIs

2016-09-01 Thread Eric Wong
Since this is bundled with the source, we might as well use
internal APIs to avoid having duplicate code (and bugs :P)
---
 Documentation/include.mk |  3 ++-
 Documentation/txt2pre| 30 ++
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/Documentation/include.mk b/Documentation/include.mk
index 5d73028..51a914b 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -52,7 +52,8 @@ all :: $(mantxt)
 Documentation/%.txt : Documentation/%.pod
$(podtext) $< $@+ && mv $@+ $@
 
-txt2pre = ./Documentation/txt2pre <$< >$@+ && touch -r $< $@+ && mv $@+ $@
+txt2pre = $(PERL) -I lib ./Documentation/txt2pre <$< >$@+ && \
+   touch -r $< $@+ && mv $@+ $@
 txt := INSTALL README COPYING TODO
 dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt
 dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt)
diff --git a/Documentation/txt2pre b/Documentation/txt2pre
index 2dd1597..72de0b7 100755
--- a/Documentation/txt2pre
+++ b/Documentation/txt2pre
@@ -7,28 +7,18 @@
 # and requires indentation to output preformatted text.
 use strict;
 use warnings;
-use Encode qw/encode/;
+use PublicInbox::Linkify;
+use PublicInbox::Hval qw(ascii_html);
+
 my $str = eval { local $/; <> };
-my %xhtml_map = (
-   '"' => '',
-   '&' => '',
-   "'" => '',
-   '<' => '',
-   '>' => '',
-);
-$str =~ s/([<>&'"])/$xhtml_map{$1}/ge;
-$str = encode('us-ascii', $str, Encode::HTMLCREF);
 my ($title) = ($str =~ /\A([^\n]+)/);
-
-# temporarily swap  for escape so our s!! to add href works.
-# there's probably a way to do this with only a single s!! ...
-$str =~ s!!\e!g;
-$str =~ s!\b((nntp|ftp|https?)://[\w+\+\&\?\.\%\;/#-]+)!$1!g;
-
-$str =~ s!\e!!g; # swap escapes back to 
+$title = ascii_html($title);
+my $l = PublicInbox::Linkify->new;
+$str = $l->linkify_1($str);
+$str = ascii_html($str);
+$str = $l->linkify_2($str);
 
 print '',
-  '',
+  qq(),
   "$title",
-  "\n",  $str , '';
+  "",  $str , '';
-- 
EW




[PATCH 2/3] txt2pre: allow overriding title via env

2016-09-01 Thread Eric Wong
This will allow reasonable titles to be generated for
manpages.
---
 Documentation/include.mk | 2 +-
 Documentation/txt2pre| 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/include.mk b/Documentation/include.mk
index 51a914b..4583f95 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -59,7 +59,7 @@ dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt
 dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt)
 
 %.html: %.txt
-   $(txt2pre)
+   TITLE="$(basename $( };
-my ($title) = ($str =~ /\A([^\n]+)/);
+my $title = $ENV{TITLE};
+($title) = ($str =~ /\A([^\n]+)/) unless $title;
 $title = ascii_html($title);
 my $l = PublicInbox::Linkify->new;
 $str = $l->linkify_1($str);
-- 
EW




[PATCH] watch: use "publicinboxwatch" namespace

2016-09-01 Thread Eric Wong
We'll keep supporting "publicinboxlearn" indefinitely,
but "publicinboxwatch" is probably more appropriate
at the moment.

Noticed while writing documentation.
---
 lib/PublicInbox/WatchMaildir.pm | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index f34419a..c8ea3ed 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -18,21 +18,25 @@ sub new {
my ($class, $config) = @_;
my (%mdmap, @mdir, $spamc);
 
-   # XXX is "publicinboxlearn" really a good namespace for this?
-   my $k = 'publicinboxlearn.watchspam';
-   if (my $spamdir = $config->{$k}) {
-   if ($spamdir =~ s/\Amaildir://) {
-   $spamdir =~ s!/+\z!!;
-   # skip "new", no MUA has seen it, yet.
-   my $cur = "$spamdir/cur";
-   push @mdir, $cur;
-   $mdmap{$cur} = 'watchspam';
-   } else {
-   warn "unsupported $k=$spamdir\n";
+   # "publicinboxwatch" is the documented namespace
+   # "publicinboxlearn" is legacy but may be supported
+   # indefinitely...
+   foreach my $pfx (qw(publicinboxwatch publicinboxlearn)) {
+   my $k = "$pfx.watchspam";
+   if (my $spamdir = $config->{$k}) {
+   if ($spamdir =~ s/\Amaildir://) {
+   $spamdir =~ s!/+\z!!;
+   # skip "new", no MUA has seen it, yet.
+   my $cur = "$spamdir/cur";
+   push @mdir, $cur;
+   $mdmap{$cur} = 'watchspam';
+   } else {
+   warn "unsupported $k=$spamdir\n";
+   }
}
}
 
-   $k = 'publicinboxwatch.spamcheck';
+   my $k = 'publicinboxwatch.spamcheck';
my $spamcheck = $config->{$k};
if ($spamcheck) {
if ($spamcheck eq 'spamc') {
-- 
EW




[PATCH 0/10] search: more mairix prefix compatibility

2016-09-08 Thread Eric Wong
This brings us closer to the behavior of mairix(1) for search
by supporting n:, t:, c:, f:, tc:, tcf:, n:, b:, and bs:
prefixes as documented in the mairix(1) manpage.

We also introduce the use of q: and nq: prefixes for quoted and
non-quoted text, respectively.

There is a schema version change in [PATCH 7/10] to maintain
compatibility with Debian 7.x wheezy installs.  The in-place
reindexing would've been expensive anyway, so perhaps the
schema bump is a good idea, as creating a fresh index
should be faster than --reindex.

Eric Wong (10):
  search: allow searching user fields (To/Cc/From)
  search: drop longer subject: prefix for search
  search: more granular message body searching
  search: fix space regressions from recent changes
  search: match quote detection behavior of view
  search: increase term positions for each quoted hunk
  search: fix compatibility with Debian wheezy
  search: avoid mindlessly calling body_set
  search: match the behavior of WWW for indexing text
  search: index attachment filenames

 lib/PublicInbox/Search.pm|  32 +---
 lib/PublicInbox/SearchIdx.pm | 104 -
 t/search.t   | 120 ---
 3 files changed, 206 insertions(+), 50 deletions(-)




[PATCH 02/10] search: drop longer subject: prefix for search

2016-09-08 Thread Eric Wong
We only document the "s:" anyways.  While the long name is more
descriptive, the ambiguity makes agnostic caching (by Varnish or
similar) slightly harder and longer URLs are more likely to be
accidentally truncated when shared.
---
 lib/PublicInbox/Search.pm |  1 -
 t/search.t| 14 +++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index aec459b..3b25b66 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -58,7 +58,6 @@ my %bool_pfx_external = (
 );
 
 my %prob_prefix = (
-   subject => 'S',
s => 'S', # for mairix compatibility
m => 'Q', # 'mid' is exact, 'm' can do partial
f => 'A', # for mairix compatibility
diff --git a/t/search.t b/t/search.t
index bb0861a..7abaf83 100644
--- a/t/search.t
+++ b/t/search.t
@@ -123,19 +123,19 @@ sub filter_mids {
is($res->{total}, 0, "path variant `$p' does not match");
}
 
-   $res = $ro->query('subject:(Hello world)');
+   $res = $ro->query('s:(Hello world)');
@res = filter_mids($res);
-   is_deeply(\@res, \@exp, 'got expected results for subject:() match');
+   is_deeply(\@res, \@exp, 'got expected results for s:() match');
 
-   $res = $ro->query('subject:"Hello world"');
+   $res = $ro->query('s:"Hello world"');
@res = filter_mids($res);
-   is_deeply(\@res, \@exp, 'got expected results for subject:"" match');
+   is_deeply(\@res, \@exp, 'got expected results for s:"" match');
 
-   $res = $ro->query('subject:"Hello world"', {limit => 1});
+   $res = $ro->query('s:"Hello world"', {limit => 1});
is(scalar @{$res->{msgs}}, 1, "limit works");
my $first = $res->{msgs}->[0];
 
-   $res = $ro->query('subject:"Hello world"', {offset => 1});
+   $res = $ro->query('s:"Hello world"', {offset => 1});
is(scalar @{$res->{msgs}}, 1, "offset works");
my $second = $res->{msgs}->[0];
 
@@ -181,7 +181,7 @@ sub filter_mids {
$rw_commit->();
$ro->reopen;
 
-   # Subject:
+   # subject
my $res = $ro->query('ghost');
my @exp = sort qw(ghost-message@s ghost-reply@s);
my @res = filter_mids($res);
-- 
EW




[PATCH] view: handle missing Content-Type in message

2016-09-08 Thread Eric Wong
Email::MIME internally assumes "text/plain" for messages
missing a Content-Type, but does not expose that in the
Email::MIME::content_type API method.  We must assume it
ourselves to avoid uninitialized value warnings for the
rare (nowadays) MUAs which do not set it.
---
 lib/PublicInbox/View.pm | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 6997c1c..9359209 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -413,7 +413,6 @@ sub attach_link (;$) {
my $nl = $idx[-1] > 1 ? "\n" : '';
my $idx = join('.', @idx);
my $size = bytes::length($part->body);
-   $ct ||= 'text/plain';
 
# hide attributes normally, unless we want to aid users in
# spotting MUA problems:
@@ -444,10 +443,10 @@ sub attach_link (;$) {
 sub add_text_body {
my ($upfx, $p) = @_; # from msg_iter: [ Email::MIME, depth, @idx ]
my ($part, $depth, @idx) = @$p;
-   my $ct = $part->content_type;
+   my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
 
-   if (defined $ct && $ct =~ m!\btext/x?html\b!i) {
+   if ($ct =~ m!\btext/x?html\b!i) {
return attach_link($upfx, $ct, $p, $fn);
}
 
-- 
EW




[PATCH] import: hoist out _check_path function

2016-09-08 Thread Eric Wong
This reduces duplication, slightly.  We may be using it
yet again in a to-be-introduced function (or we may not
introduce it).
---
 lib/PublicInbox/Import.pm | 37 ++---
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index c2beb19..09dd38d 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -75,6 +75,15 @@ sub norm_body ($) {
$b
 }
 
+sub _check_path () {
+   my ($r, $w, $tip, $path) = @_;
+   return if $tip eq '';
+   print $w "ls $tip $path\n" or wfail;
+   local $/ = "\n";
+   defined(my $info = <$r>) or die "EOF from fast-import: $!";
+   $info =~ /\Amissing / ? undef : $info;
+}
+
 # returns undef on non-existent
 # ('MISMATCH', msg) on mismatch
 # (:MARK, msg) on success
@@ -86,20 +95,16 @@ sub remove {
 
my ($r, $w) = $self->gfi_start;
my $tip = $self->{tip};
-   return ('MISSING', undef) if $tip eq '';
-
-   print $w "ls $tip $path\n" or wfail;
-   local $/ = "\n";
-   my $check = <$r>;
-   defined $check or die "EOF from fast-import / ls: $!";
-   return ('MISSING', undef) if $check =~ /\Amissing /;
-   $check =~ m!\A100644 blob ([a-f0-9]{40})\t!s or die "not blob: $check";
+   my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef);
+   $info =~ m!\A100644 blob ([a-f0-9]{40})\t!s or die "not blob: $info";
my $blob = $1;
+
print $w "cat-blob $blob\n" or wfail;
-   $check = <$r>;
-   defined $check or die "EOF from fast-import / cat-blob: $!";
-   $check =~ /\A[a-f0-9]{40} blob (\d+)\n\z/ or
-   die "unexpected cat-blob response: $check";
+   local $/ = "\n";
+   $info = <$r>;
+   defined $info or die "EOF from fast-import / cat-blob: $!";
+   $info =~ /\A[a-f0-9]{40} blob (\d+)\n\z/ or
+   die "unexpected cat-blob response: $info";
my $left = $1;
my $offset = 0;
my $buf = '';
@@ -162,13 +167,7 @@ sub add {
 
my ($r, $w) = $self->gfi_start;
my $tip = $self->{tip};
-   if ($tip ne '') {
-   print $w "ls $tip $path\n" or wfail;
-   local $/ = "\n";
-   my $check = <$r>;
-   defined $check or die "EOF from fast-import: $!";
-   return unless $check =~ /\Amissing /;
-   }
+   _check_path($r, $w, $tip, $path) and return;
 
# kill potentially confusing/misleading headers
$mime->header_set($_) for qw(bytes lines content-length status);
-- 
EW




[PATCH] doc: document PERL_INLINE_DIRECTORY usage

2016-09-08 Thread Eric Wong
For now, we will document this since it allows better
performance without the burden of extensions.  Perhaps one day
far in the future Perl can natively support vfork(2) AND that
version of Perl will be widely available, but I suspect that day
is at least a decade away, if not two:

https://rt.perl.org/Ticket/Display.html?id=128227
---
 Documentation/public-inbox-daemon.pod | 12 
 Documentation/public-inbox-watch.pod  |  6 ++
 2 files changed, 18 insertions(+)

diff --git a/Documentation/public-inbox-daemon.pod 
b/Documentation/public-inbox-daemon.pod
index 42beda6..72794a5 100644
--- a/Documentation/public-inbox-daemon.pod
+++ b/Documentation/public-inbox-daemon.pod
@@ -133,6 +133,18 @@ See L
 Used by systemd (and compatible) installations for socket
 activation.  See L and L.
 
+=item PERL_INLINE_DIRECTORY
+
+Pointing this to a writable directory enables the use
+of L<Inline> and L<Inline::C> extensions which may provide
+platform-specific performance improvements.  Currently, this
+enables the use of L<vfork(2)> which speeds up subprocess
+spawning with the Linux kernel.
+
+public-inbox will never enable L<Inline::C> automatically without
+this environment variable set.  See L<Inline> and L<Inline::C>
+for more details.
+
 =back
 
 =head1 UPGRADING
diff --git a/Documentation/public-inbox-watch.pod 
b/Documentation/public-inbox-watch.pod
index 404303e..a59ba32 100644
--- a/Documentation/public-inbox-watch.pod
+++ b/Documentation/public-inbox-watch.pod
@@ -101,6 +101,12 @@ startup.
 config file. default: ~/.public-inbox/config
 See L
 
+=item PERL_INLINE_DIRECTORY
+
+This may affect any public-inbox processes, but is intended
+for long-lived ones such as C<public-inbox-watch> or network
+daemons.  See L<public-inbox-daemon(8)>.
+
 =back
 
 =head1 CONTACT
-- 
EW




[PATCH] TODO: updates for done items

2016-09-09 Thread Eric Wong
The existing string -> number date range Xapian query is good
enough, and having too much flexibility is probably bad for
caching (as well as increasing our attack surface, because
parsing queries is tricky).

Tags-as-skiplists are probably not worth the effort given
Xapian, and we may have to import old messages after-the-fact,
anyways, and message delivery for mirrors is never orderly.

Other items are all done and need to be maintained (like the
search engine docs for the mairix-compatibility features that
just got pushed out)
---
 TODO | 17 +++--
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/TODO b/TODO
index dfee6e5..d2efcbb 100644
--- a/TODO
+++ b/TODO
@@ -14,9 +14,6 @@ all need to be considered for everything we introduce)
 
 * TLS support for various daemons (including STARTTLS for NNTP and POP3)
 
-* Unix socket support for HTTP and POP3 server for use behind reverse
-  proxies like nginx
-
 * Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead
 
 * Optional reply-to-list support for mirroring lists that want it :<
@@ -41,12 +38,6 @@ all need to be considered for everything we introduce)
 
 * configurable constants (index limits, search results)
 
-* use tags as date-based skiplists for navigating history
-  (maybe not needed with Xapian support nowadays?)
-
-* handle Xapian date range queries:
-  http://mid.gmane.org/20151005222157.ge5...@survex.com
-
 * handle messages with multiple Message-IDs
 
 * handle broken double-bracketed References properly (maybe)
@@ -57,15 +48,13 @@ all need to be considered for everything we introduce)
 * portability to FreeBSD (and other Free Software *BSDs)
   ugh... https://rt.cpan.org/Ticket/Display.html?id=116615
 
-* documentation (pod/text)
+* improve documentation
 
 * linkify thread skeletons better
   https://public-inbox.org/git/6E3699DEA672430CAEA6DEFEDE6918F4@PhilipOakley/
 
 * generate sample CSS for use with Stylish/dillo/etc
 
-* builtin-help for search engine syntax
-
 * streaming Email::MIME replacement: currently we generate many
   allocations/strings for headers we never look at and slurp
   entire message bodies into memory.
@@ -74,6 +63,6 @@ all need to be considered for everything we introduce)
 * use REQUEST_URI properly for CGI / mod_perl2 compatibility
   with Message-IDs which include '%' (done?)
 
-* more test cases (use git fast-import to speed up creation)
+* more and better test cases (use git fast-import to speed up creation)
 
-* large mbox/Maildir/MH/NNTP spool import (use git fast-import)
+* large mbox/Maildir/MH/NNTP spool import (see PublicInbox::Import)
-- 
EW




[PATCH] nntp: cleanup: move use statements out of sub scope

2016-09-09 Thread Eric Wong
This clarifies the code somewhat, and we don't care to lazy-load
in NNTP.pm anyways since this is only used for a long-lived
daemon.
---
 lib/PublicInbox/NNTP.pm | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 7bfc6dd..b7143ff 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -15,6 +15,8 @@ require PublicInbox::EvCleanup;
 use Email::Simple;
 use POSIX qw(strftime);
 use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
+use Digest::SHA qw(sha1_hex);
+use Time::Local qw(timegm timelocal);
 use constant {
r501 => '501 command syntax error',
r221 => '221 Header follows',
@@ -237,7 +239,6 @@ sub cmd_listgroup ($;$) {
 
 sub parse_time ($$;$) {
my ($date, $time, $gmt) = @_;
-   use Time::Local qw();
my ($hh, $mm, $ss) = unpack('A2A2A2', $time);
if (defined $gmt) {
$gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt";
@@ -255,9 +256,9 @@ sub parse_time ($$;$) {
}
}
if ($gmt) {
-   Time::Local::timegm($ss, $mm, $hh, $DD, $MM - 1, $);
+   timegm($ss, $mm, $hh, $DD, $MM - 1, $);
} else {
-   Time::Local::timelocal($ss, $mm, $hh, $DD, $MM - 1, $);
+   timelocal($ss, $mm, $hh, $DD, $MM - 1, $);
}
 }
 
@@ -286,7 +287,6 @@ sub wildmat2re (;$) {
return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*');
my %keep;
my $salt = rand;
-   use Digest::SHA qw(sha1_hex);
my $tmp = $_[0];
 
$tmp =~ s#(?

[PATCH 03/17] thread: pass array refs instead of entire arrays

2016-10-05 Thread Eric Wong
Copying large arrays is expensive, so avoid it.
This reduces /$INBOX/ time by around 1%.
---
 lib/PublicInbox/SearchThread.pm | 25 +
 lib/PublicInbox/SearchView.pm   |  4 ++--
 lib/PublicInbox/View.pm |  4 ++--
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 41fe859..e086132 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -141,9 +141,9 @@ sub order {
$root->order_children( $ordersub );
 
# and untangle it
-   my @kids = $root->children;
-   $self->{rootset} = \@kids;
-   $root->remove_child($_) for @kids;
+   my $kids = $root->children;
+   $self->{rootset} = $kids;
+   $root->remove_child($_) for @$kids;
 }
 
 package PublicInbox::SearchThread::Container;
@@ -163,7 +163,7 @@ sub add_child {
croak "Cowardly refusing to become my own parent: $self"
  if $self == $child;
 
-   if (grep { $_ == $child } $self->children) {
+   if (grep { $_ == $child } @{$self->children}) {
# All is potentially correct with the world
$child->parent($self);
return;
@@ -220,14 +220,15 @@ sub children {
push @children, $visitor;
$visitor = $visitor->next
}
-   return @children;
+   \@children;
 }
 
 sub set_children {
-   my $self = shift;
-   my $walk = $self->child( shift );
-   while (@_) { $walk = $walk->next( shift ) }
-   $walk->next(undef) if $walk;
+   my ($self, $children) = @_;
+   my $walk = $self->{child} = shift @$children;
+   do {
+   $walk = $walk->{next} = shift @$children;
+   } while ($walk);
 }
 
 sub order_children {
@@ -238,9 +239,9 @@ sub order_children {
 
my $sub = sub {
my $cont = shift;
-   my @children = $cont->children;
-   return if @children < 2;
-   $cont->set_children( $ordersub->( @children ) );
+   my $children = $cont->children;
+   return if @$children < 2;
+   $cont->set_children( $ordersub->( $children ) );
};
$self->iterate_down( undef, $sub );
undef $sub;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index da31109..0d54c3d 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -156,10 +156,10 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
-   sort { (eval { $pct{$b->topmost->messageid} } || 0)
+   [ sort { (eval { $pct{$b->topmost->messageid} } || 0)
<=>
(eval { $pct{$a->topmost->messageid} } || 0)
-   } @_;
+   } @{$_[0]} ];
});
} else { # order by time (default for threaded view)
$th->order(*PublicInbox::View::sort_ts);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 9f1bf46..e90efda 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -856,10 +856,10 @@ sub skel_dump {
 }
 
 sub sort_ts {
-   sort {
+   [ sort {
(eval { $a->topmost->message->header('X-PI-TS') } || 0) <=>
(eval { $b->topmost->message->header('X-PI-TS') } || 0)
-   } @_;
+   } @{$_[0]} ];
 }
 
 sub _tryload_ghost ($$) {
-- 
EW




[PATCH 05/17] inbox: deal with ghost smsg

2016-10-05 Thread Eric Wong
smsg will be undef for ghost messages in a subsequent commit
---
 lib/PublicInbox/Inbox.pm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 414973c..8c63908 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -211,6 +211,8 @@ sub msg_by_path ($$;$) {
 sub msg_by_smsg ($$;$) {
my ($self, $smsg, $ref) = @_;
 
+   return unless defined $smsg; # ghost
+
# backwards compat to fallback to msg_by_mid
# TODO: remove if we bump SCHEMA_VERSION in Search.pm:
defined(my $blob = $smsg->blob) or return msg_by_mid($self, $smsg->mid);
-- 
EW




[PATCH 10/17] thread: avoid incrementing undefined value

2016-10-05 Thread Eric Wong
It is pointless to increment when setting a true value is
simpler as there is no need to read before writing.
---
 lib/PublicInbox/SearchThread.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index dad783e..ba31f43 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -179,7 +179,7 @@ sub recurse_down {
my %seen;
my @q = ($self);
while (my $cont = shift @q) {
-   $seen{$cont}++;
+   $seen{$cont} = 1;
$callback->($cont);
 
if (my $next = $cont->{next}) {
@@ -209,7 +209,7 @@ sub order_children {
push @visited, $walk;
 
# spot/break loops
-   $seen{$walk}++;
+   $seen{$walk} = 1;
 
my $child = $walk->{child};
if ($child && $seen{$child}) {
-- 
EW




[PATCH 14/17] thread: use hash + array instead of hand-rolled linked list

2016-10-05 Thread Eric Wong
This starts to show noticeable performance improvements when
attempting to thread over 400 messages; but the improvement
may not be measurable with fewer.

However, the resulting code is much shorter and (IMHO)
much easier to understand.
---
 MANIFEST|   1 +
 lib/PublicInbox/SearchThread.pm | 158 +---
 lib/PublicInbox/View.pm |  28 ---
 t/thread-cycle.t|  86 ++
 4 files changed, 138 insertions(+), 135 deletions(-)
 create mode 100644 t/thread-cycle.t

diff --git a/MANIFEST b/MANIFEST
index bcc4121..3a4d7c4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -155,6 +155,7 @@ t/qspawn.t
 t/search.t
 t/spamcheck_spamc.t
 t/spawn.t
+t/thread-cycle.t
 t/utf8.mbox
 t/view.t
 t/watch_maildir.t
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 153eef2..05de9ec 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -32,9 +32,8 @@ sub new {
 sub thread {
my $self = shift;
_add_message($self, $_) foreach @{$self->{messages}};
-   $self->{rootset} = [
-   grep { !$_->{parent} } values %{$self->{id_table}} ];
-   delete $self->{id_table};
+   my $id_table = delete $self->{id_table};
+   $self->{rootset} = [ grep { !$_->{parent} } values %$id_table ];
 }
 
 sub _get_cont_for_id ($$) {
@@ -82,156 +81,67 @@ sub _add_message ($$) {
 
 sub order {
my ($self, $ordersub) = @_;
-
-   # make a fake root
-   my $root = _get_cont_for_id($self, 'fakeroot');
-   $root->add_child( $_ ) for @{ $self->{rootset} };
-
-   # sort it
-   $root->order_children( $ordersub );
-
-   # and untangle it
-   my $kids = $root->children;
-   $self->{rootset} = $kids;
-   $root->remove_child($_) for @$kids;
+   my $rootset = $ordersub->($self->{rootset});
+   $self->{rootset} = $rootset;
+   $_->order_children($ordersub) for @$rootset;
 }
 
 package PublicInbox::SearchThread::Msg;
+use strict;
+use warnings;
 use Carp qw(croak);
 use Scalar::Util qw(weaken);
 
-sub new { my $self = shift; bless { id => shift }, $self; }
+sub new {
+   bless {
+   id => $_[1],
+   children => {}, # becomes an array when sorted by ->order(...)
+   }, $_[0];
+}
 
 sub add_child {
my ($self, $child) = @_;
croak "Cowardly refusing to become my own parent: $self"
  if $self == $child;
 
-   if (grep { $_ == $child } @{$self->children}) {
-   # All is potentially correct with the world
-   weaken($child->{parent} = $self);
-   return;
-   }
-
-   my $parent = $child->{parent};
-   remove_child($parent, $child) if $parent;
+   my $cid = $child->{id};
+   $self->{children}->{$cid} = $child;
 
-   $child->{next} = $self->{child};
-   $self->{child} = $child;
-   weaken($child->{parent} = $self);
-}
-
-sub remove_child {
-   my ($self, $child) = @_;
-
-   my $x = $self->{child} or return;
-   if ($x == $child) {  # First one's easy.
-   $self->{child} = $child->{next};
-   $child->{parent} = $child->{next} = undef;
-   return;
+   # reparenting:
+   if (defined(my $parent = $child->{parent})) {
+   delete $parent->{children}->{$cid};
}
 
-   my $prev = $x;
-   while ($x = $x->{next}) {
-   if ($x == $child) {
-   $prev->{next} = $x->{next}; # Unlink x
-   $x->{next} = $x->{parent} = undef; # Deparent it
-   return;
-   }
-   $prev = $x;
-   }
-   # oddly, we can get here
-   $child->{next} = $child->{parent} = undef;
+   weaken($child->{parent} = $self);
 }
 
 sub has_descendent {
-   my ($self, $child) = @_;
+   my ($cur, $child) = @_;
my %seen;
-   my @q = ($self);
-   while (my $cont = shift @q) {
-   $seen{$cont} = 1;
+   my @q = ($cur->{parent} || $cur);
 
-   return 1 if $cont == $child;
+   while (defined($cur = shift @q)) {
+   return 1 if $cur == $child;
 
-   if (my $next = $cont->{next}) {
-   if ($seen{$next}) {
-   $cont->{next} = undef;
-   } else {
-   push @q, $next;
-   }
-   }
-   if (my $child = $cont->{child}) {
-   if ($seen{$child}) {
-   $cont->{child} = undef;
-   } else {
-   push @q, $child;
-   }
+   if (!$seen{$cur}++) {
+   push @q, values %{$cur->{children}};
}
}
0;
 }
 
-sub children {
-   my $self = shift;
-   my @children;
-   my $visitor = $self->{child};
- 

[PATCH 13/17] thread: fix sorting without topmost

2016-10-05 Thread Eric Wong
This bug was hidden, and we may not be able to efficiently
implement a topmost subroutine with the hash-based (vs.
linked-list-based) container for threading in the next
commit.
---
 lib/PublicInbox/SearchView.pm | 5 ++---
 lib/PublicInbox/View.pm   | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index ebeb41f..cfe6dff 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -155,9 +155,8 @@ sub mset_thread {
$th->thread;
if ($q->{r}) { # order by relevance
$th->order(sub {
-   [ sort { (eval { $pct{$b->topmost->{id}} } || 0)
-   <=>
-   (eval { $pct{$a->topmost->{id}} } || 0)
+   [ sort { ( $pct{$b->{id}} || 0) <=>
+( $pct{$a->{id}} || 0)
} @{$_[0]} ];
});
} else { # order by time (default for threaded view)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 7554d54..c09b4a2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -857,8 +857,8 @@ sub skel_dump {
 
 sub sort_ts {
[ sort {
-   (eval { $a->topmost->{smsg}->ts } || 0) <=>
-   (eval { $b->topmost->{smsg}->ts } || 0)
+   (eval { $a->{smsg}->ts } || 0) <=>
+   (eval { $b->{smsg}->ts } || 0)
} @{$_[0]} ];
 }
 
-- 
EW




[PATCH 16/17] t/thread-cycle: test self-referential messages

2016-10-05 Thread Eric Wong
Some broken (or malicious) mailers may include a generated
Message-ID in their References header, so be prepared for it.
---
 t/thread-cycle.t | 39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/t/thread-cycle.t b/t/thread-cycle.t
index 4d60f7e..0e1ecfe 100644
--- a/t/thread-cycle.t
+++ b/t/thread-cycle.t
@@ -51,18 +51,7 @@ my @msgs = map {
}
 );
 
-my $th = PublicInbox::SearchThread->new(\@msgs);
-$th->thread;
-$th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
-my $st = '';
-my @q = map { (0, $_) } @{$th->{rootset}};
-while (@q) {
-   my $level = shift @q;
-   my $node = shift @q or next;
-   $st .= (" "x$level). "$node->{id}\n";
-   my $cl = $level + 1;
-   unshift @q, map { ($cl, $_) } @{$node->{children}}
-}
+my $st = thread_to_s(\@msgs);
 
 SKIP: {
skip 'Mail::Thread missing', 1 unless $mt;
@@ -71,7 +60,7 @@ SKIP: {
$mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ });
my $check = '';
 
-   @q = map { (0, $_) } $mt->rootset;
+   my @q = map { (0, $_) } $mt->rootset;
while (@q) {
my $level = shift @q;
my $node = shift @q or next;
@@ -81,6 +70,28 @@ SKIP: {
is($check, $st, 'Mail::Thread output matches');
 }
 
+@msgs = map { bless $_, 'PublicInbox::SearchMsg' } (
+   { mid => 'a@b' },
+   { mid => 'b@c', references => ' ' },
+   { mid => 'd@e', references => '' },
+);
+
+is(thread_to_s(\@msgs), "a\@b\n b\@c\nd\@e\n", 'ok with self-references');
+
 done_testing();
 
-1;
+sub thread_to_s {
+   my $th = PublicInbox::SearchThread->new(shift);
+   $th->thread;
+   $th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] });
+   my $st = '';
+   my @q = map { (0, $_) } @{$th->{rootset}};
+   while (@q) {
+   my $level = shift @q;
+   my $node = shift @q or next;
+   $st .= (" "x$level). "$node->{id}\n";
+   my $cl = $level + 1;
+   unshift @q, map { ($cl, $_) } @{$node->{children}};
+   }
+   $st;
+}
-- 
EW




[PATCH 15/17] view: remove redundant children array in thread views

2016-10-05 Thread Eric Wong
Each node has an entire arrayref of its children nowadays, so
there's no need to waste time and memory creating another one.
---
 lib/PublicInbox/View.pm | 63 -
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index d0c6d33..0f00458 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -201,19 +201,16 @@ sub _th_index_lite {
my $rv = '';
my $mapping = $ctx->{mapping} or return $rv;
my $pad = '  ';
-   # map = [children, attr, node, idx, level]
-   my $map = $mapping->{$mid_raw};
-   my $children = $map->[0];
+   my ($attr, $node, $idx, $level) = @{$mapping->{$mid_raw}};
+   my $children = $node->{children};
my $nr_c = scalar @$children;
my $nr_s = 0;
-   my $level = $map->[4];
-   my $idx = $map->[3];
my $siblings;
my $irt_map = $mapping->{$irt} if defined $irt;
if (defined $irt_map) {
-   $siblings = $irt_map->[0];
+   $siblings = $irt_map->[1]->{children};
$nr_s = scalar(@$siblings) - 1;
-   $rv .= $pad . $irt_map->[1];
+   $rv .= $pad . $irt_map->[0];
if ($idx > 0) {
my $prev = $siblings->[$idx - 1];
my $pmid = $prev->{id};
@@ -222,40 +219,38 @@ sub _th_index_lite {
$rv .= pad_link($pmid, $level, $s);
} elsif ($idx == 2) {
my $ppmid = $siblings->[0]->{id};
-   $rv .= $pad . $mapping->{$ppmid}->[1];
+   $rv .= $pad . $mapping->{$ppmid}->[0];
}
-   $rv .= $pad . $mapping->{$pmid}->[1];
+   $rv .= $pad . $mapping->{$pmid}->[0];
}
}
my $s_s = nr_to_s($nr_s, 'sibling', 'siblings');
my $s_c = nr_to_s($nr_c, 'reply', 'replies');
-   my $this = $map->[1];
-   $this =~ s!\n\z!\n!s;
-   $this =~ s!]+>([^<]+)!$1!s; # no point linking to self
-   $rv .= "@ $this";
-   my $node = $map->[2];
+   $attr =~ s!\n\z!\n!s;
+   $attr =~ s!]+>([^<]+)!$1!s; # no point linking to self
+   $rv .= "@ $attr";
if ($nr_c) {
my $cmid = $children->[0]->{id};
-   $rv .= $pad . $mapping->{$cmid}->[1];
+   $rv .= $pad . $mapping->{$cmid}->[0];
if ($nr_c > 2) {
my $s = ($nr_c - 1). ' more replies';
$rv .= pad_link($cmid, $level + 1, $s);
} elsif (my $cn = $children->[1]) {
-   $rv .= $pad . $mapping->{$cn->{id}}->[1];
+   $rv .= $pad . $mapping->{$cn->{id}}->[0];
}
}
 
my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
if ($next) {
my $nmid = $next->{id};
-   $rv .= $pad . $mapping->{$nmid}->[1];
+   $rv .= $pad . $mapping->{$nmid}->[0];
my $nnext = $nr_s - $idx;
if ($nnext > 2) {
my $s = ($nnext - 1).' subsequent siblings';
$rv .= pad_link($nmid, $level, $s);
} elsif (my $nn = $siblings->[$idx + 2]) {
-   $rv .= $pad . $mapping->{$nn->{id}}->[1];
+   $rv .= $pad . $mapping->{$nn->{id}}->[0];
}
}
$rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n";
@@ -263,26 +258,20 @@ sub _th_index_lite {
 
 sub walk_thread {
my ($th, $ctx, $cb) = @_;
-   my @q = map { (0, $_) } @{$th->{rootset}};
+   my @q = map { (0, $_, -1) } @{$th->{rootset}};
while (@q) {
-   my $level = shift @q;
-   my $node = shift @q or next;
-   $cb->($ctx, $level, $node);
+   my ($level, $node, $i) = splice(@q, 0, 3);
+   defined $node or next;
+   $cb->($ctx, $level, $node, $i);
++$level;
-   unshift @q, map { ($level, $_) } @{$node->{children}};
+   $i = 0;
+   unshift @q, map { ($level, $_, $i++) } @{$node->{children}};
}
 }
 
 sub pre_thread  {
-   my ($ctx, $level, $node) = @_;
-   my $mapping = $ctx->{mapping};
-   my $idx = -1;
-   if (my $parent = $node->{parent}) {
-   my $m = $mapping->{$parent->{id}}->[0];
-   $idx = scalar @$m;
-   push @$m, $node;
-   }
-   $mapping->{$node->{id}} = [ [], '', $node, $idx, $level ];
+   my ($ctx, $level, $node, $idx) = @_;
+   $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ];
skel_dump($ctx, 

[PATCH 17/17] thread: remove weaken dependency

2016-10-05 Thread Eric Wong
We have to walk through all the messages after threading
anyways to build the rootset, so we can just delete all
the parent references at that point.
---
 lib/PublicInbox/SearchThread.pm | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 05de9ec..003a8d1 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -33,7 +33,7 @@ sub thread {
my $self = shift;
_add_message($self, $_) foreach @{$self->{messages}};
my $id_table = delete $self->{id_table};
-   $self->{rootset} = [ grep { !$_->{parent} } values %$id_table ];
+   $self->{rootset} = [ grep { !delete $_->{parent} } values %$id_table ];
 }
 
 sub _get_cont_for_id ($$) {
@@ -90,7 +90,6 @@ package PublicInbox::SearchThread::Msg;
 use strict;
 use warnings;
 use Carp qw(croak);
-use Scalar::Util qw(weaken);
 
 sub new {
bless {
@@ -112,7 +111,7 @@ sub add_child {
delete $parent->{children}->{$cid};
}
 
-   weaken($child->{parent} = $self);
+   $child->{parent} = $self;
 }
 
 sub has_descendent {
-- 
EW




[PATCH 01/17] view: remove "subject dummy" references

2016-10-05 Thread Eric Wong
We will not care for inexact threading by subject or pruning.
---
 lib/PublicInbox/View.pm | 8 
 1 file changed, 8 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 9359209..a3b2681 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -723,8 +723,6 @@ sub anchor_for {
 
 sub ghost_parent {
my ($upfx, $mid) = @_;
-   # 'subject dummy' is used internally by Mail::Thread
-   return '[no common parent]' if ($mid eq 'subject dummy');
 
$mid = PublicInbox::Hval->new_msgid($mid);
my $href = $mid->{href};
@@ -838,12 +836,6 @@ sub skel_dump {
my $dst = $ctx->{dst};
my $mapping = $ctx->{mapping};
my $map = $mapping->{$mid} if $mapping;
-   if ($mid eq 'subject dummy') {
-   my $ncp = "\t[no common parent]\n";
-   $map->[1] = $ncp if $map;
-   $$dst .= $ncp;
-   return;
-   }
my $d = $ctx->{pct} ? '[irrelevant] ' # search result
: ' [not found] ';
$d .= indent_for($level) . th_pfx($level);
-- 
EW




<    1   2   3   4   5   6   7   8   9   10   >