Coderepo changes are probably more common than inbox changes, so
it probably makes sense to rescan and look for new coderepos on
404s, especially since we serve mirrored manifest.js.gz as-is.

I noticed my git.kernel.org mirror was serving manifest.js.gz
pointing to irretrievable repositories.  This should stop that.

We'll also drop the underscore ('_') and use `coderepo'
everywhere to be consistent with our documentation.

We may serve new inboxes in a similar way down the line, too;
but this change only affects coderepos for now since we can
guarantee the inbox manifest.js.gz never contains irretrievable
inboxes as it's dynamically generated.
---
v2 quiets down the "foo.dir unset" messages when clients try
bogus URLs.

Interdiff against v1:

        diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
        index a2b9c41e..2f1b4122 100644
        --- a/lib/PublicInbox/Config.pm
        +++ b/lib/PublicInbox/Config.pm
        @@ -348,10 +348,7 @@ sub parse_cgitrc {
         sub fill_coderepo {
                my ($self, $nick) = @_;
                my $pfx = "coderepo.$nick";
        -       my $dir = $self->{"$pfx.dir"} // do { # aka "GIT_DIR"
        -               warn "$pfx.dir unset\n";
        -               return;
        -       };
        +       my $dir = $self->{"$pfx.dir"} // return undef; # aka "GIT_DIR"
                my $git = PublicInbox::Git->new($dir);
                if (defined(my $cgits = $self->{"$pfx.cgiturl"})) {
                        $git->{cgit_url} = $cgits = _array($cgits);
        @@ -414,7 +411,8 @@ sub repo_objs {
                                }
                                my $repo = $coderepos->{$nick} //=
                                                        fill_coderepo($self, 
$nick);
        -                       push @repo_objs, $repo if $repo;
        +                       $repo ? push(@repo_objs, $repo) :
        +                               warn("coderepo.$nick.dir unset\n");
                        }
                        if (scalar @repo_objs) {
                                for (@repo_objs) {

 lib/PublicInbox/Cgit.pm        |  2 +-
 lib/PublicInbox/Config.pm      | 60 ++++++++++++++++++++++------------
 lib/PublicInbox/WwwCoderepo.pm | 26 ++++++++-------
 t/clone-coderepo.t             | 25 ++++++++++++++
 4 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm
index a1a2e828..4265cfb2 100644
--- a/lib/PublicInbox/Cgit.pm
+++ b/lib/PublicInbox/Cgit.pm
@@ -91,7 +91,7 @@ sub call {
        # handle requests without spawning cgit iff possible:
        if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
                my ($nick, $path) = ($1, $2);
-               if (my $git = $self->{pi_cfg}->{-code_repos}->{$nick}) {
+               if (my $git = $self->{pi_cfg}->get_coderepo($nick)) {
                        return serve($env, $git, $path);
                }
        } elsif ($path_info =~ m!$self->{static}! &&
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index df488f23..2f1b4122 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -47,7 +47,7 @@ sub new {
        $self->{-by_eidx_key} = {};
        $self->{-no_obfuscate} = {};
        $self->{-limiters} = {};
-       $self->{-code_repos} = {}; # nick => PublicInbox::Git object
+       $self->{-coderepos} = {}; # nick => PublicInbox::Git object
 
        if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
                $no = _array($no);
@@ -263,10 +263,11 @@ sub scan_tree_coderepo ($$) {
        scan_path_coderepo($self, $path, $path);
 }
 
-sub scan_projects_coderepo ($$$) {
-       my ($self, $list, $path) = @_;
-       open my $fh, '<', $list or do {
-               warn "failed to open cgit projectlist=$list: $!\n";
+sub scan_projects_coderepo ($$) {
+       my ($self, $path) = @_;
+       my $l = $self->{-cgit_project_list} // die 'BUG: no cgit_project_list';
+       open my $fh, '<', $l or do {
+               warn "failed to open cgit project-list=$l: $!\n";
                return;
        };
        while (<$fh>) {
@@ -275,6 +276,16 @@ sub scan_projects_coderepo ($$$) {
        }
 }
 
+sub apply_cgit_scan_path {
+       my ($self, @paths) = @_;
+       @paths or @paths = @{$self->{-cgit_scan_path}};
+       if (defined($self->{-cgit_project_list})) {
+               for my $p (@paths) { scan_projects_coderepo($self, $p) }
+       } else {
+               for my $p (@paths) { scan_tree_coderepo($self, $p) }
+       }
+}
+
 sub parse_cgitrc {
        my ($self, $cgitrc, $nesting) = @_;
        $cgitrc //= $self->{'publicinbox.cgitrc'} // return;
@@ -317,14 +328,12 @@ sub parse_cgitrc {
                        my ($k, $v) = ($1, $2);
                        $k =~ tr/-/_/;
                        $self->{"-cgit_$k"} = $v;
+                       delete $self->{-cgit_scan_path} if $k eq 'project_list';
                } elsif (m!\Ascan-path=(.+)\z!) {
                        # this depends on being after project-list in the
                        # config file, just like cgit.c
-                       if (defined(my $list = $self->{-cgit_project_list})) {
-                               scan_projects_coderepo($self, $list, $1);
-                       } else {
-                               scan_tree_coderepo($self, $1);
-                       }
+                       push @{$self->{-cgit_scan_path}}, $1;
+                       apply_cgit_scan_path($self, $1);
                } elsif (m!\A(?:css|favicon|logo|repo\.logo)=(/.+)\z!) {
                        # absolute paths for static files via PublicInbox::Cgit
                        $self->{-cgit_static}->{$1} = 1;
@@ -336,13 +345,10 @@ sub parse_cgitrc {
 }
 
 # parse a code repo, only git is supported at the moment
-sub fill_code_repo {
+sub fill_coderepo {
        my ($self, $nick) = @_;
        my $pfx = "coderepo.$nick";
-       my $dir = $self->{"$pfx.dir"} // do { # aka "GIT_DIR"
-               warn "$pfx.dir unset\n";
-               return;
-       };
+       my $dir = $self->{"$pfx.dir"} // return undef; # aka "GIT_DIR"
        my $git = PublicInbox::Git->new($dir);
        if (defined(my $cgits = $self->{"$pfx.cgiturl"})) {
                $git->{cgit_url} = $cgits = _array($cgits);
@@ -389,12 +395,12 @@ sub get_1 {
 
 sub repo_objs {
        my ($self, $ibxish) = @_;
-       my $ibx_code_repos = $ibxish->{coderepo} // return;
+       my $ibx_coderepos = $ibxish->{coderepo} // return;
        $ibxish->{-repo_objs} // do {
                parse_cgitrc($self, undef, 0);
-               my $code_repos = $self->{-code_repos};
+               my $coderepos = $self->{-coderepos};
                my @repo_objs;
-               for my $nick (@$ibx_code_repos) {
+               for my $nick (@$ibx_coderepos) {
                        my @parts = split(m!/!, $nick);
                        for (@parts) {
                                @parts = () unless valid_foo_name($_);
@@ -403,9 +409,10 @@ sub repo_objs {
                                warn "invalid coderepo name: `$nick'\n";
                                next;
                        }
-                       my $repo = $code_repos->{$nick} //=
-                                               fill_code_repo($self, $nick);
-                       push @repo_objs, $repo if $repo;
+                       my $repo = $coderepos->{$nick} //=
+                                               fill_coderepo($self, $nick);
+                       $repo ? push(@repo_objs, $repo) :
+                               warn("coderepo.$nick.dir unset\n");
                }
                if (scalar @repo_objs) {
                        for (@repo_objs) {
@@ -614,4 +621,15 @@ sub glob2re ($) {
        ($changes - $qm) ? $schema_host_port.$re : undef;
 }
 
+sub get_coderepo {
+       my ($self, $nick) = @_;
+       $self->{-coderepos}->{$nick} // do {
+               defined($self->{-cgit_scan_path}) ? do {
+                       apply_cgit_scan_path($self);
+                       $self->{-coderepos}->{$nick} =
+                                       fill_coderepo($self, $nick);
+               } : undef;
+       };
+}
+
 1;
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 730c2d0a..d7ac44ed 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -45,10 +45,10 @@ sub prepare_coderepos {
        # TODO: support gitweb and other repository viewers?
        $pi_cfg->parse_cgitrc(undef, 0);
 
-       my $code_repos = $pi_cfg->{-code_repos};
+       my $coderepos = $pi_cfg->{-coderepos};
        for my $k (grep(/\Acoderepo\.(?:.+)\.dir\z/, keys %$pi_cfg)) {
                $k = substr($k, length('coderepo.'), -length('.dir'));
-               $code_repos->{$k} //= $pi_cfg->fill_code_repo($k);
+               $coderepos->{$k} //= $pi_cfg->fill_coderepo($k);
        }
 
        # associate inboxes and extindices with coderepos for search:
@@ -288,36 +288,38 @@ sub srv { # endpoint called by PublicInbox::WWW
        my $path_info = $ctx->{env}->{PATH_INFO};
        my $git;
        # handle clone requests
-       my $cr = $self->{pi_cfg}->{-code_repos};
+       my $pi_cfg = $self->{pi_cfg};
        if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!x and
-               ($git = $cr->{$1})) {
+               ($git = $pi_cfg->get_coderepo($1))) {
                        PublicInbox::GitHTTPBackend::serve($ctx->{env},$git,$2);
-       } elsif ($path_info =~ m!\A/(.+?)/\z! and ($ctx->{git} = $cr->{$1})) {
+       } elsif ($path_info =~ m!\A/(.+?)/\z! and
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                $ctx->{wcr} = $self;
                sub { summary($ctx, $_[0]) }; # $_[0] = wcb
        } elsif ($path_info =~ m!\A/(.+?)/([a-f0-9]+)/s/([^/]+)?\z! and
-                       ($ctx->{git} = $cr->{$1})) {
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                $ctx->{lh} = $self->{log_fh};
                PublicInbox::ViewVCS::show($ctx, $2, $3);
        } elsif ($path_info =~ m!\A/(.+?)/tree/(.*)\z! and
-                       ($ctx->{git} = $cr->{$1})) {
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                $ctx->{lh} = $self->{log_fh};
                PublicInbox::RepoTree::srv_tree($ctx, $2) // r(404);
        } elsif ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and
-                       ($ctx->{git} = $cr->{$1})) {
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                $ctx->{wcr} = $self;
                PublicInbox::RepoSnapshot::srv($ctx, $2) // r(404);
        } elsif ($path_info =~ m!\A/(.+?)/atom/(.*)\z! and
-                       ($ctx->{git} = $cr->{$1})) {
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                $ctx->{lh} = $self->{log_fh};
                PublicInbox::RepoAtom::srv_atom($ctx, $2) // r(404);
        } elsif ($path_info =~ m!\A/(.+?)/tags\.atom\z! and
-                       ($ctx->{git} = $cr->{$1})) {
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                PublicInbox::RepoAtom::srv_tags_atom($ctx);
        } elsif ($path_info =~ m!\A/(.+?)/(refs/(?:heads|tags))/\z! and
-                       ($ctx->{git} = $cr->{$1})) {
+                       ($ctx->{git} = $pi_cfg->get_coderepo($1))) {
                refs_foo($self, $ctx, $2);
-       } elsif ($path_info =~ m!\A/(.+?)\z! and ($git = $cr->{$1})) {
+       } elsif ($path_info =~ m!\A/(.+?)\z! and
+                       ($git = $pi_cfg->get_coderepo($1))) {
                my $qs = $ctx->{env}->{QUERY_STRING};
                my $url = $git->base_url($ctx->{env});
                $url .= "?$qs" if $qs ne '';
diff --git a/t/clone-coderepo.t b/t/clone-coderepo.t
index e117293e..e28041e9 100644
--- a/t/clone-coderepo.t
+++ b/t/clone-coderepo.t
@@ -188,4 +188,29 @@ $set_manifest->($m);
 $test_puh->('--objstore=');
 ok(-e "$tmpdir/dst/objstore", 'objstore created');
 
+# ensure new repos can be detected
+{
+       xsys_e([qw(/bin/cp -Rp a.git c.git)], undef, { -C => "$tmpdir/src" });
+       open my $fh, '>>', "$tmpdir/src/projects.list" or xbail "open $!";
+       say $fh 'c.git' or xbail "say $!";
+       close $fh or xbail "close $!";
+       xsys_e([qw(git clone -q), "${url}c.git", "$tmpdir/dst/c.git"]);
+       SKIP: {
+               require_mods(qw(Plack::Test::ExternalServer LWP::UserAgent), 1);
+               use_ok($_) for qw(HTTP::Request::Common);
+               chop(my $uri = $url) eq '/' or xbail "BUG: no /";
+               local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = $uri;
+               my %opt = (ua => LWP::UserAgent->new);
+               $opt{ua}->max_redirect(0);
+               $opt{client} = sub {
+                       my ($cb) = @_;
+                       my $res = $cb->(GET('/c.git/'));
+                       is($res->code, 200, 'got 200 response for /');
+                       $res = $cb->(GET('/c.git/tree/'));
+                       is($res->code, 200, 'got 200 response for /tree');
+               };
+               Plack::Test::ExternalServer::test_psgi(%opt);
+       }
+}
+
 done_testing;

Reply via email to