Coderepo changes are probably more common than inbox changes, so it probably makes sense to rescan and look for new coderepos on 404s, especially since we serve mirrored manifest.js.gz as-is.
I noticed my git.kernel.org mirror was serving manifest.js.gz pointing to irretrievable repositories. This should stop that. We'll also drop the underscore ('_') and use `coderepo' everywhere to be consistent with our documentation. We may serve new inboxes in a similar way down the line, too; but this change only affects coderepos for now since we can guarantee the inbox manifest.js.gz never contains irretrievable inboxes as it's dynamically generated. --- v2 quiets down the "foo.dir unset" messages when clients try bogus URLs. Interdiff against v1: diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index a2b9c41e..2f1b4122 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -348,10 +348,7 @@ sub parse_cgitrc { sub fill_coderepo { my ($self, $nick) = @_; my $pfx = "coderepo.$nick"; - my $dir = $self->{"$pfx.dir"} // do { # aka "GIT_DIR" - warn "$pfx.dir unset\n"; - return; - }; + my $dir = $self->{"$pfx.dir"} // return undef; # aka "GIT_DIR" my $git = PublicInbox::Git->new($dir); if (defined(my $cgits = $self->{"$pfx.cgiturl"})) { $git->{cgit_url} = $cgits = _array($cgits); @@ -414,7 +411,8 @@ sub repo_objs { } my $repo = $coderepos->{$nick} //= fill_coderepo($self, $nick); - push @repo_objs, $repo if $repo; + $repo ? push(@repo_objs, $repo) : + warn("coderepo.$nick.dir unset\n"); } if (scalar @repo_objs) { for (@repo_objs) { lib/PublicInbox/Cgit.pm | 2 +- lib/PublicInbox/Config.pm | 60 ++++++++++++++++++++++------------ lib/PublicInbox/WwwCoderepo.pm | 26 ++++++++------- t/clone-coderepo.t | 25 ++++++++++++++ 4 files changed, 79 insertions(+), 34 deletions(-) diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index a1a2e828..4265cfb2 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -91,7 +91,7 @@ sub call { # handle requests without spawning cgit iff possible: if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my ($nick, $path) = ($1, $2); - if (my $git = $self->{pi_cfg}->{-code_repos}->{$nick}) { + if (my $git = $self->{pi_cfg}->get_coderepo($nick)) { return serve($env, $git, $path); } } elsif ($path_info =~ m!$self->{static}! && diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index df488f23..2f1b4122 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -47,7 +47,7 @@ sub new { $self->{-by_eidx_key} = {}; $self->{-no_obfuscate} = {}; $self->{-limiters} = {}; - $self->{-code_repos} = {}; # nick => PublicInbox::Git object + $self->{-coderepos} = {}; # nick => PublicInbox::Git object if (my $no = delete $self->{'publicinbox.noobfuscate'}) { $no = _array($no); @@ -263,10 +263,11 @@ sub scan_tree_coderepo ($$) { scan_path_coderepo($self, $path, $path); } -sub scan_projects_coderepo ($$$) { - my ($self, $list, $path) = @_; - open my $fh, '<', $list or do { - warn "failed to open cgit projectlist=$list: $!\n"; +sub scan_projects_coderepo ($$) { + my ($self, $path) = @_; + my $l = $self->{-cgit_project_list} // die 'BUG: no cgit_project_list'; + open my $fh, '<', $l or do { + warn "failed to open cgit project-list=$l: $!\n"; return; }; while (<$fh>) { @@ -275,6 +276,16 @@ sub scan_projects_coderepo ($$$) { } } +sub apply_cgit_scan_path { + my ($self, @paths) = @_; + @paths or @paths = @{$self->{-cgit_scan_path}}; + if (defined($self->{-cgit_project_list})) { + for my $p (@paths) { scan_projects_coderepo($self, $p) } + } else { + for my $p (@paths) { scan_tree_coderepo($self, $p) } + } +} + sub parse_cgitrc { my ($self, $cgitrc, $nesting) = @_; $cgitrc //= $self->{'publicinbox.cgitrc'} // return; @@ -317,14 +328,12 @@ sub parse_cgitrc { my ($k, $v) = ($1, $2); $k =~ tr/-/_/; $self->{"-cgit_$k"} = $v; + delete $self->{-cgit_scan_path} if $k eq 'project_list'; } elsif (m!\Ascan-path=(.+)\z!) { # this depends on being after project-list in the # config file, just like cgit.c - if (defined(my $list = $self->{-cgit_project_list})) { - scan_projects_coderepo($self, $list, $1); - } else { - scan_tree_coderepo($self, $1); - } + push @{$self->{-cgit_scan_path}}, $1; + apply_cgit_scan_path($self, $1); } elsif (m!\A(?:css|favicon|logo|repo\.logo)=(/.+)\z!) { # absolute paths for static files via PublicInbox::Cgit $self->{-cgit_static}->{$1} = 1; @@ -336,13 +345,10 @@ sub parse_cgitrc { } # parse a code repo, only git is supported at the moment -sub fill_code_repo { +sub fill_coderepo { my ($self, $nick) = @_; my $pfx = "coderepo.$nick"; - my $dir = $self->{"$pfx.dir"} // do { # aka "GIT_DIR" - warn "$pfx.dir unset\n"; - return; - }; + my $dir = $self->{"$pfx.dir"} // return undef; # aka "GIT_DIR" my $git = PublicInbox::Git->new($dir); if (defined(my $cgits = $self->{"$pfx.cgiturl"})) { $git->{cgit_url} = $cgits = _array($cgits); @@ -389,12 +395,12 @@ sub get_1 { sub repo_objs { my ($self, $ibxish) = @_; - my $ibx_code_repos = $ibxish->{coderepo} // return; + my $ibx_coderepos = $ibxish->{coderepo} // return; $ibxish->{-repo_objs} // do { parse_cgitrc($self, undef, 0); - my $code_repos = $self->{-code_repos}; + my $coderepos = $self->{-coderepos}; my @repo_objs; - for my $nick (@$ibx_code_repos) { + for my $nick (@$ibx_coderepos) { my @parts = split(m!/!, $nick); for (@parts) { @parts = () unless valid_foo_name($_); @@ -403,9 +409,10 @@ sub repo_objs { warn "invalid coderepo name: `$nick'\n"; next; } - my $repo = $code_repos->{$nick} //= - fill_code_repo($self, $nick); - push @repo_objs, $repo if $repo; + my $repo = $coderepos->{$nick} //= + fill_coderepo($self, $nick); + $repo ? push(@repo_objs, $repo) : + warn("coderepo.$nick.dir unset\n"); } if (scalar @repo_objs) { for (@repo_objs) { @@ -614,4 +621,15 @@ sub glob2re ($) { ($changes - $qm) ? $schema_host_port.$re : undef; } +sub get_coderepo { + my ($self, $nick) = @_; + $self->{-coderepos}->{$nick} // do { + defined($self->{-cgit_scan_path}) ? do { + apply_cgit_scan_path($self); + $self->{-coderepos}->{$nick} = + fill_coderepo($self, $nick); + } : undef; + }; +} + 1; diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index 730c2d0a..d7ac44ed 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -45,10 +45,10 @@ sub prepare_coderepos { # TODO: support gitweb and other repository viewers? $pi_cfg->parse_cgitrc(undef, 0); - my $code_repos = $pi_cfg->{-code_repos}; + my $coderepos = $pi_cfg->{-coderepos}; for my $k (grep(/\Acoderepo\.(?:.+)\.dir\z/, keys %$pi_cfg)) { $k = substr($k, length('coderepo.'), -length('.dir')); - $code_repos->{$k} //= $pi_cfg->fill_code_repo($k); + $coderepos->{$k} //= $pi_cfg->fill_coderepo($k); } # associate inboxes and extindices with coderepos for search: @@ -288,36 +288,38 @@ sub srv { # endpoint called by PublicInbox::WWW my $path_info = $ctx->{env}->{PATH_INFO}; my $git; # handle clone requests - my $cr = $self->{pi_cfg}->{-code_repos}; + my $pi_cfg = $self->{pi_cfg}; if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!x and - ($git = $cr->{$1})) { + ($git = $pi_cfg->get_coderepo($1))) { PublicInbox::GitHTTPBackend::serve($ctx->{env},$git,$2); - } elsif ($path_info =~ m!\A/(.+?)/\z! and ($ctx->{git} = $cr->{$1})) { + } elsif ($path_info =~ m!\A/(.+?)/\z! and + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { $ctx->{wcr} = $self; sub { summary($ctx, $_[0]) }; # $_[0] = wcb } elsif ($path_info =~ m!\A/(.+?)/([a-f0-9]+)/s/([^/]+)?\z! and - ($ctx->{git} = $cr->{$1})) { + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { $ctx->{lh} = $self->{log_fh}; PublicInbox::ViewVCS::show($ctx, $2, $3); } elsif ($path_info =~ m!\A/(.+?)/tree/(.*)\z! and - ($ctx->{git} = $cr->{$1})) { + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { $ctx->{lh} = $self->{log_fh}; PublicInbox::RepoTree::srv_tree($ctx, $2) // r(404); } elsif ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and - ($ctx->{git} = $cr->{$1})) { + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { $ctx->{wcr} = $self; PublicInbox::RepoSnapshot::srv($ctx, $2) // r(404); } elsif ($path_info =~ m!\A/(.+?)/atom/(.*)\z! and - ($ctx->{git} = $cr->{$1})) { + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { $ctx->{lh} = $self->{log_fh}; PublicInbox::RepoAtom::srv_atom($ctx, $2) // r(404); } elsif ($path_info =~ m!\A/(.+?)/tags\.atom\z! and - ($ctx->{git} = $cr->{$1})) { + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { PublicInbox::RepoAtom::srv_tags_atom($ctx); } elsif ($path_info =~ m!\A/(.+?)/(refs/(?:heads|tags))/\z! and - ($ctx->{git} = $cr->{$1})) { + ($ctx->{git} = $pi_cfg->get_coderepo($1))) { refs_foo($self, $ctx, $2); - } elsif ($path_info =~ m!\A/(.+?)\z! and ($git = $cr->{$1})) { + } elsif ($path_info =~ m!\A/(.+?)\z! and + ($git = $pi_cfg->get_coderepo($1))) { my $qs = $ctx->{env}->{QUERY_STRING}; my $url = $git->base_url($ctx->{env}); $url .= "?$qs" if $qs ne ''; diff --git a/t/clone-coderepo.t b/t/clone-coderepo.t index e117293e..e28041e9 100644 --- a/t/clone-coderepo.t +++ b/t/clone-coderepo.t @@ -188,4 +188,29 @@ $set_manifest->($m); $test_puh->('--objstore='); ok(-e "$tmpdir/dst/objstore", 'objstore created'); +# ensure new repos can be detected +{ + xsys_e([qw(/bin/cp -Rp a.git c.git)], undef, { -C => "$tmpdir/src" }); + open my $fh, '>>', "$tmpdir/src/projects.list" or xbail "open $!"; + say $fh 'c.git' or xbail "say $!"; + close $fh or xbail "close $!"; + xsys_e([qw(git clone -q), "${url}c.git", "$tmpdir/dst/c.git"]); + SKIP: { + require_mods(qw(Plack::Test::ExternalServer LWP::UserAgent), 1); + use_ok($_) for qw(HTTP::Request::Common); + chop(my $uri = $url) eq '/' or xbail "BUG: no /"; + local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = $uri; + my %opt = (ua => LWP::UserAgent->new); + $opt{ua}->max_redirect(0); + $opt{client} = sub { + my ($cb) = @_; + my $res = $cb->(GET('/c.git/')); + is($res->code, 200, 'got 200 response for /'); + $res = $cb->(GET('/c.git/tree/')); + is($res->code, 200, 'got 200 response for /tree'); + }; + Plack::Test::ExternalServer::test_psgi(%opt); + } +} + done_testing;