On Mon, Feb 15, 2021 at 04:53:17PM +0100, Theo Buehler wrote:
> On Fri, Feb 12, 2021 at 10:01:38AM +0100, Claudio Jeker wrote:
> > On Mon, Feb 08, 2021 at 05:15:40PM +0100, Claudio Jeker wrote:
> > > Split the repository code into two parts:
> > > 
> > > - fetch of the trust anchors (the certs referenced by TAL files)
> > > - fetch of the MFT files of a repository
> > > 
> > > While the two things kind of look similar there are some differences.
> > > 
> > > - TA files are loaded via rsync or https URI (only one file needs to be
> > >   loaded)
> > > - MFT files need everything inside the repository to be loaded since they
> > >   reference to other files (.roa, .cer, .crl). These repositories are
> > >   synced once with rsync and many mft may be part of a repo. Also these
> > >   repositories can be synced via rsync or RRDP
> > > 
> > > To simplify these diverse options it is time to split the code up.
> > > Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup
> > > code into subfunctions repo_alloc() and repo_fetch() (both are also used
> > > by ta_lookup()). Use the caRepository URI to figure out the base URI.
> > > Simplify rsync_uri_parse() into rsync_base_uri() which clips of excess
> > > directories from the URI (else thousends of individual rsync calls would
> > > be made against the RIR's CA repos).
> > > 
> > > The big change is that the layout of the cache directory is changed.
> > > The cache will now have two base directories:
> > > - ta/ (for all trust anchors)
> > > - rsync/ (for all other repositories)
> > > 
> > 
> > My plan at the moment is that rpki-client will split the cache directory
> > into three parts. ta/, rsync/, and rrdp/. This is done to ensure that data
> > does not get mixed up. Once this is in then my next step is to support
> > https:// links in TAL files and fetch the trust anchor via https instead
> > of rsync. Later RRDP will follow.
> 
> This looks good to me although I have a couple of small nits inline.
> 
> ok tb
> 
> > 
> > -- 
> > :wq Claudio
> > 
> > Index: extern.h
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
> > retrieving revision 1.42
> > diff -u -p -r1.42 extern.h
> > --- extern.h        8 Feb 2021 09:22:53 -0000       1.42
> > +++ extern.h        8 Feb 2021 13:44:22 -0000
> > @@ -392,9 +392,7 @@ void             proc_parser(int) __attribute__((n
> >  
> >  /* Rsync-specific. */
> >  
> > -int                 rsync_uri_parse(const char **, size_t *,
> > -                   const char **, size_t *, const char **, size_t *,
> > -                   enum rtype *, const char *);
> > +char               *rsync_base_uri(const char *);
> >  void                proc_rsync(char *, char *, int) 
> > __attribute__((noreturn));
> >  
> >  /* Logging (though really used for OpenSSL errors). */
> > Index: main.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
> > retrieving revision 1.98
> > diff -u -p -r1.98 main.c
> > --- main.c  5 Feb 2021 12:26:52 -0000       1.98
> > +++ main.c  8 Feb 2021 13:50:20 -0000
> > @@ -78,11 +78,12 @@
> >   * An rsync repository.
> >   */
> >  struct     repo {
> > -   char            *repo;  /* repository rsync URI */
> > -   char            *local; /* local path name */
> > -   char            *notify; /* RRDB notify URI if available */
> > -   size_t           id; /* identifier (array index) */
> > -   int              loaded; /* whether loaded or not */
> > +   char            *repouri;       /* CA repository base URI */
> > +   char            *local;         /* local path name */
> > +   char            *uris[2];       /* URIs to fetch from */
> > +   size_t           id;            /* identifier (array index) */
> > +   int              uriidx;        /* which URI is fetched */
> > +   int              loaded;        /* whether loaded or not */
> >  };
> >  
> >  size_t     entity_queue;
> > @@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil
> >  }
> >  
> >  /*
> > - * Look up a repository, queueing it for discovery if not found.
> > + * Allocat a new repository be extending the repotable.
> >   */
> > -static const struct repo *
> > -repo_lookup(const char *uri)
> > +static struct repo *
> > +repo_alloc(void)
> >  {
> > -   const char      *host, *mod;
> > -   size_t           hostsz, modsz, i;
> > -   char            *local;
> > -   struct repo     *rp;
> > -   struct ibuf     *b;
> > -
> > -   if (!rsync_uri_parse(&host, &hostsz,
> > -       &mod, &modsz, NULL, NULL, NULL, uri))
> > -           errx(1, "%s: malformed", uri);
> > -
> > -   if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host,
> > -       (int)modsz, mod) == -1)
> > -           err(1, "asprintf");
> > -
> > -   /* Look up in repository table. */
> > -
> > -   for (i = 0; i < rt.reposz; i++) {
> > -           if (strcmp(rt.repos[i].local, local))
> > -                   continue;
> > -           free(local);
> > -           return &rt.repos[i];
> > -   }
> > +   struct repo *rp;
> >  
> >     rt.repos = reallocarray(rt.repos,
> >             rt.reposz + 1, sizeof(struct repo));
> 
> This line could be unwrapped. The code could also be simplified by using
> recallocarray() (it looks like the -portable update.sh is prepared for
> that).

I leave that for later. There are a bunch of reallocarrays around the code
that are used in the same way. Let's change them all.
 
> > @@ -320,28 +300,99 @@ repo_lookup(const char *uri)
> >     rp = &rt.repos[rt.reposz++];
> >     memset(rp, 0, sizeof(struct repo));
> >     rp->id = rt.reposz - 1;
> > -   rp->local = local;
> >  
> > -   if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL)
> > -           err(1, "strdup");
> > +   return rp;
> > +}
> >  
> > -   if (!noop) {
> > -           if (asprintf(&local, "%s", rp->local) == -1)
> > -                   err(1, "asprintf");
> > -           logx("%s: pulling from network", local);
> > -           if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
> > -                   err(1, NULL);
> > -           io_simple_buffer(b, &rp->id, sizeof(rp->id));
> > -           io_str_buffer(b, local);
> > -           io_str_buffer(b, rp->repo);
> > -           ibuf_close(&rsyncq, b);
> > -           free(local);
> > -   } else {
> > +static void
> > +repo_fetch(struct repo *rp)
> > +{
> > +   struct ibuf     *b;
> > +
> > +   if (noop) {
> >             rp->loaded = 1;
> >             logx("%s: using cache", rp->local);
> >             stats.repos++;
> >             /* there is nothing in the queue so no need to flush */
> > +           return;
> > +   }
> > +
> > +   logx("%s: pulling from network", rp->local);
> > +   if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
> > +           err(1, NULL);
> > +   io_simple_buffer(b, &rp->id, sizeof(rp->id));
> > +   io_str_buffer(b, rp->local);
> > +   io_str_buffer(b, rp->uris[0]);
> > +   ibuf_close(&rsyncq, b);
> > +}
> > +
> > +/*
> > + * Look up a trust anchor, queueing it for download if not found.
> > + */
> > +static const struct repo *
> > +ta_lookup(const struct tal *tal)
> > +{
> > +   struct repo     *rp;
> > +   char            *local;
> > +   size_t          i, j;
> > +
> > +   if (asprintf(&local, "ta/%s", tal->descr) == -1)
> > +           err(1, "asprinf");
> > +
> > +   /* Look up in repository table. (Lookup should actually fail here) */
> > +   for (i = 0; i < rt.reposz; i++) {
> > +           if (rt.repos[i].repouri != NULL ||
> > +               strcmp(rt.repos[i].local, local))
> 
> While I think this is correct, it looks odd. Couldn't this check for
> rt.repos[i].local != NULL?

No. The rt.repos[i].repouri check is there to skip repo's added by
repo_lookup. In repo_lookup the same check is there but the other way
around. local on the other hand can't be NULL, it will always be set.

I think the repouri check could be removed since there is no way that have
conflicting local repo paths. I will just remove the check.
 
> > +                   continue;
> > +           free(local);
> > +           return &rt.repos[i];
> >     }
> > +
> > +   rp = repo_alloc();
> > +   rp->local = local;
> > +   for (i = 0, j = 0; i < tal->urisz && j < 2; i++) {
> > +           if (strncasecmp(tal->uri[i], "rsync://", 8) != 0)
> 
> Likely just a result of refactoring, but the strcmp() above didn't explicitly
> compare to 0. I would stick to one idiom (explicit comparison seems to be used
> more frequently in this code).
 
Yeah, my lazyness shines through. I added the != 0 to the strcmp() calls.

> > +                   continue;       /* ignore non rsync URI for now */
> > +           rp->uris[j++] = tal->uri[i];
> > +   }
> > +   if (j == 0)
> > +           errx(1, "TAL file has no rsync:// URI");
> > +
> > +   repo_fetch(rp);
> > +   return rp;
> > +}
> > +
> > +/*
> > + * Look up a repository, queueing it for discovery if not found.
> > + */
> > +static const struct repo *
> > +repo_lookup(const char *uri)
> > +{
> > +   char            *local, *repo;
> > +   struct repo     *rp;
> > +   size_t           i;
> > +
> > +   if ((repo = rsync_base_uri(uri)) == NULL)
> > +           return NULL;
> > +
> > +   /* Look up in repository table. */
> > +   for (i = 0; i < rt.reposz; i++) {
> > +           if (rt.repos[i].repouri == NULL ||
> > +               strcmp(rt.repos[i].repouri, repo))
> 
> ditto
> 
> > +                   continue;
> > +           free(repo);
> > +           return &rt.repos[i];
> > +   }
> > +
> > +   rp = repo_alloc();
> > +   rp->repouri = repo;
> > +   local = strchr(repo, ':') + strlen("://");
> > +   if (asprintf(&rp->local, "rsync/%s", local) == -1)
> > +           err(1, "asprintf");
> > +   if ((rp->uris[0] = strdup(repo)) == NULL)
> > +           err(1, "strdup");
> > +
> > +   repo_fetch(rp);
> >     return rp;
> >  }
> >  
> > @@ -353,7 +404,10 @@ repo_filename(const struct repo *repo, c
> >  {
> >     char *nfile;
> >  
> > -   uri += strlen(repo->repo) + 1;
> > +   if (strstr(uri, repo->repouri) != uri)
> > +           errx(1, "%s: URI outside of repository", uri);
> > +   uri += strlen(repo->repouri) + 1;       /* skip base and '/' */
> > +
> >     if (asprintf(&nfile, "%s/%s", repo->local, uri) == -1)
> >             err(1, "asprintf");
> >     return nfile;
> > @@ -484,22 +538,17 @@ queue_add_from_tal(struct entityq *q, co
> >  {
> >     char                    *nfile;
> >     const struct repo       *repo;
> > -   const char              *uri = NULL;
> > -   size_t                   i;
> > +   const char              *uri;
> >  
> >     assert(tal->urisz);
> >  
> > -   for (i = 0; i < tal->urisz; i++) {
> > -           uri = tal->uri[i];
> > -           if (strncasecmp(uri, "rsync://", 8) == 0)
> > -                   break;
> > -   }
> > -   if (uri == NULL)
> > -           errx(1, "TAL file has no rsync:// URI");
> > -
> >     /* Look up the repository. */
> > -   repo = repo_lookup(uri);
> > -   nfile = repo_filename(repo, uri);
> > +   repo = ta_lookup(tal);
> > +
> > +   uri = strrchr(repo->uris[0], '/');
> > +   assert(uri);
> > +   if (asprintf(&nfile, "%s/%s", repo->local, uri + 1) == -1)
> > +           err(1, "asprintf");
> >  
> >     entityq_add(q, nfile, RTYPE_CER, repo, tal->pkey,
> >         tal->pkeysz, tal->descr);
> > @@ -515,6 +564,9 @@ queue_add_from_cert(struct entityq *q, c
> >     char                    *nfile;
> >  
> >     repo = repo_lookup(cert->mft);
> > +   if (repo == NULL) /* bad repository URI */
> > +           return;
> > +
> >     nfile = repo_filename(repo, cert->mft);
> >  
> >     entityq_add(q, nfile, RTYPE_MFT, repo, NULL, 0, NULL);
> > @@ -1081,8 +1133,10 @@ main(int argc, char *argv[])
> >  
> >     /* Memory cleanup. */
> >     for (i = 0; i < rt.reposz; i++) {
> > +           free(rt.repos[i].repouri);
> >             free(rt.repos[i].local);
> > -           free(rt.repos[i].repo);
> > +           free(rt.repos[i].uris[0]);
> > +           free(rt.repos[i].uris[1]);
> >     }
> >     free(rt.repos);
> >  
> > Index: rsync.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/rpki-client/rsync.c,v
> > retrieving revision 1.16
> > diff -u -p -r1.16 rsync.c
> > --- rsync.c 3 Feb 2021 09:29:22 -0000       1.16
> > +++ rsync.c 8 Feb 2021 13:43:44 -0000
> > @@ -45,110 +45,50 @@ struct rsyncproc {
> >  };
> >  
> >  /*
> > - * Conforms to RFC 5781.
> > - * Note that "Source" is broken down into the module, path, and also
> > - * file type relevant to RPKI.
> > - * Any of the pointers (except "uri") may be NULL.
> > - * Returns zero on failure, non-zero on success.
> > + * Return the base of a rsync URI (rsync://hostname/module). The
> > + * caRepository provided by the RIR CAs point deeper than they should
> > + * which would result in many rsync calls for almost every subdirectory.
> > + * This is inefficent so instead crop the URI to a common base.
> > + * The returned string needs to be freed by the caller.
> >   */
> > -int
> > -rsync_uri_parse(const char **hostp, size_t *hostsz,
> > -    const char **modulep, size_t *modulesz,
> > -    const char **pathp, size_t *pathsz,
> > -    enum rtype *rtypep, const char *uri)
> > +char *
> > +rsync_base_uri(const char *uri)
> >  {
> > -   const char      *host, *module, *path;
> > -   size_t           sz;
> > -
> > -   /* Initialise all output values to NULL or 0. */
> > -
> > -   if (hostsz != NULL)
> > -           *hostsz = 0;
> > -   if (modulesz != NULL)
> > -           *modulesz = 0;
> > -   if (pathsz != NULL)
> > -           *pathsz = 0;
> > -   if (hostp != NULL)
> > -           *hostp = 0;
> > -   if (modulep != NULL)
> > -           *modulep = 0;
> > -   if (pathp != NULL)
> > -           *pathp = 0;
> > -   if (rtypep != NULL)
> > -           *rtypep = RTYPE_EOF;
> > +   const char *host, *module, *rest;
> >  
> >     /* Case-insensitive rsync URI. */
> > -
> >     if (strncasecmp(uri, "rsync://", 8)) {
> 
> ditto
> 
> >             warnx("%s: not using rsync schema", uri);
> > -           return 0;
> > +           return NULL;
> >     }
> >  
> >     /* Parse the non-zero-length hostname. */
> > -
> >     host = uri + 8;
> >  
> >     if ((module = strchr(host, '/')) == NULL) {
> >             warnx("%s: missing rsync module", uri);
> > -           return 0;
> > +           return NULL;
> >     } else if (module == host) {
> >             warnx("%s: zero-length rsync host", uri);
> > -           return 0;
> > +           return NULL;
> >     }
> >  
> > -   if (hostp != NULL)
> > -           *hostp = host;
> > -   if (hostsz != NULL)
> > -           *hostsz = module - host;
> > -
> >     /* The non-zero-length module follows the hostname. */
> > -
> > -   if (module[1] == '\0') {
> > +   module++;
> > +   if (*module == '\0') {
> >             warnx("%s: zero-length rsync module", uri);
> > -           return 0;
> > +           return NULL;
> >     }
> >  
> > -   module++;
> > -
> >     /* The path component is optional. */
> > -
> > -   if ((path = strchr(module, '/')) == NULL) {
> > -           assert(*module != '\0');
> > -           if (modulep != NULL)
> > -                   *modulep = module;
> > -           if (modulesz != NULL)
> > -                   *modulesz = strlen(module);
> > -           return 1;
> > -   } else if (path == module) {
> > +   if ((rest = strchr(module, '/')) == NULL) {
> > +           return strdup(uri);
> > +   } else if (rest == module) {
> >             warnx("%s: zero-length module", uri);
> > -           return 0;
> > -   }
> > -
> > -   if (modulep != NULL)
> > -           *modulep = module;
> > -   if (modulesz != NULL)
> > -           *modulesz = path - module;
> > -
> > -   path++;
> > -   sz = strlen(path);
> > -
> > -   if (pathp != NULL)
> > -           *pathp = path;
> > -   if (pathsz != NULL)
> > -           *pathsz = sz;
> > -
> > -   if (rtypep != NULL && sz > 4) {
> > -           if (strcasecmp(path + sz - 4, ".roa") == 0)
> > -                   *rtypep = RTYPE_ROA;
> > -           else if (strcasecmp(path + sz - 4, ".mft") == 0)
> > -                   *rtypep = RTYPE_MFT;
> > -           else if (strcasecmp(path + sz - 4, ".cer") == 0)
> > -                   *rtypep = RTYPE_CER;
> > -           else if (strcasecmp(path + sz - 4, ".crl") == 0)
> > -                   *rtypep = RTYPE_CRL;
> > +           return NULL;
> >     }
> >  
> > -   return 1;
> > +   return strndup(uri, rest - uri);
> >  }
> >  
> >  static void
> > 

Thanks for the review.

-- 
:wq Claudio

Reply via email to