On Mon, Feb 15, 2021 at 04:53:17PM +0100, Theo Buehler wrote: > On Fri, Feb 12, 2021 at 10:01:38AM +0100, Claudio Jeker wrote: > > On Mon, Feb 08, 2021 at 05:15:40PM +0100, Claudio Jeker wrote: > > > Split the repository code into two parts: > > > > > > - fetch of the trust anchors (the certs referenced by TAL files) > > > - fetch of the MFT files of a repository > > > > > > While the two things kind of look similar there are some differences. > > > > > > - TA files are loaded via rsync or https URI (only one file needs to be > > > loaded) > > > - MFT files need everything inside the repository to be loaded since they > > > reference to other files (.roa, .cer, .crl). These repositories are > > > synced once with rsync and many mft may be part of a repo. Also these > > > repositories can be synced via rsync or RRDP > > > > > > To simplify these diverse options it is time to split the code up. > > > Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup > > > code into subfunctions repo_alloc() and repo_fetch() (both are also used > > > by ta_lookup()). Use the caRepository URI to figure out the base URI. > > > Simplify rsync_uri_parse() into rsync_base_uri() which clips of excess > > > directories from the URI (else thousends of individual rsync calls would > > > be made against the RIR's CA repos). > > > > > > The big change is that the layout of the cache directory is changed. > > > The cache will now have two base directories: > > > - ta/ (for all trust anchors) > > > - rsync/ (for all other repositories) > > > > > > > My plan at the moment is that rpki-client will split the cache directory > > into three parts. ta/, rsync/, and rrdp/. This is done to ensure that data > > does not get mixed up. Once this is in then my next step is to support > > https:// links in TAL files and fetch the trust anchor via https instead > > of rsync. Later RRDP will follow. > > This looks good to me although I have a couple of small nits inline. > > ok tb > > > > > -- > > :wq Claudio > > > > Index: extern.h > > =================================================================== > > RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v > > retrieving revision 1.42 > > diff -u -p -r1.42 extern.h > > --- extern.h 8 Feb 2021 09:22:53 -0000 1.42 > > +++ extern.h 8 Feb 2021 13:44:22 -0000 > > @@ -392,9 +392,7 @@ void proc_parser(int) __attribute__((n > > > > /* Rsync-specific. */ > > > > -int rsync_uri_parse(const char **, size_t *, > > - const char **, size_t *, const char **, size_t *, > > - enum rtype *, const char *); > > +char *rsync_base_uri(const char *); > > void proc_rsync(char *, char *, int) > > __attribute__((noreturn)); > > > > /* Logging (though really used for OpenSSL errors). */ > > Index: main.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v > > retrieving revision 1.98 > > diff -u -p -r1.98 main.c > > --- main.c 5 Feb 2021 12:26:52 -0000 1.98 > > +++ main.c 8 Feb 2021 13:50:20 -0000 > > @@ -78,11 +78,12 @@ > > * An rsync repository. > > */ > > struct repo { > > - char *repo; /* repository rsync URI */ > > - char *local; /* local path name */ > > - char *notify; /* RRDB notify URI if available */ > > - size_t id; /* identifier (array index) */ > > - int loaded; /* whether loaded or not */ > > + char *repouri; /* CA repository base URI */ > > + char *local; /* local path name */ > > + char *uris[2]; /* URIs to fetch from */ > > + size_t id; /* identifier (array index) */ > > + int uriidx; /* which URI is fetched */ > > + int loaded; /* whether loaded or not */ > > }; > > > > size_t entity_queue; > > @@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil > > } > > > > /* > > - * Look up a repository, queueing it for discovery if not found. > > + * Allocat a new repository be extending the repotable. > > */ > > -static const struct repo * > > -repo_lookup(const char *uri) > > +static struct repo * > > +repo_alloc(void) > > { > > - const char *host, *mod; > > - size_t hostsz, modsz, i; > > - char *local; > > - struct repo *rp; > > - struct ibuf *b; > > - > > - if (!rsync_uri_parse(&host, &hostsz, > > - &mod, &modsz, NULL, NULL, NULL, uri)) > > - errx(1, "%s: malformed", uri); > > - > > - if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host, > > - (int)modsz, mod) == -1) > > - err(1, "asprintf"); > > - > > - /* Look up in repository table. */ > > - > > - for (i = 0; i < rt.reposz; i++) { > > - if (strcmp(rt.repos[i].local, local)) > > - continue; > > - free(local); > > - return &rt.repos[i]; > > - } > > + struct repo *rp; > > > > rt.repos = reallocarray(rt.repos, > > rt.reposz + 1, sizeof(struct repo)); > > This line could be unwrapped. The code could also be simplified by using > recallocarray() (it looks like the -portable update.sh is prepared for > that).
I leave that for later. There are a bunch of reallocarrays around the code that are used in the same way. Let's change them all. > > @@ -320,28 +300,99 @@ repo_lookup(const char *uri) > > rp = &rt.repos[rt.reposz++]; > > memset(rp, 0, sizeof(struct repo)); > > rp->id = rt.reposz - 1; > > - rp->local = local; > > > > - if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL) > > - err(1, "strdup"); > > + return rp; > > +} > > > > - if (!noop) { > > - if (asprintf(&local, "%s", rp->local) == -1) > > - err(1, "asprintf"); > > - logx("%s: pulling from network", local); > > - if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL) > > - err(1, NULL); > > - io_simple_buffer(b, &rp->id, sizeof(rp->id)); > > - io_str_buffer(b, local); > > - io_str_buffer(b, rp->repo); > > - ibuf_close(&rsyncq, b); > > - free(local); > > - } else { > > +static void > > +repo_fetch(struct repo *rp) > > +{ > > + struct ibuf *b; > > + > > + if (noop) { > > rp->loaded = 1; > > logx("%s: using cache", rp->local); > > stats.repos++; > > /* there is nothing in the queue so no need to flush */ > > + return; > > + } > > + > > + logx("%s: pulling from network", rp->local); > > + if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL) > > + err(1, NULL); > > + io_simple_buffer(b, &rp->id, sizeof(rp->id)); > > + io_str_buffer(b, rp->local); > > + io_str_buffer(b, rp->uris[0]); > > + ibuf_close(&rsyncq, b); > > +} > > + > > +/* > > + * Look up a trust anchor, queueing it for download if not found. > > + */ > > +static const struct repo * > > +ta_lookup(const struct tal *tal) > > +{ > > + struct repo *rp; > > + char *local; > > + size_t i, j; > > + > > + if (asprintf(&local, "ta/%s", tal->descr) == -1) > > + err(1, "asprinf"); > > + > > + /* Look up in repository table. (Lookup should actually fail here) */ > > + for (i = 0; i < rt.reposz; i++) { > > + if (rt.repos[i].repouri != NULL || > > + strcmp(rt.repos[i].local, local)) > > While I think this is correct, it looks odd. Couldn't this check for > rt.repos[i].local != NULL? No. The rt.repos[i].repouri check is there to skip repo's added by repo_lookup. In repo_lookup the same check is there but the other way around. local on the other hand can't be NULL, it will always be set. I think the repouri check could be removed since there is no way that have conflicting local repo paths. I will just remove the check. > > + continue; > > + free(local); > > + return &rt.repos[i]; > > } > > + > > + rp = repo_alloc(); > > + rp->local = local; > > + for (i = 0, j = 0; i < tal->urisz && j < 2; i++) { > > + if (strncasecmp(tal->uri[i], "rsync://", 8) != 0) > > Likely just a result of refactoring, but the strcmp() above didn't explicitly > compare to 0. I would stick to one idiom (explicit comparison seems to be used > more frequently in this code). Yeah, my lazyness shines through. I added the != 0 to the strcmp() calls. > > + continue; /* ignore non rsync URI for now */ > > + rp->uris[j++] = tal->uri[i]; > > + } > > + if (j == 0) > > + errx(1, "TAL file has no rsync:// URI"); > > + > > + repo_fetch(rp); > > + return rp; > > +} > > + > > +/* > > + * Look up a repository, queueing it for discovery if not found. > > + */ > > +static const struct repo * > > +repo_lookup(const char *uri) > > +{ > > + char *local, *repo; > > + struct repo *rp; > > + size_t i; > > + > > + if ((repo = rsync_base_uri(uri)) == NULL) > > + return NULL; > > + > > + /* Look up in repository table. */ > > + for (i = 0; i < rt.reposz; i++) { > > + if (rt.repos[i].repouri == NULL || > > + strcmp(rt.repos[i].repouri, repo)) > > ditto > > > + continue; > > + free(repo); > > + return &rt.repos[i]; > > + } > > + > > + rp = repo_alloc(); > > + rp->repouri = repo; > > + local = strchr(repo, ':') + strlen("://"); > > + if (asprintf(&rp->local, "rsync/%s", local) == -1) > > + err(1, "asprintf"); > > + if ((rp->uris[0] = strdup(repo)) == NULL) > > + err(1, "strdup"); > > + > > + repo_fetch(rp); > > return rp; > > } > > > > @@ -353,7 +404,10 @@ repo_filename(const struct repo *repo, c > > { > > char *nfile; > > > > - uri += strlen(repo->repo) + 1; > > + if (strstr(uri, repo->repouri) != uri) > > + errx(1, "%s: URI outside of repository", uri); > > + uri += strlen(repo->repouri) + 1; /* skip base and '/' */ > > + > > if (asprintf(&nfile, "%s/%s", repo->local, uri) == -1) > > err(1, "asprintf"); > > return nfile; > > @@ -484,22 +538,17 @@ queue_add_from_tal(struct entityq *q, co > > { > > char *nfile; > > const struct repo *repo; > > - const char *uri = NULL; > > - size_t i; > > + const char *uri; > > > > assert(tal->urisz); > > > > - for (i = 0; i < tal->urisz; i++) { > > - uri = tal->uri[i]; > > - if (strncasecmp(uri, "rsync://", 8) == 0) > > - break; > > - } > > - if (uri == NULL) > > - errx(1, "TAL file has no rsync:// URI"); > > - > > /* Look up the repository. */ > > - repo = repo_lookup(uri); > > - nfile = repo_filename(repo, uri); > > + repo = ta_lookup(tal); > > + > > + uri = strrchr(repo->uris[0], '/'); > > + assert(uri); > > + if (asprintf(&nfile, "%s/%s", repo->local, uri + 1) == -1) > > + err(1, "asprintf"); > > > > entityq_add(q, nfile, RTYPE_CER, repo, tal->pkey, > > tal->pkeysz, tal->descr); > > @@ -515,6 +564,9 @@ queue_add_from_cert(struct entityq *q, c > > char *nfile; > > > > repo = repo_lookup(cert->mft); > > + if (repo == NULL) /* bad repository URI */ > > + return; > > + > > nfile = repo_filename(repo, cert->mft); > > > > entityq_add(q, nfile, RTYPE_MFT, repo, NULL, 0, NULL); > > @@ -1081,8 +1133,10 @@ main(int argc, char *argv[]) > > > > /* Memory cleanup. */ > > for (i = 0; i < rt.reposz; i++) { > > + free(rt.repos[i].repouri); > > free(rt.repos[i].local); > > - free(rt.repos[i].repo); > > + free(rt.repos[i].uris[0]); > > + free(rt.repos[i].uris[1]); > > } > > free(rt.repos); > > > > Index: rsync.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/rpki-client/rsync.c,v > > retrieving revision 1.16 > > diff -u -p -r1.16 rsync.c > > --- rsync.c 3 Feb 2021 09:29:22 -0000 1.16 > > +++ rsync.c 8 Feb 2021 13:43:44 -0000 > > @@ -45,110 +45,50 @@ struct rsyncproc { > > }; > > > > /* > > - * Conforms to RFC 5781. > > - * Note that "Source" is broken down into the module, path, and also > > - * file type relevant to RPKI. > > - * Any of the pointers (except "uri") may be NULL. > > - * Returns zero on failure, non-zero on success. > > + * Return the base of a rsync URI (rsync://hostname/module). The > > + * caRepository provided by the RIR CAs point deeper than they should > > + * which would result in many rsync calls for almost every subdirectory. > > + * This is inefficent so instead crop the URI to a common base. > > + * The returned string needs to be freed by the caller. > > */ > > -int > > -rsync_uri_parse(const char **hostp, size_t *hostsz, > > - const char **modulep, size_t *modulesz, > > - const char **pathp, size_t *pathsz, > > - enum rtype *rtypep, const char *uri) > > +char * > > +rsync_base_uri(const char *uri) > > { > > - const char *host, *module, *path; > > - size_t sz; > > - > > - /* Initialise all output values to NULL or 0. */ > > - > > - if (hostsz != NULL) > > - *hostsz = 0; > > - if (modulesz != NULL) > > - *modulesz = 0; > > - if (pathsz != NULL) > > - *pathsz = 0; > > - if (hostp != NULL) > > - *hostp = 0; > > - if (modulep != NULL) > > - *modulep = 0; > > - if (pathp != NULL) > > - *pathp = 0; > > - if (rtypep != NULL) > > - *rtypep = RTYPE_EOF; > > + const char *host, *module, *rest; > > > > /* Case-insensitive rsync URI. */ > > - > > if (strncasecmp(uri, "rsync://", 8)) { > > ditto > > > warnx("%s: not using rsync schema", uri); > > - return 0; > > + return NULL; > > } > > > > /* Parse the non-zero-length hostname. */ > > - > > host = uri + 8; > > > > if ((module = strchr(host, '/')) == NULL) { > > warnx("%s: missing rsync module", uri); > > - return 0; > > + return NULL; > > } else if (module == host) { > > warnx("%s: zero-length rsync host", uri); > > - return 0; > > + return NULL; > > } > > > > - if (hostp != NULL) > > - *hostp = host; > > - if (hostsz != NULL) > > - *hostsz = module - host; > > - > > /* The non-zero-length module follows the hostname. */ > > - > > - if (module[1] == '\0') { > > + module++; > > + if (*module == '\0') { > > warnx("%s: zero-length rsync module", uri); > > - return 0; > > + return NULL; > > } > > > > - module++; > > - > > /* The path component is optional. */ > > - > > - if ((path = strchr(module, '/')) == NULL) { > > - assert(*module != '\0'); > > - if (modulep != NULL) > > - *modulep = module; > > - if (modulesz != NULL) > > - *modulesz = strlen(module); > > - return 1; > > - } else if (path == module) { > > + if ((rest = strchr(module, '/')) == NULL) { > > + return strdup(uri); > > + } else if (rest == module) { > > warnx("%s: zero-length module", uri); > > - return 0; > > - } > > - > > - if (modulep != NULL) > > - *modulep = module; > > - if (modulesz != NULL) > > - *modulesz = path - module; > > - > > - path++; > > - sz = strlen(path); > > - > > - if (pathp != NULL) > > - *pathp = path; > > - if (pathsz != NULL) > > - *pathsz = sz; > > - > > - if (rtypep != NULL && sz > 4) { > > - if (strcasecmp(path + sz - 4, ".roa") == 0) > > - *rtypep = RTYPE_ROA; > > - else if (strcasecmp(path + sz - 4, ".mft") == 0) > > - *rtypep = RTYPE_MFT; > > - else if (strcasecmp(path + sz - 4, ".cer") == 0) > > - *rtypep = RTYPE_CER; > > - else if (strcasecmp(path + sz - 4, ".crl") == 0) > > - *rtypep = RTYPE_CRL; > > + return NULL; > > } > > > > - return 1; > > + return strndup(uri, rest - uri); > > } > > > > static void > > Thanks for the review. -- :wq Claudio