On Mon, Feb 08, 2021 at 05:15:40PM +0100, Claudio Jeker wrote: > Split the repository code into two parts: > > - fetch of the trust anchors (the certs referenced by TAL files) > - fetch of the MFT files of a repository > > While the two things kind of look similar there are some differences. > > - TA files are loaded via rsync or https URI (only one file needs to be > loaded) > - MFT files need everything inside the repository to be loaded since they > refer to other files (.roa, .cer, .crl). These repositories are > synced once with rsync and many MFT files may be part of a repo. Also these > repositories can be synced via rsync or RRDP. > > To simplify these diverse options it is time to split the code up. > Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup > code into subfunctions repo_alloc() and repo_fetch() (both are also used > by ta_lookup()). Use the caRepository URI to figure out the base URI. > Simplify rsync_uri_parse() into rsync_base_uri() which clips off excess > directories from the URI (else thousands of individual rsync calls would > be made against the RIR's CA repos). > > The big change is that the layout of the cache directory is changed. > The cache will now have two base directories: > - ta/ (for all trust anchors) > - rsync/ (for all other repositories) >
My plan at the moment is that rpki-client will split the cache directory into three parts. ta/, rsync/, and rrdp/. This is done to ensure that data does not get mixed up. Once this is in then my next step is to support https:// links in TAL files and fetch the trust anchor via https instead of rsync. Later RRDP will follow. -- :wq Claudio Index: extern.h =================================================================== RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v retrieving revision 1.42 diff -u -p -r1.42 extern.h --- extern.h 8 Feb 2021 09:22:53 -0000 1.42 +++ extern.h 8 Feb 2021 13:44:22 -0000 @@ -392,9 +392,7 @@ void proc_parser(int) __attribute__((n /* Rsync-specific. */ -int rsync_uri_parse(const char **, size_t *, - const char **, size_t *, const char **, size_t *, - enum rtype *, const char *); +char *rsync_base_uri(const char *); void proc_rsync(char *, char *, int) __attribute__((noreturn)); /* Logging (though really used for OpenSSL errors). */ Index: main.c =================================================================== RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v retrieving revision 1.98 diff -u -p -r1.98 main.c --- main.c 5 Feb 2021 12:26:52 -0000 1.98 +++ main.c 8 Feb 2021 13:50:20 -0000 @@ -78,11 +78,12 @@ * An rsync repository. */ struct repo { - char *repo; /* repository rsync URI */ - char *local; /* local path name */ - char *notify; /* RRDB notify URI if available */ - size_t id; /* identifier (array index) */ - int loaded; /* whether loaded or not */ + char *repouri; /* CA repository base URI */ + char *local; /* local path name */ + char *uris[2]; /* URIs to fetch from */ + size_t id; /* identifier (array index) */ + int uriidx; /* which URI is fetched */ + int loaded; /* whether loaded or not */ }; size_t entity_queue; @@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil } /* - * Look up a repository, queueing it for discovery if not found. + * Allocat a new repository be extending the repotable. 
*/ -static const struct repo * -repo_lookup(const char *uri) +static struct repo * +repo_alloc(void) { - const char *host, *mod; - size_t hostsz, modsz, i; - char *local; - struct repo *rp; - struct ibuf *b; - - if (!rsync_uri_parse(&host, &hostsz, - &mod, &modsz, NULL, NULL, NULL, uri)) - errx(1, "%s: malformed", uri); - - if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host, - (int)modsz, mod) == -1) - err(1, "asprintf"); - - /* Look up in repository table. */ - - for (i = 0; i < rt.reposz; i++) { - if (strcmp(rt.repos[i].local, local)) - continue; - free(local); - return &rt.repos[i]; - } + struct repo *rp; rt.repos = reallocarray(rt.repos, rt.reposz + 1, sizeof(struct repo)); @@ -320,28 +300,99 @@ repo_lookup(const char *uri) rp = &rt.repos[rt.reposz++]; memset(rp, 0, sizeof(struct repo)); rp->id = rt.reposz - 1; - rp->local = local; - if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL) - err(1, "strdup"); + return rp; +} - if (!noop) { - if (asprintf(&local, "%s", rp->local) == -1) - err(1, "asprintf"); - logx("%s: pulling from network", local); - if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL) - err(1, NULL); - io_simple_buffer(b, &rp->id, sizeof(rp->id)); - io_str_buffer(b, local); - io_str_buffer(b, rp->repo); - ibuf_close(&rsyncq, b); - free(local); - } else { +static void +repo_fetch(struct repo *rp) +{ + struct ibuf *b; + + if (noop) { rp->loaded = 1; logx("%s: using cache", rp->local); stats.repos++; /* there is nothing in the queue so no need to flush */ + return; + } + + logx("%s: pulling from network", rp->local); + if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL) + err(1, NULL); + io_simple_buffer(b, &rp->id, sizeof(rp->id)); + io_str_buffer(b, rp->local); + io_str_buffer(b, rp->uris[0]); + ibuf_close(&rsyncq, b); +} + +/* + * Look up a trust anchor, queueing it for download if not found. 
+ */ +static const struct repo * +ta_lookup(const struct tal *tal) +{ + struct repo *rp; + char *local; + size_t i, j; + + if (asprintf(&local, "ta/%s", tal->descr) == -1) + err(1, "asprinf"); + + /* Look up in repository table. (Lookup should actually fail here) */ + for (i = 0; i < rt.reposz; i++) { + if (rt.repos[i].repouri != NULL || + strcmp(rt.repos[i].local, local)) + continue; + free(local); + return &rt.repos[i]; } + + rp = repo_alloc(); + rp->local = local; + for (i = 0, j = 0; i < tal->urisz && j < 2; i++) { + if (strncasecmp(tal->uri[i], "rsync://", 8) != 0) + continue; /* ignore non rsync URI for now */ + rp->uris[j++] = tal->uri[i]; + } + if (j == 0) + errx(1, "TAL file has no rsync:// URI"); + + repo_fetch(rp); + return rp; +} + +/* + * Look up a repository, queueing it for discovery if not found. + */ +static const struct repo * +repo_lookup(const char *uri) +{ + char *local, *repo; + struct repo *rp; + size_t i; + + if ((repo = rsync_base_uri(uri)) == NULL) + return NULL; + + /* Look up in repository table. 
*/ + for (i = 0; i < rt.reposz; i++) { + if (rt.repos[i].repouri == NULL || + strcmp(rt.repos[i].repouri, repo)) + continue; + free(repo); + return &rt.repos[i]; + } + + rp = repo_alloc(); + rp->repouri = repo; + local = strchr(repo, ':') + strlen("://"); + if (asprintf(&rp->local, "rsync/%s", local) == -1) + err(1, "asprintf"); + if ((rp->uris[0] = strdup(repo)) == NULL) + err(1, "strdup"); + + repo_fetch(rp); return rp; } @@ -353,7 +404,10 @@ repo_filename(const struct repo *repo, c { char *nfile; - uri += strlen(repo->repo) + 1; + if (strstr(uri, repo->repouri) != uri) + errx(1, "%s: URI outside of repository", uri); + uri += strlen(repo->repouri) + 1; /* skip base and '/' */ + if (asprintf(&nfile, "%s/%s", repo->local, uri) == -1) err(1, "asprintf"); return nfile; @@ -484,22 +538,17 @@ queue_add_from_tal(struct entityq *q, co { char *nfile; const struct repo *repo; - const char *uri = NULL; - size_t i; + const char *uri; assert(tal->urisz); - for (i = 0; i < tal->urisz; i++) { - uri = tal->uri[i]; - if (strncasecmp(uri, "rsync://", 8) == 0) - break; - } - if (uri == NULL) - errx(1, "TAL file has no rsync:// URI"); - /* Look up the repository. */ - repo = repo_lookup(uri); - nfile = repo_filename(repo, uri); + repo = ta_lookup(tal); + + uri = strrchr(repo->uris[0], '/'); + assert(uri); + if (asprintf(&nfile, "%s/%s", repo->local, uri + 1) == -1) + err(1, "asprintf"); entityq_add(q, nfile, RTYPE_CER, repo, tal->pkey, tal->pkeysz, tal->descr); @@ -515,6 +564,9 @@ queue_add_from_cert(struct entityq *q, c char *nfile; repo = repo_lookup(cert->mft); + if (repo == NULL) /* bad repository URI */ + return; + nfile = repo_filename(repo, cert->mft); entityq_add(q, nfile, RTYPE_MFT, repo, NULL, 0, NULL); @@ -1081,8 +1133,10 @@ main(int argc, char *argv[]) /* Memory cleanup. 
*/ for (i = 0; i < rt.reposz; i++) { + free(rt.repos[i].repouri); free(rt.repos[i].local); - free(rt.repos[i].repo); + free(rt.repos[i].uris[0]); + free(rt.repos[i].uris[1]); } free(rt.repos); Index: rsync.c =================================================================== RCS file: /cvs/src/usr.sbin/rpki-client/rsync.c,v retrieving revision 1.16 diff -u -p -r1.16 rsync.c --- rsync.c 3 Feb 2021 09:29:22 -0000 1.16 +++ rsync.c 8 Feb 2021 13:43:44 -0000 @@ -45,110 +45,50 @@ struct rsyncproc { }; /* - * Conforms to RFC 5781. - * Note that "Source" is broken down into the module, path, and also - * file type relevant to RPKI. - * Any of the pointers (except "uri") may be NULL. - * Returns zero on failure, non-zero on success. + * Return the base of a rsync URI (rsync://hostname/module). The + * caRepository provided by the RIR CAs point deeper than they should + * which would result in many rsync calls for almost every subdirectory. + * This is inefficent so instead crop the URI to a common base. + * The returned string needs to be freed by the caller. */ -int -rsync_uri_parse(const char **hostp, size_t *hostsz, - const char **modulep, size_t *modulesz, - const char **pathp, size_t *pathsz, - enum rtype *rtypep, const char *uri) +char * +rsync_base_uri(const char *uri) { - const char *host, *module, *path; - size_t sz; - - /* Initialise all output values to NULL or 0. */ - - if (hostsz != NULL) - *hostsz = 0; - if (modulesz != NULL) - *modulesz = 0; - if (pathsz != NULL) - *pathsz = 0; - if (hostp != NULL) - *hostp = 0; - if (modulep != NULL) - *modulep = 0; - if (pathp != NULL) - *pathp = 0; - if (rtypep != NULL) - *rtypep = RTYPE_EOF; + const char *host, *module, *rest; /* Case-insensitive rsync URI. */ - if (strncasecmp(uri, "rsync://", 8)) { warnx("%s: not using rsync schema", uri); - return 0; + return NULL; } /* Parse the non-zero-length hostname. 
*/ - host = uri + 8; if ((module = strchr(host, '/')) == NULL) { warnx("%s: missing rsync module", uri); - return 0; + return NULL; } else if (module == host) { warnx("%s: zero-length rsync host", uri); - return 0; + return NULL; } - if (hostp != NULL) - *hostp = host; - if (hostsz != NULL) - *hostsz = module - host; - /* The non-zero-length module follows the hostname. */ - - if (module[1] == '\0') { + module++; + if (*module == '\0') { warnx("%s: zero-length rsync module", uri); - return 0; + return NULL; } - module++; - /* The path component is optional. */ - - if ((path = strchr(module, '/')) == NULL) { - assert(*module != '\0'); - if (modulep != NULL) - *modulep = module; - if (modulesz != NULL) - *modulesz = strlen(module); - return 1; - } else if (path == module) { + if ((rest = strchr(module, '/')) == NULL) { + return strdup(uri); + } else if (rest == module) { warnx("%s: zero-length module", uri); - return 0; - } - - if (modulep != NULL) - *modulep = module; - if (modulesz != NULL) - *modulesz = path - module; - - path++; - sz = strlen(path); - - if (pathp != NULL) - *pathp = path; - if (pathsz != NULL) - *pathsz = sz; - - if (rtypep != NULL && sz > 4) { - if (strcasecmp(path + sz - 4, ".roa") == 0) - *rtypep = RTYPE_ROA; - else if (strcasecmp(path + sz - 4, ".mft") == 0) - *rtypep = RTYPE_MFT; - else if (strcasecmp(path + sz - 4, ".cer") == 0) - *rtypep = RTYPE_CER; - else if (strcasecmp(path + sz - 4, ".crl") == 0) - *rtypep = RTYPE_CRL; + return NULL; } - return 1; + return strndup(uri, rest - uri); } static void