On Thu, Jan 13, 2022 at 10:51:33PM +0100, Theo Buehler wrote:
> On Thu, Jan 13, 2022 at 05:05:57PM +0100, Claudio Jeker wrote:
> > This diff adds a new cache subdir called "valid". This is the place where
> > all verified and good files are stored after a run. It makes -n work a lot
> > better since -n will now only look at what's inside "valid" and ignore
> > "rsync" and "rrdp".
> > 
> > The trust anchors are still stored in "ta" even if valid.
> > The rsync repo will only hold temporary files and be empty otherwise.
> > The rrdp repo still may contain some superfluous files that people
> > included in their snapshots. Not keeping them could result in extra
> > snapshot downloads since delta updates could refer to these files.
> > Since there is a valid repo, rrdp no longer needs an additional temp
> > directory for its sync.
> > 
> > With this rsync will use the --compare-dest feature and use the valid
> > cache as a base.
> > 
> > The file cleanup at the end got more complex. There is now
> > repo_move_valid() and repo_cleanup_rrdp() to do two initial steps of
> > cleanup before the tree traversal starts. The fts function now uses
> > some fts features to find a) empty directories and b) superfluous files.
> > 
> > I think the code changes outside of repo.c should be straight forward.
> > 
> > To the rpki connoisseur, this currently only implements a simple newest
> > file wins startegy. The handling of .mft and their serial number will
> > follow once this is in.
> 
> I have made a first pass over this. I will need to sleep on this and
> think through the repo.c changes in more detail with a fresh head.
> 
> Couple leaks and a few stylistic remarks noted inline.

Adjusted diff below. Thanks for the review of this and also for all the
previous diffs.

-- 
:wq Claudio

Index: extern.h
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.103
diff -u -p -r1.103 extern.h
--- extern.h    13 Jan 2022 13:46:03 -0000      1.103
+++ extern.h    13 Jan 2022 13:52:42 -0000
@@ -341,7 +341,7 @@ enum publish_type {
 struct entity {
        TAILQ_ENTRY(entity) entries;
        char            *path;          /* path relative to repository */
-       char            *file;          /* filename */
+       char            *file;          /* filename or valid repo path */
        unsigned char   *data;          /* optional data blob */
        size_t           datasz;        /* length of optional data blob */
        unsigned int     repoid;        /* repository identifier */
@@ -380,6 +380,7 @@ struct stats {
        size_t   vrps; /* total number of vrps */
        size_t   uniqs; /* number of unique vrps */
        size_t   del_files; /* number of files removed in cleanup */
+       size_t   extra_files; /* number of superfluous files */
        size_t   del_dirs; /* number of directories removed in cleanup */
        size_t   brks; /* number of BGPsec Router Key (BRK) certificates */
        struct timeval  elapsed_time;
@@ -506,7 +507,7 @@ void                 rrdp_clear(unsigned int);
 void            rrdp_save_state(unsigned int, struct rrdp_session *);
 int             rrdp_handle_file(unsigned int, enum publish_type, char *,
                    char *, size_t, char *, size_t);
-char           *repo_basedir(const struct repo *);
+char           *repo_basedir(const struct repo *, int);
 unsigned int    repo_id(const struct repo *);
 const char     *repo_uri(const struct repo *);
 struct repo    *ta_lookup(int, struct tal *);
@@ -520,7 +521,8 @@ void                 rsync_finish(unsigned int, int);
 void            http_finish(unsigned int, enum http_result, const char *);
 void            rrdp_finish(unsigned int, int);
 
-void            rsync_fetch(unsigned int, const char *, const char *);
+void            rsync_fetch(unsigned int, const char *, const char *,
+                   const char *);
 void            http_fetch(unsigned int, const char *, const char *, int);
 void            rrdp_fetch(unsigned int, const char *, const char *,
                    struct rrdp_session *);
Index: main.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.175
diff -u -p -r1.175 main.c
--- main.c      13 Jan 2022 13:18:41 -0000      1.175
+++ main.c      14 Jan 2022 08:57:30 -0000
@@ -151,20 +151,22 @@ entity_write_repo(struct repo *rp)
        struct ibuf *b;
        enum rtype type = RTYPE_REPO;
        unsigned int repoid;
-       char *path;
+       char *path, *altpath;
        int talid = 0;
 
        repoid = repo_id(rp);
-       path = repo_basedir(rp);
+       path = repo_basedir(rp, 0);
+       altpath = repo_basedir(rp, 1);
        b = io_new_buffer();
        io_simple_buffer(b, &type, sizeof(type));
        io_simple_buffer(b, &repoid, sizeof(repoid));
        io_simple_buffer(b, &talid, sizeof(talid));
        io_str_buffer(b, path);
-       io_str_buffer(b, NULL);
+       io_str_buffer(b, altpath);
        io_buf_buffer(b, NULL, 0);
        io_close_buffer(&procq, b);
        free(path);
+       free(altpath);
 }
 
 /*
@@ -254,14 +256,15 @@ rrdp_fetch(unsigned int id, const char *
  * Request a repository sync via rsync URI to directory local.
  */
 void
-rsync_fetch(unsigned int id, const char *uri, const char *local)
+rsync_fetch(unsigned int id, const char *uri, const char *local,
+    const char *base)
 {
        struct ibuf     *b;
 
        b = io_new_buffer();
        io_simple_buffer(b, &id, sizeof(id));
        io_str_buffer(b, local);
-       io_str_buffer(b, NULL);
+       io_str_buffer(b, base);
        io_str_buffer(b, uri);
        io_close_buffer(&rsyncq, b);
 }
@@ -1246,8 +1249,8 @@ main(int argc, char *argv[])
        logx("Certificate revocation lists: %zu", stats.crls);
        logx("Ghostbuster records: %zu", stats.gbrs);
        logx("Repositories: %zu", stats.repos);
-       logx("Cleanup: removed %zu files, %zu directories",
-           stats.del_files, stats.del_dirs);
+       logx("Cleanup: removed %zu files, %zu directories, %zu superfluous",
+           stats.del_files, stats.del_dirs, stats.extra_files);
        logx("VRP Entries: %zu (%zu unique)", stats.vrps, stats.uniqs);
 
        /* Memory cleanup. */
Index: output-json.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/output-json.c,v
retrieving revision 1.22
diff -u -p -r1.22 output-json.c
--- output-json.c       4 Nov 2021 11:32:55 -0000       1.22
+++ output-json.c       12 Jan 2022 12:00:48 -0000
@@ -78,6 +78,7 @@ outputheader_json(FILE *out, struct stat
            "\t\t\"vrps\": %zu,\n"
            "\t\t\"uniquevrps\": %zu,\n"
            "\t\t\"cachedir_del_files\": %zu,\n"
+           "\t\t\"cachedir_superfluous_files\": %zu,\n"
            "\t\t\"cachedir_del_dirs\": %zu\n"
            "\t},\n\n",
            st->mfts, st->mfts_fail, st->mfts_stale,
@@ -85,7 +86,7 @@ outputheader_json(FILE *out, struct stat
            st->gbrs,
            st->repos,
            st->vrps, st->uniqs,
-           st->del_files, st->del_dirs) < 0)
+           st->del_files, st->extra_files, st->del_dirs) < 0)
                return -1;
        return 0;
 }
Index: parser.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
retrieving revision 1.36
diff -u -p -r1.36 parser.c
--- parser.c    13 Jan 2022 14:58:21 -0000      1.36
+++ parser.c    14 Jan 2022 09:19:48 -0000
@@ -50,6 +50,7 @@ static struct crl_tree         crlt = RB_INITIA
 struct parse_repo {
        RB_ENTRY(parse_repo)     entry;
        char                    *path;
+       char                    *validpath;
        unsigned int             id;
 };
 
@@ -72,20 +73,75 @@ repo_get(unsigned int id)
 }
 
 static void
-repo_add(unsigned int id, char *path)
+repo_add(unsigned int id, char *path, char *validpath)
 {
        struct parse_repo *rp;
 
-       if ((rp = malloc(sizeof(*rp))) == NULL)
+       if ((rp = calloc(1, sizeof(*rp))) == NULL)
                err(1, NULL);
        rp->id = id;
-       if ((rp->path = strdup(path)) == NULL)
-               err(1, NULL);
+       if (path != NULL)
+               if ((rp->path = strdup(path)) == NULL)
+                       err(1, NULL);
+       if (validpath != NULL)
+               if ((rp->validpath = strdup(validpath)) == NULL)
+                       err(1, NULL);
 
        if (RB_INSERT(repo_tree, &repos, rp) != NULL)
                errx(1, "repository already added: id %d, %s", id, path);
 }
 
+/*
+ * Build access path to file based on repoid, path and file values.
+ * If wantalt == 1 the function can return NULL, if wantalt == 0 it
+ * can not fail.
+ */
+static char *
+parse_filepath(unsigned int repoid, const char *path, const char *file,
+    int wantalt)
+{
+       struct parse_repo       *rp;
+       char                    *fn, *repopath;
+
+       /* build file path based on repoid, entity path and filename */
+       rp = repo_get(repoid);
+       if (rp == NULL) {
+               /* no repo so no alternative path. */
+               if (wantalt)
+                       return NULL;
+
+               if (path == NULL) {
+                       if ((fn = strdup(file)) == NULL)
+                               err(1, NULL);
+               } else {
+                       if (asprintf(&fn, "%s/%s", path, file) == -1)
+                               err(1, NULL);
+               }
+       } else {
+               if (wantalt || rp->path == NULL)
+                       repopath = rp->validpath;
+               else
+                       repopath = rp->path;
+
+               if (repopath == NULL)
+                       return NULL;
+
+               if (path == NULL) {
+                       if (asprintf(&fn, "%s/%s", repopath, file) == -1)
+                               err(1, NULL);
+               } else {
+                       if (asprintf(&fn, "%s/%s/%s", repopath, path,
+                           file) == -1)
+                               err(1, NULL);
+               }
+       }
+       return fn;
+}
+
+/*
+ * Callback for X509_verify_cert() to handle critical extensions in old
+ * LibreSSL libraries or OpenSSL libs without RFC3779 support.
+ */
 static int
 verify_cb(int ok, X509_STORE_CTX *store_ctx)
 {
@@ -229,13 +285,8 @@ int
 mft_check(const char *fn, struct mft *p)
 {
        size_t  i;
-       int     fd, rc = 1;
-       char    *cp, *h, *path = NULL;
-
-       /* Check hash of file now, but first build path for it */
-       cp = strrchr(fn, '/');
-       assert(cp != NULL);
-       assert(cp - fn < INT_MAX);
+       int     fd, try, rc = 1;
+       char    *h, *path;
 
        for (i = 0; i < p->filesz; i++) {
                const struct mftfile *m = &p->files[i];
@@ -246,15 +297,24 @@ mft_check(const char *fn, struct mft *p)
                        free(h);
                        continue;
                }
-               if (asprintf(&path, "%.*s/%s", (int)(cp - fn), fn,
-                   m->file) == -1)
-                       err(1, NULL);
-               fd = open(path, O_RDONLY);
+
+               fd = -1;
+               try = 0;
+               path = NULL;
+               do {
+                       free(path);
+                       if ((path = parse_filepath(p->repoid, p->path, m->file,
+                           try++)) == NULL)
+                               break;
+                       fd = open(path, O_RDONLY);
+               } while (fd == -1 && try < 2);
+
+               free(path);
+
                if (!valid_filehash(fd, m->hash, sizeof(m->hash))) {
                        warnx("%s: bad message digest for %s", fn, m->file);
                        rc = 0;
                }
-               free(path);
        }
 
        return rc;
@@ -631,33 +691,40 @@ build_crls(const struct crl *crl, STACK_
 }
 
 static char *
-parse_filepath(struct entity *entp)
+parse_load_file(struct entity *entp, unsigned char **f, size_t *flen)
 {
-       struct parse_repo       *rp;
-       char                    *file;
+       char *file, *nfile;
 
-       /* build file path based on repoid, entity path and filename */
-       rp = repo_get(entp->repoid);
-       if (rp == NULL) {
-               if (entp->path == NULL) {
-                       if ((file = strdup(entp->file)) == NULL)
-                                       err(1, NULL);
-               } else {
-                       if (asprintf(&file, "%s/%s", entp->path,
-                           entp->file) == -1)
-                               err(1, NULL);
-               }
-       } else {
-               if (entp->path == NULL) {
-                       if (asprintf(&file, "%s/%s", rp->path,
-                           entp->file) == -1)
-                               err(1, NULL);
-               } else {
-                       if (asprintf(&file, "%s/%s/%s", rp->path,
-                           entp->path, entp->file) == -1)
-                               err(1, NULL);
-               }
+       file = parse_filepath(entp->repoid, entp->path, entp->file, 0);
+
+       /* TAL files include the data already */
+       if (entp->type == RTYPE_TAL) {
+               *f = NULL;
+               *flen = 0;
+               return file;
        }
+
+       *f = load_file(file, flen);
+       if (*f != NULL)
+               return file;
+
+       if (errno != ENOENT)
+               goto fail;
+
+       /* try alternate file location */
+       nfile = parse_filepath(entp->repoid, entp->path, entp->file, 1);
+       if (nfile == NULL)
+               goto fail;
+
+       free(file);
+       file = nfile;
+
+       *f = load_file(file, flen);
+       if (*f != NULL)
+               return file;
+
+fail:
+       warn("parse file %s", file);
        return file;
 }
 
@@ -680,20 +747,14 @@ parse_entity(struct entityq *q, struct m
 
                /* handle RTYPE_REPO first */
                if (entp->type == RTYPE_REPO) {
-                       repo_add(entp->repoid, entp->path);
+                       repo_add(entp->repoid, entp->path, entp->file);
                        entity_free(entp);
                        continue;
                }
 
-               f = NULL;
-               file = parse_filepath(entp);
-               if (entp->type != RTYPE_TAL) {
-                       f = load_file(file, &flen);
-                       if (f == NULL)
-                               warn("%s", file);
-               }
+               file = parse_load_file(entp, &f, &flen);
 
-               /* pass back at least type and filename */
+               /* pass back at least type, repoid and filename */
                b = io_new_buffer();
                io_simple_buffer(b, &entp->type, sizeof(entp->type));
                io_str_buffer(b, file);
Index: repo.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
retrieving revision 1.24
diff -u -p -r1.24 repo.c
--- repo.c      13 Jan 2022 14:57:02 -0000      1.24
+++ repo.c      14 Jan 2022 09:05:46 -0000
@@ -58,8 +58,6 @@ struct rrdprepo {
        SLIST_ENTRY(rrdprepo)    entry;
        char                    *notifyuri;
        char                    *basedir;
-       char                    *temp;
-       struct filepath_tree     added;
        struct filepath_tree     deleted;
        unsigned int             id;
        enum repo_state          state;
@@ -92,6 +90,7 @@ struct repo {
        SLIST_ENTRY(repo)        entry;
        char                    *repouri;
        char                    *notifyuri;
+       char                    *basedir;
        const struct rrdprepo   *rrdp;
        const struct rsyncrepo  *rsync;
        const struct tarepo     *ta;
@@ -105,7 +104,7 @@ static SLIST_HEAD(, repo)   repos = SLIST_
 /* counter for unique repo id */
 unsigned int           repoid;
 
-static struct rsyncrepo        *rsync_get(const char *, int);
+static struct rsyncrepo        *rsync_get(const char *, const char *);
 static void             remove_contents(char *);
 
 /*
@@ -168,26 +167,6 @@ filepath_exists(struct filepath_tree *tr
 }
 
 /*
- * Return true if a filepath entry exists that starts with path.
- */
-static int
-filepath_dir_exists(struct filepath_tree *tree, char *path)
-{
-       struct filepath needle;
-       struct filepath *res;
-
-       needle.file = path;
-       res = RB_NFIND(filepath_tree, tree, &needle);
-       while (res != NULL && strstr(res->file, path) == res->file) {
-               /* make sure that filepath actually is in that path */
-               if (res->file[strlen(path)] == '/')
-                       return 1;
-               res = RB_NEXT(filepath_tree, tree, res);
-       }
-       return 0;
-}
-
-/*
  * Remove entry from tree and free it.
  */
 static void
@@ -284,7 +263,8 @@ repo_state(struct repo *rp)
                return rp->rsync->state;
        if (rp->rrdp)
                return rp->rrdp->state;
-       errx(1, "%s: bad repo", rp->repouri);
+       /* No backend so sync is by definition done. */
+       return REPO_DONE;
 }
 
 /*
@@ -305,7 +285,8 @@ repo_done(const void *vp, int ok)
                        if (!ok) {
                                /* try to fall back to rsync */
                                rp->rrdp = NULL;
-                               rp->rsync = rsync_get(rp->repouri, 0);
+                               rp->rsync = rsync_get(rp->repouri,
+                                   rp->basedir);
                                /* need to check if it was already loaded */
                                if (repo_state(rp) != REPO_LOADING)
                                        entityq_flush(&rp->queue, rp);
@@ -362,7 +343,7 @@ ta_fetch(struct tarepo *tr)
                 * Create destination location.
                 * Build up the tree to this point.
                 */
-               rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir);
+               rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir, NULL);
        } else {
                int fd;
 
@@ -387,9 +368,6 @@ ta_get(struct tal *tal)
 
        /* no need to look for possible other repo */
 
-       if (tal->urisz == 0)
-               errx(1, "TAL %s has no URI", tal->descr);
-
        if ((tr = calloc(1, sizeof(*tr))) == NULL)
                err(1, NULL);
        tr->id = ++repoid;
@@ -405,17 +383,7 @@ ta_get(struct tal *tal)
        tal->urisz = 0;
        tal->uri = NULL;
 
-       if (noop) {
-               tr->state = REPO_DONE;
-               logx("ta/%s: using cache", tr->descr);
-               repo_done(tr, 0);
-       } else {
-               /* try to create base directory */
-               if (mkpath(tr->basedir) == -1)
-                       warn("mkpath %s", tr->basedir);
-
-               ta_fetch(tr);
-       }
+       ta_fetch(tr);
 
        return tr;
 }
@@ -447,7 +415,7 @@ ta_free(void)
 }
 
 static struct rsyncrepo *
-rsync_get(const char *uri, int nofetch)
+rsync_get(const char *uri, const char *validdir)
 {
        struct rsyncrepo *rr;
        char *repo;
@@ -470,22 +438,16 @@ rsync_get(const char *uri, int nofetch)
        rr->repouri = repo;
        rr->basedir = repo_dir(repo, "rsync", 0);
 
-       if (noop || nofetch) {
-               rr->state = REPO_DONE;
-               logx("%s: using cache", rr->basedir);
-               repo_done(rr, 0);
-       } else {
-               /* create base directory */
-               if (mkpath(rr->basedir) == -1) {
-                       warn("mkpath %s", rr->basedir);
-                       rsync_finish(rr->id, 0);
-                       return rr;
-               }
-
-               logx("%s: pulling from %s", rr->basedir, rr->repouri);
-               rsync_fetch(rr->id, rr->repouri, rr->basedir);
+       /* create base directory */
+       if (mkpath(rr->basedir) == -1) {
+               warn("mkpath %s", rr->basedir);
+               rsync_finish(rr->id, 0);
+               return rr;
        }
 
+       logx("%s: pulling from %s", rr->basedir, rr->repouri);
+       rsync_fetch(rr->id, rr->repouri, rr->basedir, validdir);
+
        return rr;
 }
 
@@ -513,25 +475,19 @@ rsync_free(void)
        }
 }
 
-static int rrdprepo_fetch(struct rrdprepo *);
-
 /*
  * Build local file name base on the URI and the rrdprepo info.
  */
 static char *
-rrdp_filename(const struct rrdprepo *rr, const char *uri, int temp)
+rrdp_filename(const struct rrdprepo *rr, const char *uri, int valid)
 {
        char *nfile;
-       char *dir = rr->basedir;
-
-       if (temp)
-               dir = rr->temp;
-
-       if (!valid_uri(uri, strlen(uri), "rsync://")) {
-               warnx("%s: bad URI %s", rr->basedir, uri);
-               return NULL;
-       }
+       const char *dir = rr->basedir;
 
+       if (valid)
+               dir = "valid";
+       if (!valid_uri(uri, strlen(uri), "rsync://"))
+               errx(1, "%s: bad URI %s", rr->basedir, uri);
        uri += strlen("rsync://");      /* skip proto */
        if (asprintf(&nfile, "%s/%s", dir, uri) == -1)
                err(1, NULL);
@@ -575,28 +531,46 @@ rrdp_free(void)
 
                free(rr->notifyuri);
                free(rr->basedir);
-               free(rr->temp);
 
-               filepath_free(&rr->added);
                filepath_free(&rr->deleted);
 
                free(rr);
        }
 }
 
-static struct rrdprepo *
-rrdp_basedir(const char *dir)
+/*
+ * Check if a directory is an active rrdp repository.
+ * Returns 1 if found else 0.
+ */
+static int
+rrdp_is_active(const char *dir)
 {
        struct rrdprepo *rr;
 
        SLIST_FOREACH(rr, &rrdprepos, entry)
-               if (strcmp(dir, rr->basedir) == 0) {
-                       if (rr->state == REPO_FAILED)
-                               return NULL;
-                       return rr;
-               }
+               if (strcmp(dir, rr->basedir) == 0)
+                       return rr->state != REPO_FAILED;
 
-       return NULL;
+       return 0;
+}
+
+/*
+ * Check if the URI is actually covered by one of the repositories
+ * that depend on this RRDP repository.
+ * Returns 1 if the URI is valid, 0 if no repouri matches the URI.
+ */
+static int
+rrdp_uri_valid(struct rrdprepo *rr, const char *uri)
+{
+       struct repo *rp;
+
+       SLIST_FOREACH(rp, &repos, entry) {
+               if (rp->rrdp != rr)
+                       continue;
+               if (strncmp(uri, rp->repouri, strlen(rp->repouri)) == 0)
+                       return 1;
+       }
+       return 0;
 }
 
 /*
@@ -740,8 +714,9 @@ fail:
 }
 
 static struct rrdprepo *
-rrdp_get(const char *uri, int nofetch)
+rrdp_get(const char *uri)
 {
+       struct rrdp_session state = { 0 };
        struct rrdprepo *rr;
 
        SLIST_FOREACH(rr, &rrdprepos, entry)
@@ -761,28 +736,24 @@ rrdp_get(const char *uri, int nofetch)
                err(1, NULL);
        rr->basedir = repo_dir(uri, "rrdp", 1);
 
-       RB_INIT(&rr->added);
        RB_INIT(&rr->deleted);
 
-       if (noop || nofetch) {
-               rr->state = REPO_DONE;
-               logx("%s: using cache", rr->notifyuri);
-               repo_done(rr, 0);
-       } else {
-               /* create base directory */
-               if (mkpath(rr->basedir) == -1) {
-                       warn("mkpath %s", rr->basedir);
-                       rrdp_finish(rr->id, 0);
-                       return rr;
-               }
-               if (rrdprepo_fetch(rr) == -1) {
-                       rrdp_finish(rr->id, 0);
-                       return rr;
-               }
 
-               logx("%s: pulling from %s", rr->notifyuri, "network");
+       /* create base directory */
+       if (mkpath(rr->basedir) == -1) {
+               warn("mkpath %s", rr->basedir);
+               rrdp_finish(rr->id, 0);
+               return rr;
        }
 
+       /* parse state and start the sync */
+       rrdp_parse_state(rr, &state);
+       rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
+       free(state.session_id);
+       free(state.last_mod);
+
+       logx("%s: pulling from %s", rr->notifyuri, "network");
+
        return rr;
 }
 
@@ -800,7 +771,6 @@ rrdp_clear(unsigned int id)
 
        /* remove rrdp repository contents */
        remove_contents(rr->basedir);
-       remove_contents(rr->temp);
 }
 
 /*
@@ -816,8 +786,8 @@ rrdp_handle_file(unsigned int id, enum p
        struct rrdprepo *rr;
        struct filepath *fp;
        ssize_t s;
-       char *fn;
-       int fd = -1;
+       char *fn = NULL;
+       int fd = -1, try = 0;
 
        rr = rrdp_find(id);
        if (rr == NULL)
@@ -825,21 +795,20 @@ rrdp_handle_file(unsigned int id, enum p
        if (rr->state == REPO_FAILED)
                return -1;
 
+       /* check hash of original file for updates and deletes */
        if (pt == PUB_UPD || pt == PUB_DEL) {
                if (filepath_exists(&rr->deleted, uri)) {
                        warnx("%s: already deleted", uri);
                        return 0;
                }
-               fp = filepath_find(&rr->added, uri);
-               if (fp == NULL) {
-                       if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
-                               return 0;
-               } else {
-                       filepath_put(&rr->added, fp);
-                       if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
+               /* try to open file first in rrdp then in valid repo */
+               do {
+                       free(fn);
+                       if ((fn = rrdp_filename(rr, uri, try++)) == NULL)
                                return 0;
-               }
-               fd = open(fn, O_RDONLY);
+                       fd = open(fn, O_RDONLY);
+               } while (fd == -1 && try < 2);
+
                if (!valid_filehash(fd, hash, hlen)) {
                        warnx("%s: bad file digest for %s", rr->notifyuri, fn);
                        free(fn);
@@ -848,6 +817,7 @@ rrdp_handle_file(unsigned int id, enum p
                free(fn);
        }
 
+       /* write new content or mark uri as deleted. */
        if (pt == PUB_DEL) {
                filepath_add(&rr->deleted, uri);
        } else {
@@ -855,8 +825,8 @@ rrdp_handle_file(unsigned int id, enum p
                if (fp != NULL)
                        filepath_put(&rr->deleted, fp);
 
-               /* add new file to temp dir */
-               if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
+               /* add new file to rrdp dir */
+               if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
                        return 0;
 
                if (repo_mkpath(fn) == -1)
@@ -876,7 +846,6 @@ rrdp_handle_file(unsigned int id, enum p
                if ((size_t)s != dlen)  /* impossible */
                        errx(1, "short write %s", fn);
                free(fn);
-               filepath_add(&rr->added, uri);
        }
 
        return 1;
@@ -890,66 +859,6 @@ fail:
 }
 
 /*
- * Initiate a RRDP sync, create the required temporary directory and
- * parse a possible state file before sending the request to the RRDP process.
- */
-static int
-rrdprepo_fetch(struct rrdprepo *rr)
-{
-       struct rrdp_session state = { 0 };
-
-       if (asprintf(&rr->temp, "%s.XXXXXXXX", rr->basedir) == -1)
-               err(1, NULL);
-       if (mkdtemp(rr->temp) == NULL) {
-               warn("mkdtemp %s", rr->temp);
-               return -1;
-       }
-
-       rrdp_parse_state(rr, &state);
-       rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
-
-       free(state.session_id);
-       free(state.last_mod);
-
-       return 0;
-}
-
-static int
-rrdp_merge_repo(struct rrdprepo *rr)
-{
-       struct filepath *fp, *nfp;
-       char *fn, *rfn;
-
-       RB_FOREACH_SAFE(fp, filepath_tree, &rr->added, nfp) {
-               fn = rrdp_filename(rr, fp->file, 1);
-               rfn = rrdp_filename(rr, fp->file, 0);
-
-               if (fn == NULL || rfn == NULL)
-                       errx(1, "bad filepath");        /* should not happen */
-
-               if (repo_mkpath(rfn) == -1) {
-                       goto fail;
-               }
-
-               if (rename(fn, rfn) == -1) {
-                       warn("rename %s", rfn);
-                       goto fail;
-               }
-
-               free(rfn);
-               free(fn);
-               filepath_put(&rr->added, fp);
-       }
-
-       return 1;
-
-fail:
-       free(rfn);
-       free(fn);
-       return 0;
-}
-
-/*
  * RSYNC sync finished, either with or without success.
  */
 void
@@ -993,6 +902,8 @@ rsync_finish(unsigned int id, int ok)
                    rr->basedir);
                stats.rsync_fails++;
                rr->state = REPO_FAILED;
+               /* clear rsync repo since it failed */
+               remove_contents(rr->basedir);
        }
 
        repo_done(rr, ok);
@@ -1013,19 +924,20 @@ rrdp_finish(unsigned int id, int ok)
        if (rr->state != REPO_LOADING)
                return;
 
-       if (ok && rrdp_merge_repo(rr)) {
+       if (ok) {
                logx("%s: loaded from network", rr->notifyuri);
                stats.rrdp_repos++;
                rr->state = REPO_DONE;
-               repo_done(rr, ok);
        } else {
                logx("%s: load from network failed, fallback to rsync",
                    rr->notifyuri);
                stats.rrdp_fails++;
                rr->state = REPO_FAILED;
-               remove_contents(rr->temp);
-               repo_done(rr, 0);
+               /* clear the RRDP repo since it failed */
+               remove_contents(rr->basedir);
        }
+
+       repo_done(rr, ok);
 }
 
 /*
@@ -1082,6 +994,9 @@ ta_lookup(int id, struct tal *tal)
 {
        struct repo     *rp;
 
+       if (tal->urisz == 0)
+               errx(1, "TAL %s has no URI", tal->descr);
+
        /* Look up in repository table. (Lookup should actually fail here) */
        SLIST_FOREACH(rp, &repos, entry) {
                if (strcmp(rp->repouri, tal->descr) == 0)
@@ -1089,11 +1004,24 @@ ta_lookup(int id, struct tal *tal)
        }
 
        rp = repo_alloc(id);
+       rp->basedir = repo_dir(tal->descr, "ta", 0);
        if ((rp->repouri = strdup(tal->descr)) == NULL)
                err(1, NULL);
 
+       /* try to create base directory */
+       if (mkpath(rp->basedir) == -1)
+               warn("mkpath %s", rp->basedir);
+
+       /* check if sync disabled ... */
+       if (noop) {
+               logx("ta/%s: using cache", rp->repouri);
+               entityq_flush(&rp->queue, rp);
+               return rp;
+       }
+
        rp->ta = ta_get(tal);
 
+       /* need to check if it was already loaded */
        if (repo_state(rp) != REPO_LOADING)
                entityq_flush(&rp->queue, rp);
 
@@ -1130,6 +1058,7 @@ repo_lookup(int talid, const char *uri, 
        }
 
        rp = repo_alloc(talid);
+       rp->basedir = repo_dir(repouri, "valid", 0);
        rp->repouri = repouri;
        if (notify != NULL)
                if ((rp->notifyuri = strdup(notify)) == NULL)
@@ -1141,12 +1070,24 @@ repo_lookup(int talid, const char *uri, 
                nofetch = 1;
        }
 
-       /* try RRDP first if available */
+       /* try to create base directory */
+       if (mkpath(rp->basedir) == -1)
+               warn("mkpath %s", rp->basedir);
+
+       /* check if sync disabled ... */
+       if (noop || nofetch) {
+               logx("%s: using cache", rp->basedir);
+               entityq_flush(&rp->queue, rp);
+               return rp;
+       }
+
+       /* ... else try RRDP first if available then rsync */
        if (notify != NULL)
-               rp->rrdp = rrdp_get(notify, nofetch);
+               rp->rrdp = rrdp_get(notify);
        if (rp->rrdp == NULL)
-               rp->rsync = rsync_get(uri, nofetch);
+               rp->rsync = rsync_get(uri, rp->basedir);
 
+       /* need to check if it was already loaded */
        if (repo_state(rp) != REPO_LOADING)
                entityq_flush(&rp->queue, rp);
 
@@ -1169,24 +1110,29 @@ repo_byid(unsigned int id)
 }
 
 /*
- * Return the repository base directory.
+ * Return the repository base or alternate directory.
  * Returned string must be freed by caller.
  */
 char *
-repo_basedir(const struct repo *rp)
+repo_basedir(const struct repo *rp, int wantvalid)
 {
-       char *path;
+       char *path = NULL;
 
-       if (rp->ta) {
-               if ((path = strdup(rp->ta->basedir)) == NULL)
-                       err(1, NULL);
-       } else if (rp->rsync) {
-               if ((path = strdup(rp->rsync->basedir)) == NULL)
+       if (!wantvalid) {
+               if (rp->ta) {
+                       if ((path = strdup(rp->ta->basedir)) == NULL)
+                               err(1, NULL);
+               } else if (rp->rsync) {
+                       if ((path = strdup(rp->rsync->basedir)) == NULL)
+                               err(1, NULL);
+               } else if (rp->rrdp) {
+                       path = rrdp_filename(rp->rrdp, rp->repouri, 0);
+               } else
+                       path = NULL;    /* only valid repo available */
+       } else if (rp->basedir != NULL) {
+               if ((path = strdup(rp->basedir)) == NULL)
                        err(1, NULL);
-       } else if (rp->rrdp) {
-               path = rrdp_filename(rp->rrdp, rp->repouri, 0);
-       } else
-               errx(1, "%s: bad repo", rp->repouri);
+       }
 
        return path;
 }
@@ -1289,64 +1235,173 @@ add_to_del(char **del, size_t *dsz, char
        return del;
 }
 
+/*
+ * Delayed delete of files from RRDP. Since RRDP has no security built-in
+ * this code needs to check if this RRDP repository is actually allowed to
+ * remove the file referenced by the URI.
+ */
 static char **
-repo_rrdp_cleanup(struct filepath_tree *tree, struct rrdprepo *rr,
-    char **del, size_t *delsz)
+repo_cleanup_rrdp(struct filepath_tree *tree, char **del, size_t *delsz)
 {
+       struct rrdprepo *rr;
        struct filepath *fp, *nfp;
        char *fn;
-
-       RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
-               fn = rrdp_filename(rr, fp->file, 0);
-               /* temp dir will be cleaned up by repo_cleanup() */
-
-               if (fn == NULL)
-                       errx(1, "bad filepath");        /* should not happen */
-
-               if (!filepath_exists(tree, fn))
+       
+       SLIST_FOREACH(rr, &rrdprepos, entry) {
+               RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
+                       if (!rrdp_uri_valid(rr, fp->file)) {
+                               warnx("%s: external URI %s", rr->notifyuri,
+                                   fp->file);
+                               filepath_put(&rr->deleted, fp);
+                               continue;
+                       }
+                       /* try to remove file from rrdp repo ... */
+                       fn = rrdp_filename(rr, fp->file, 0);
                        del = add_to_del(del, delsz, fn);
-               else
-                       warnx("%s: referenced file supposed to be deleted", fn);
+                       free(fn);
 
-               free(fn);
-               filepath_put(&rr->deleted, fp);
+                       /* ... and from the valid repository if unused. */
+                       fn = rrdp_filename(rr, fp->file, 1);
+                       if (!filepath_exists(tree, fn))
+                               del = add_to_del(del, delsz, fn);
+                       else
+                               warnx("%s: referenced file supposed to be "
+                                   "deleted", fn);
+
+                       free(fn);
+                       filepath_put(&rr->deleted, fp);
+               }
        }
 
        return del;
 }
 
+/*
+ * All files in tree are valid and should be moved to the valid repository
+ * if not already there. Rename the files to the new path and readd the
+ * filepath entry with the new path if successful.
+ */
+static void
+repo_move_valid(struct filepath_tree *tree)
+{
+       struct filepath *fp, *nfp;
+       size_t rsyncsz = strlen("rsync/");
+       size_t rrdpsz = strlen("rrdp/");
+       char *fn, *base;
+
+       RB_FOREACH_SAFE(fp, filepath_tree, tree, nfp) {
+               if (strncmp(fp->file, "rsync/", rsyncsz) != 0 &&
+                   strncmp(fp->file, "rrdp/", rrdpsz) != 0)
+                       continue; /* not a temporary file path */
+
+               if (strncmp(fp->file, "rsync/", rsyncsz) == 0) {
+                       if (asprintf(&fn, "valid/%s", fp->file + rsyncsz) == -1)
+                               err(1, NULL);
+               } else {
+                       base = strchr(fp->file + rrdpsz, '/');
+                       assert(base != NULL);
+                       if (asprintf(&fn, "valid/%s", base + 1) == -1)
+                               err(1, NULL);
+               }
+
+               if (repo_mkpath(fn) == -1) {
+                       free(fn);
+                       continue;
+               }
+
+               if (rename(fp->file, fn) == -1) {
+                       warn("rename %s", fn);
+                       free(fn);
+                       continue;
+               }
+
+               /* switch filepath node to new path */
+               RB_REMOVE(filepath_tree, tree, fp);
+               free(fp->file);
+               fp->file = fn;
+               if (RB_INSERT(filepath_tree, tree, fp) != NULL)
+                       errx(1, "both possibilities of file present");
+       }
+}
+
+#define        BASE_DIR        (void *)0x62617365
+#define        RSYNC_DIR       (void *)0x73796e63
+#define        RRDP_DIR        (void *)0x52524450
+
 void
 repo_cleanup(struct filepath_tree *tree)
 {
        size_t i, cnt, delsz = 0, dirsz = 0;
        char **del = NULL, **dir = NULL;
-       char *argv[4] = { "ta", "rsync", "rrdp", NULL };
-       struct rrdprepo *rr;
+       char *argv[5] = { "ta", "rsync", "rrdp", "valid", NULL };
        FTS *fts;
        FTSENT *e;
 
+       /* first move temp files which have been used to valid dir */
+       repo_move_valid(tree);
+       /* then delete files requested by rrdp */ 
+       del = repo_cleanup_rrdp(tree, del, &delsz);
+
        if ((fts = fts_open(argv, FTS_PHYSICAL | FTS_NOSTAT, NULL)) == NULL)
                err(1, "fts_open");
        errno = 0;
        while ((e = fts_read(fts)) != NULL) {
                switch (e->fts_info) {
                case FTS_NSOK:
-                       if (!filepath_exists(tree, e->fts_path))
+                       /* handle rrdp .state files explicitly */
+                       if (e->fts_parent->fts_pointer == RRDP_DIR &&
+                           e->fts_level == 2 &&
+                           strcmp(e->fts_name, ".state") == 0) {
+                               e->fts_parent->fts_number++;
+                       } else if (filepath_exists(tree, e->fts_path)) {
+                               e->fts_parent->fts_number++;
+                       } else if (e->fts_parent->fts_pointer == RRDP_DIR) {
+                               /* can't delete these extra files */
+                               e->fts_parent->fts_number++;
+                               stats.extra_files++;
+                               if (verbose > 1)
+                                       logx("superfluous %s", e->fts_path);
+                       } else {
+                               if (e->fts_parent->fts_pointer == RSYNC_DIR) {
+                                       /* no need to keep rsync files */
+                                       stats.extra_files++;
+                                       if (verbose > 1)
+                                               logx("superfluous %s",
+                                                   e->fts_path);
+                               }
                                del = add_to_del(del, &delsz,
                                    e->fts_path);
+                       }
                        break;
                case FTS_D:
-                       /* special cleanup for rrdp directories */
-                       if ((rr = rrdp_basedir(e->fts_path)) != NULL) {
-                               del = repo_rrdp_cleanup(tree, rr, del, &delsz);
-                               if (fts_set(fts, e, FTS_SKIP) == -1)
-                                       err(1, "fts_set");
+                       if (e->fts_level == FTS_ROOTLEVEL) {
+                               if (strcmp("rsync", e->fts_path) == 0)
+                                       e->fts_pointer = RSYNC_DIR;
+                               else if (strcmp("rrdp", e->fts_path) == 0)
+                                       e->fts_pointer = RRDP_DIR;
+                               else
+                                       e->fts_pointer = BASE_DIR;
+                       } else
+                               e->fts_pointer = e->fts_parent->fts_pointer;
+
+                       /*
+                        * special handling for rrdp directories,
+                        * clear them if they are not used anymore but
+                        * only if rrdp is active.
+                        */
+                       if (e->fts_pointer == RRDP_DIR && !noop && rrdpon &&
+                           e->fts_level == 1) {
+                               if (!rrdp_is_active(e->fts_path))
+                                       e->fts_pointer = NULL;
                        }
                        break;
                case FTS_DP:
-                       if (!filepath_dir_exists(tree, e->fts_path))
-                               dir = add_to_del(dir, &dirsz,
-                                   e->fts_path);
+                       if (e->fts_level == FTS_ROOTLEVEL)
+                               break;
+                       if (e->fts_number == 0)
+                               dir = add_to_del(dir, &dirsz, e->fts_path);
+
+                       e->fts_parent->fts_number += e->fts_number;
                        break;
                case FTS_SL:
                case FTS_SLNONE:
@@ -1356,8 +1411,7 @@ repo_cleanup(struct filepath_tree *tree)
                case FTS_NS:
                case FTS_ERR:
                        if (e->fts_errno == ENOENT &&
-                           (strcmp(e->fts_path, "rsync") == 0 ||
-                           strcmp(e->fts_path, "rrdp") == 0))
+                           e->fts_level == FTS_ROOTLEVEL)
                                continue;
                        warnx("fts_read %s: %s", e->fts_path,
                            strerror(e->fts_errno));
@@ -1388,7 +1442,7 @@ repo_cleanup(struct filepath_tree *tree)
                free(del[i]);
        }
        free(del);
-       stats.del_files = cnt;
+       stats.del_files += cnt;
 
        cnt = 0;
        for (i = 0; i < dirsz; i++) {
@@ -1399,7 +1453,7 @@ repo_cleanup(struct filepath_tree *tree)
                free(dir[i]);
        }
        free(dir);
-       stats.del_dirs = cnt;
+       stats.del_dirs += cnt;
 }
 
 void
@@ -1410,6 +1464,8 @@ repo_free(void)
        while ((rp = SLIST_FIRST(&repos)) != NULL) {
                SLIST_REMOVE_HEAD(&repos, entry);
                free(rp->repouri);
+               free(rp->notifyuri);
+               free(rp->basedir);
                free(rp);
        }
 
@@ -1418,6 +1474,9 @@ repo_free(void)
        rsync_free();
 }
 
+/*
+ * Remove all files and directories under base but do not remove base itself.
+ */
 static void
 remove_contents(char *base)
 {

Reply via email to