This diff adds a new cache subdir called "valid". This is the place where
all verified and good files are stored after a run. It makes -n work a lot
better since -n will now only look at what's inside "valid" and ignore
"rsync" and "rrdp".

The trust anchors are still stored in "ta" even if valid.
The rsync repo will only hold temporary files and be empty otherwise.
The rrdp repo still may contain some superfluous files that people
included in their snapshots. Not keeping them could result in extra
snapshot downloads since delta updates could refer to these files.
Since there is a valid repo, rrdp no longer needs an additional temp
directory for its sync.

With this rsync will use the --compare-dest feature and use the valid
cache as a base.

The file cleanup at the end got more complex. There is now
repo_move_valid() and repo_cleanup_rrdp() to do two initial steps of
cleanup before the tree traversal starts. The fts function now uses
some fts features to find a) empty directories and b) superfluous files.

I think the code changes outside of repo.c should be straight forward.

To the rpki connoisseur, this currently only implements a simple newest
file wins startegy. The handling of .mft and their serial number will
follow once this is in.
-- 
:wq Claudio

? obj
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.103
diff -u -p -r1.103 extern.h
--- extern.h    13 Jan 2022 13:46:03 -0000      1.103
+++ extern.h    13 Jan 2022 15:50:40 -0000
@@ -341,7 +341,7 @@ enum publish_type {
 struct entity {
        TAILQ_ENTRY(entity) entries;
        char            *path;          /* path relative to repository */
-       char            *file;          /* filename */
+       char            *file;          /* filename or valid repo path */
        unsigned char   *data;          /* optional data blob */
        size_t           datasz;        /* length of optional data blob */
        unsigned int     repoid;        /* repository identifier */
@@ -380,6 +380,7 @@ struct stats {
        size_t   vrps; /* total number of vrps */
        size_t   uniqs; /* number of unique vrps */
        size_t   del_files; /* number of files removed in cleanup */
+       size_t   extra_files; /* number of superfluous files */
        size_t   del_dirs; /* number of directories removed in cleanup */
        size_t   brks; /* number of BGPsec Router Key (BRK) certificates */
        struct timeval  elapsed_time;
@@ -506,7 +507,7 @@ void                 rrdp_clear(unsigned int);
 void            rrdp_save_state(unsigned int, struct rrdp_session *);
 int             rrdp_handle_file(unsigned int, enum publish_type, char *,
                    char *, size_t, char *, size_t);
-char           *repo_basedir(const struct repo *);
+char           *repo_basedir(const struct repo *, int);
 unsigned int    repo_id(const struct repo *);
 const char     *repo_uri(const struct repo *);
 struct repo    *ta_lookup(int, struct tal *);
@@ -520,7 +521,8 @@ void                 rsync_finish(unsigned int, int);
 void            http_finish(unsigned int, enum http_result, const char *);
 void            rrdp_finish(unsigned int, int);
 
-void            rsync_fetch(unsigned int, const char *, const char *);
+void            rsync_fetch(unsigned int, const char *, const char *,
+                   const char *);
 void            http_fetch(unsigned int, const char *, const char *, int);
 void            rrdp_fetch(unsigned int, const char *, const char *,
                    struct rrdp_session *);
Index: main.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.175
diff -u -p -r1.175 main.c
--- main.c      13 Jan 2022 13:18:41 -0000      1.175
+++ main.c      13 Jan 2022 15:50:40 -0000
@@ -151,17 +151,18 @@ entity_write_repo(struct repo *rp)
        struct ibuf *b;
        enum rtype type = RTYPE_REPO;
        unsigned int repoid;
-       char *path;
+       char *path, *altpath;
        int talid = 0;
 
        repoid = repo_id(rp);
-       path = repo_basedir(rp);
+       path = repo_basedir(rp, 0);
+       altpath = repo_basedir(rp, 1);
        b = io_new_buffer();
        io_simple_buffer(b, &type, sizeof(type));
        io_simple_buffer(b, &repoid, sizeof(repoid));
        io_simple_buffer(b, &talid, sizeof(talid));
        io_str_buffer(b, path);
-       io_str_buffer(b, NULL);
+       io_str_buffer(b, altpath);
        io_buf_buffer(b, NULL, 0);
        io_close_buffer(&procq, b);
        free(path);
@@ -254,14 +255,15 @@ rrdp_fetch(unsigned int id, const char *
  * Request a repository sync via rsync URI to directory local.
  */
 void
-rsync_fetch(unsigned int id, const char *uri, const char *local)
+rsync_fetch(unsigned int id, const char *uri, const char *local,
+    const char *base)
 {
        struct ibuf     *b;
 
        b = io_new_buffer();
        io_simple_buffer(b, &id, sizeof(id));
        io_str_buffer(b, local);
-       io_str_buffer(b, NULL);
+       io_str_buffer(b, base);
        io_str_buffer(b, uri);
        io_close_buffer(&rsyncq, b);
 }
@@ -1246,8 +1248,8 @@ main(int argc, char *argv[])
        logx("Certificate revocation lists: %zu", stats.crls);
        logx("Ghostbuster records: %zu", stats.gbrs);
        logx("Repositories: %zu", stats.repos);
-       logx("Cleanup: removed %zu files, %zu directories",
-           stats.del_files, stats.del_dirs);
+       logx("Cleanup: removed %zu files, %zu directories, %zu superfluous",
+           stats.del_files, stats.del_dirs, stats.extra_files);
        logx("VRP Entries: %zu (%zu unique)", stats.vrps, stats.uniqs);
 
        /* Memory cleanup. */
Index: output-json.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/output-json.c,v
retrieving revision 1.22
diff -u -p -r1.22 output-json.c
--- output-json.c       4 Nov 2021 11:32:55 -0000       1.22
+++ output-json.c       13 Jan 2022 15:50:40 -0000
@@ -78,6 +78,7 @@ outputheader_json(FILE *out, struct stat
            "\t\t\"vrps\": %zu,\n"
            "\t\t\"uniquevrps\": %zu,\n"
            "\t\t\"cachedir_del_files\": %zu,\n"
+           "\t\t\"cachedir_superfluous_files\": %zu,\n"
            "\t\t\"cachedir_del_dirs\": %zu\n"
            "\t},\n\n",
            st->mfts, st->mfts_fail, st->mfts_stale,
@@ -85,7 +86,7 @@ outputheader_json(FILE *out, struct stat
            st->gbrs,
            st->repos,
            st->vrps, st->uniqs,
-           st->del_files, st->del_dirs) < 0)
+           st->del_files, st->extra_files, st->del_dirs) < 0)
                return -1;
        return 0;
 }
Index: parser.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
retrieving revision 1.36
diff -u -p -r1.36 parser.c
--- parser.c    13 Jan 2022 14:58:21 -0000      1.36
+++ parser.c    13 Jan 2022 15:50:40 -0000
@@ -50,6 +50,7 @@ static struct crl_tree         crlt = RB_INITIA
 struct parse_repo {
        RB_ENTRY(parse_repo)     entry;
        char                    *path;
+       char                    *validpath;
        unsigned int             id;
 };
 
@@ -72,20 +73,77 @@ repo_get(unsigned int id)
 }
 
 static void
-repo_add(unsigned int id, char *path)
+repo_add(unsigned int id, char *path, char *validpath)
 {
        struct parse_repo *rp;
 
-       if ((rp = malloc(sizeof(*rp))) == NULL)
+       if ((rp = calloc(1, sizeof(*rp))) == NULL)
                err(1, NULL);
        rp->id = id;
-       if ((rp->path = strdup(path)) == NULL)
-               err(1, NULL);
+       if (path)
+               if ((rp->path = strdup(path)) == NULL)
+                       err(1, NULL);
+       if (validpath)
+               if ((rp->validpath = strdup(validpath)) == NULL)
+                       err(1, NULL);
 
        if (RB_INSERT(repo_tree, &repos, rp) != NULL)
                errx(1, "repository already added: id %d, %s", id, path);
 }
 
+/*
+ * Build access path to file based on repoid, path and file values.
+ * If wantalt == 1 the function can return NULL, if wantalt == 0 it
+ * can not fail.
+ */
+static char *
+parse_filepath(unsigned int repoid, const char *path, const char *file,
+    int wantalt)
+{
+       struct parse_repo       *rp;
+       char                    *fn, *repopath;
+
+       /* build file path based on repoid, entity path and filename */
+       rp = repo_get(repoid);
+       if (rp == NULL) {
+               /* no repo so no alternative path. */
+               if (wantalt == 1)
+                       return NULL;
+
+               if (path == NULL) {
+                       if ((fn = strdup(file)) == NULL)
+                                       err(1, NULL);
+               } else {
+                       if (asprintf(&fn, "%s/%s", path, file) == -1)
+                               err(1, NULL);
+               }
+       } else {
+               if (wantalt == 1)
+                       repopath = rp->validpath;
+               else if (rp->path == NULL) 
+                       repopath = rp->validpath;
+               else
+                       repopath = rp->path;
+
+               if (repopath == NULL)
+                       return NULL;
+
+               if (path == NULL) {
+                       if (asprintf(&fn, "%s/%s", repopath, file) == -1)
+                               err(1, NULL);
+               } else {
+                       if (asprintf(&fn, "%s/%s/%s", repopath, path,
+                           file) == -1)
+                               err(1, NULL);
+               }
+       }
+       return fn;
+}
+
+/*
+ * Callback for X509_verify_cert() to handle critical extensions in old
+ * LibreSSL libraries or OpenSSL libs without RFC3779 support.
+ */
 static int
 verify_cb(int ok, X509_STORE_CTX *store_ctx)
 {
@@ -229,13 +287,8 @@ int
 mft_check(const char *fn, struct mft *p)
 {
        size_t  i;
-       int     fd, rc = 1;
-       char    *cp, *h, *path = NULL;
-
-       /* Check hash of file now, but first build path for it */
-       cp = strrchr(fn, '/');
-       assert(cp != NULL);
-       assert(cp - fn < INT_MAX);
+       int     fd, try, rc = 1;
+       char    *h, *path;
 
        for (i = 0; i < p->filesz; i++) {
                const struct mftfile *m = &p->files[i];
@@ -246,15 +299,24 @@ mft_check(const char *fn, struct mft *p)
                        free(h);
                        continue;
                }
-               if (asprintf(&path, "%.*s/%s", (int)(cp - fn), fn,
-                   m->file) == -1)
-                       err(1, NULL);
-               fd = open(path, O_RDONLY);
+
+               fd = -1;
+               try = 0;
+               path = NULL;
+               do {
+                       free(path);
+                       if ((path = parse_filepath(p->repoid, p->path, m->file,
+                           try++)) == NULL)
+                               break;
+                       fd = open(path, O_RDONLY);
+               } while (fd == -1 && try < 2);
+
+               free(path);
+
                if (!valid_filehash(fd, m->hash, sizeof(m->hash))) {
                        warnx("%s: bad message digest for %s", fn, m->file);
                        rc = 0;
                }
-               free(path);
        }
 
        return rc;
@@ -630,37 +692,6 @@ build_crls(const struct crl *crl, STACK_
                err(1, "sk_X509_CRL_push");
 }
 
-static char *
-parse_filepath(struct entity *entp)
-{
-       struct parse_repo       *rp;
-       char                    *file;
-
-       /* build file path based on repoid, entity path and filename */
-       rp = repo_get(entp->repoid);
-       if (rp == NULL) {
-               if (entp->path == NULL) {
-                       if ((file = strdup(entp->file)) == NULL)
-                                       err(1, NULL);
-               } else {
-                       if (asprintf(&file, "%s/%s", entp->path,
-                           entp->file) == -1)
-                               err(1, NULL);
-               }
-       } else {
-               if (entp->path == NULL) {
-                       if (asprintf(&file, "%s/%s", rp->path,
-                           entp->file) == -1)
-                               err(1, NULL);
-               } else {
-                       if (asprintf(&file, "%s/%s/%s", rp->path,
-                           entp->path, entp->file) == -1)
-                               err(1, NULL);
-               }
-       }
-       return file;
-}
-
 static void
 parse_entity(struct entityq *q, struct msgbuf *msgq)
 {
@@ -673,27 +704,41 @@ parse_entity(struct entityq *q, struct m
        unsigned char   *f;
        size_t           flen;
        char            *file;
-       int              c;
+       int              c, try;
 
        while ((entp = TAILQ_FIRST(q)) != NULL) {
                TAILQ_REMOVE(q, entp, entries);
 
                /* handle RTYPE_REPO first */
                if (entp->type == RTYPE_REPO) {
-                       repo_add(entp->repoid, entp->path);
+                       repo_add(entp->repoid, entp->path, entp->file);
                        entity_free(entp);
                        continue;
                }
 
                f = NULL;
-               file = parse_filepath(entp);
+               file = parse_filepath(entp->repoid, entp->path, entp->file, 0);
                if (entp->type != RTYPE_TAL) {
-                       f = load_file(file, &flen);
-                       if (f == NULL)
-                               warn("%s", file);
+                       for (try = 0; try <= 1; try++) {
+                               f = load_file(file, &flen);
+                               if (f != NULL)
+                                       break;
+
+                               if (errno == ENOENT && try == 0) {
+                                       char *fn;
+                                       fn = parse_filepath(entp->repoid,
+                                           entp->path, entp->file, 1);
+                                       if (fn != NULL) {
+                                               free(file);
+                                               file = fn;
+                                               continue;
+                                       }
+                               }
+                               warn("parse file %s", file);
+                       }
                }
 
-               /* pass back at least type and filename */
+               /* pass back at least type, repoid and filename */
                b = io_new_buffer();
                io_simple_buffer(b, &entp->type, sizeof(entp->type));
                io_str_buffer(b, file);
Index: repo.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
retrieving revision 1.24
diff -u -p -r1.24 repo.c
--- repo.c      13 Jan 2022 14:57:02 -0000      1.24
+++ repo.c      13 Jan 2022 15:50:40 -0000
@@ -58,8 +58,6 @@ struct rrdprepo {
        SLIST_ENTRY(rrdprepo)    entry;
        char                    *notifyuri;
        char                    *basedir;
-       char                    *temp;
-       struct filepath_tree     added;
        struct filepath_tree     deleted;
        unsigned int             id;
        enum repo_state          state;
@@ -92,6 +90,7 @@ struct repo {
        SLIST_ENTRY(repo)        entry;
        char                    *repouri;
        char                    *notifyuri;
+       char                    *basedir;
        const struct rrdprepo   *rrdp;
        const struct rsyncrepo  *rsync;
        const struct tarepo     *ta;
@@ -105,7 +104,7 @@ static SLIST_HEAD(, repo)   repos = SLIST_
 /* counter for unique repo id */
 unsigned int           repoid;
 
-static struct rsyncrepo        *rsync_get(const char *, int);
+static struct rsyncrepo        *rsync_get(const char *, const char *);
 static void             remove_contents(char *);
 
 /*
@@ -168,26 +167,6 @@ filepath_exists(struct filepath_tree *tr
 }
 
 /*
- * Return true if a filepath entry exists that starts with path.
- */
-static int
-filepath_dir_exists(struct filepath_tree *tree, char *path)
-{
-       struct filepath needle;
-       struct filepath *res;
-
-       needle.file = path;
-       res = RB_NFIND(filepath_tree, tree, &needle);
-       while (res != NULL && strstr(res->file, path) == res->file) {
-               /* make sure that filepath actually is in that path */
-               if (res->file[strlen(path)] == '/')
-                       return 1;
-               res = RB_NEXT(filepath_tree, tree, res);
-       }
-       return 0;
-}
-
-/*
  * Remove entry from tree and free it.
  */
 static void
@@ -284,7 +263,8 @@ repo_state(struct repo *rp)
                return rp->rsync->state;
        if (rp->rrdp)
                return rp->rrdp->state;
-       errx(1, "%s: bad repo", rp->repouri);
+       /* No backend so sync is by definition done. */
+       return REPO_DONE;
 }
 
 /*
@@ -305,7 +285,8 @@ repo_done(const void *vp, int ok)
                        if (!ok) {
                                /* try to fall back to rsync */
                                rp->rrdp = NULL;
-                               rp->rsync = rsync_get(rp->repouri, 0);
+                               rp->rsync = rsync_get(rp->repouri,
+                                   rp->basedir);
                                /* need to check if it was already loaded */
                                if (repo_state(rp) != REPO_LOADING)
                                        entityq_flush(&rp->queue, rp);
@@ -362,7 +343,7 @@ ta_fetch(struct tarepo *tr)
                 * Create destination location.
                 * Build up the tree to this point.
                 */
-               rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir);
+               rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir, NULL);
        } else {
                int fd;
 
@@ -387,9 +368,6 @@ ta_get(struct tal *tal)
 
        /* no need to look for possible other repo */
 
-       if (tal->urisz == 0)
-               errx(1, "TAL %s has no URI", tal->descr);
-
        if ((tr = calloc(1, sizeof(*tr))) == NULL)
                err(1, NULL);
        tr->id = ++repoid;
@@ -405,17 +383,7 @@ ta_get(struct tal *tal)
        tal->urisz = 0;
        tal->uri = NULL;
 
-       if (noop) {
-               tr->state = REPO_DONE;
-               logx("ta/%s: using cache", tr->descr);
-               repo_done(tr, 0);
-       } else {
-               /* try to create base directory */
-               if (mkpath(tr->basedir) == -1)
-                       warn("mkpath %s", tr->basedir);
-
-               ta_fetch(tr);
-       }
+       ta_fetch(tr);
 
        return tr;
 }
@@ -447,7 +415,7 @@ ta_free(void)
 }
 
 static struct rsyncrepo *
-rsync_get(const char *uri, int nofetch)
+rsync_get(const char *uri, const char *validdir)
 {
        struct rsyncrepo *rr;
        char *repo;
@@ -470,22 +438,16 @@ rsync_get(const char *uri, int nofetch)
        rr->repouri = repo;
        rr->basedir = repo_dir(repo, "rsync", 0);
 
-       if (noop || nofetch) {
-               rr->state = REPO_DONE;
-               logx("%s: using cache", rr->basedir);
-               repo_done(rr, 0);
-       } else {
-               /* create base directory */
-               if (mkpath(rr->basedir) == -1) {
-                       warn("mkpath %s", rr->basedir);
-                       rsync_finish(rr->id, 0);
-                       return rr;
-               }
-
-               logx("%s: pulling from %s", rr->basedir, rr->repouri);
-               rsync_fetch(rr->id, rr->repouri, rr->basedir);
+       /* create base directory */
+       if (mkpath(rr->basedir) == -1) {
+               warn("mkpath %s", rr->basedir);
+               rsync_finish(rr->id, 0);
+               return rr;
        }
 
+       logx("%s: pulling from %s", rr->basedir, rr->repouri);
+       rsync_fetch(rr->id, rr->repouri, rr->basedir, validdir);
+
        return rr;
 }
 
@@ -513,25 +475,19 @@ rsync_free(void)
        }
 }
 
-static int rrdprepo_fetch(struct rrdprepo *);
-
 /*
  * Build local file name base on the URI and the rrdprepo info.
  */
 static char *
-rrdp_filename(const struct rrdprepo *rr, const char *uri, int temp)
+rrdp_filename(const struct rrdprepo *rr, const char *uri, int valid)
 {
        char *nfile;
-       char *dir = rr->basedir;
-
-       if (temp)
-               dir = rr->temp;
-
-       if (!valid_uri(uri, strlen(uri), "rsync://")) {
-               warnx("%s: bad URI %s", rr->basedir, uri);
-               return NULL;
-       }
+       const char *dir = rr->basedir;
 
+       if (valid)
+               dir = "valid";
+       if (!valid_uri(uri, strlen(uri), "rsync://"))
+               errx(1, "%s: bad URI %s", rr->basedir, uri);
        uri += strlen("rsync://");      /* skip proto */
        if (asprintf(&nfile, "%s/%s", dir, uri) == -1)
                err(1, NULL);
@@ -575,28 +531,49 @@ rrdp_free(void)
 
                free(rr->notifyuri);
                free(rr->basedir);
-               free(rr->temp);
 
-               filepath_free(&rr->added);
                filepath_free(&rr->deleted);
 
                free(rr);
        }
 }
 
-static struct rrdprepo *
-rrdp_basedir(const char *dir)
+/*
+ * Check if a directory is an active rrdp repository.
+ * Returns 1 if found else 0.
+ */
+static int
+rrdp_is_active(const char *dir)
 {
        struct rrdprepo *rr;
 
        SLIST_FOREACH(rr, &rrdprepos, entry)
                if (strcmp(dir, rr->basedir) == 0) {
                        if (rr->state == REPO_FAILED)
-                               return NULL;
-                       return rr;
+                               return 0;
+                       return 1;
                }
 
-       return NULL;
+       return 0;
+}
+
+/*
+ * Check if the URI is actually covered by one of the repositories
+ * that depend on this RRDP repository.
+ * Returns 1 if the URI is valid, 0 if no repouri matches the URI.
+ */
+static int
+rrdp_uri_valid(struct rrdprepo *rr, const char *uri)
+{
+       struct repo *rp;
+
+       SLIST_FOREACH(rp, &repos, entry) {
+               if (rp->rrdp != rr)
+                       continue;
+               if (strncmp(uri, rp->repouri, strlen(rp->repouri)) == 0)
+                       return 1;
+       }
+       return 0;
 }
 
 /*
@@ -740,8 +717,9 @@ fail:
 }
 
 static struct rrdprepo *
-rrdp_get(const char *uri, int nofetch)
+rrdp_get(const char *uri)
 {
+       struct rrdp_session state = { 0 };
        struct rrdprepo *rr;
 
        SLIST_FOREACH(rr, &rrdprepos, entry)
@@ -761,28 +739,24 @@ rrdp_get(const char *uri, int nofetch)
                err(1, NULL);
        rr->basedir = repo_dir(uri, "rrdp", 1);
 
-       RB_INIT(&rr->added);
        RB_INIT(&rr->deleted);
 
-       if (noop || nofetch) {
-               rr->state = REPO_DONE;
-               logx("%s: using cache", rr->notifyuri);
-               repo_done(rr, 0);
-       } else {
-               /* create base directory */
-               if (mkpath(rr->basedir) == -1) {
-                       warn("mkpath %s", rr->basedir);
-                       rrdp_finish(rr->id, 0);
-                       return rr;
-               }
-               if (rrdprepo_fetch(rr) == -1) {
-                       rrdp_finish(rr->id, 0);
-                       return rr;
-               }
 
-               logx("%s: pulling from %s", rr->notifyuri, "network");
+       /* create base directory */
+       if (mkpath(rr->basedir) == -1) {
+               warn("mkpath %s", rr->basedir);
+               rrdp_finish(rr->id, 0);
+               return rr;
        }
 
+       /* parse state and start the sync */
+       rrdp_parse_state(rr, &state);
+       rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
+       free(state.session_id);
+       free(state.last_mod);
+
+       logx("%s: pulling from %s", rr->notifyuri, "network");
+
        return rr;
 }
 
@@ -800,7 +774,6 @@ rrdp_clear(unsigned int id)
 
        /* remove rrdp repository contents */
        remove_contents(rr->basedir);
-       remove_contents(rr->temp);
 }
 
 /*
@@ -816,8 +789,8 @@ rrdp_handle_file(unsigned int id, enum p
        struct rrdprepo *rr;
        struct filepath *fp;
        ssize_t s;
-       char *fn;
-       int fd = -1;
+       char *fn = NULL;
+       int fd = -1, try = 0;
 
        rr = rrdp_find(id);
        if (rr == NULL)
@@ -825,21 +798,20 @@ rrdp_handle_file(unsigned int id, enum p
        if (rr->state == REPO_FAILED)
                return -1;
 
+       /* check hash of original file for updates and deletes */
        if (pt == PUB_UPD || pt == PUB_DEL) {
                if (filepath_exists(&rr->deleted, uri)) {
                        warnx("%s: already deleted", uri);
                        return 0;
                }
-               fp = filepath_find(&rr->added, uri);
-               if (fp == NULL) {
-                       if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
-                               return 0;
-               } else {
-                       filepath_put(&rr->added, fp);
-                       if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
+               /* try to open file first in rrdp then in valid repo */
+               do {
+                       free(fn);
+                       if ((fn = rrdp_filename(rr, uri, try++)) == NULL)
                                return 0;
-               }
-               fd = open(fn, O_RDONLY);
+                       fd = open(fn, O_RDONLY);
+               } while (fd == -1 && try < 2);
+
                if (!valid_filehash(fd, hash, hlen)) {
                        warnx("%s: bad file digest for %s", rr->notifyuri, fn);
                        free(fn);
@@ -848,6 +820,7 @@ rrdp_handle_file(unsigned int id, enum p
                free(fn);
        }
 
+       /* write new content or mark uri as deleted. */
        if (pt == PUB_DEL) {
                filepath_add(&rr->deleted, uri);
        } else {
@@ -855,8 +828,8 @@ rrdp_handle_file(unsigned int id, enum p
                if (fp != NULL)
                        filepath_put(&rr->deleted, fp);
 
-               /* add new file to temp dir */
-               if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
+               /* add new file to rrdp dir */
+               if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
                        return 0;
 
                if (repo_mkpath(fn) == -1)
@@ -876,7 +849,6 @@ rrdp_handle_file(unsigned int id, enum p
                if ((size_t)s != dlen)  /* impossible */
                        errx(1, "short write %s", fn);
                free(fn);
-               filepath_add(&rr->added, uri);
        }
 
        return 1;
@@ -890,66 +862,6 @@ fail:
 }
 
 /*
- * Initiate a RRDP sync, create the required temporary directory and
- * parse a possible state file before sending the request to the RRDP process.
- */
-static int
-rrdprepo_fetch(struct rrdprepo *rr)
-{
-       struct rrdp_session state = { 0 };
-
-       if (asprintf(&rr->temp, "%s.XXXXXXXX", rr->basedir) == -1)
-               err(1, NULL);
-       if (mkdtemp(rr->temp) == NULL) {
-               warn("mkdtemp %s", rr->temp);
-               return -1;
-       }
-
-       rrdp_parse_state(rr, &state);
-       rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
-
-       free(state.session_id);
-       free(state.last_mod);
-
-       return 0;
-}
-
-static int
-rrdp_merge_repo(struct rrdprepo *rr)
-{
-       struct filepath *fp, *nfp;
-       char *fn, *rfn;
-
-       RB_FOREACH_SAFE(fp, filepath_tree, &rr->added, nfp) {
-               fn = rrdp_filename(rr, fp->file, 1);
-               rfn = rrdp_filename(rr, fp->file, 0);
-
-               if (fn == NULL || rfn == NULL)
-                       errx(1, "bad filepath");        /* should not happen */
-
-               if (repo_mkpath(rfn) == -1) {
-                       goto fail;
-               }
-
-               if (rename(fn, rfn) == -1) {
-                       warn("rename %s", rfn);
-                       goto fail;
-               }
-
-               free(rfn);
-               free(fn);
-               filepath_put(&rr->added, fp);
-       }
-
-       return 1;
-
-fail:
-       free(rfn);
-       free(fn);
-       return 0;
-}
-
-/*
  * RSYNC sync finished, either with or without success.
  */
 void
@@ -993,6 +905,8 @@ rsync_finish(unsigned int id, int ok)
                    rr->basedir);
                stats.rsync_fails++;
                rr->state = REPO_FAILED;
+               /* clear rsync repo since it failed */
+               remove_contents(rr->basedir);
        }
 
        repo_done(rr, ok);
@@ -1013,19 +927,20 @@ rrdp_finish(unsigned int id, int ok)
        if (rr->state != REPO_LOADING)
                return;
 
-       if (ok && rrdp_merge_repo(rr)) {
+       if (ok) {
                logx("%s: loaded from network", rr->notifyuri);
                stats.rrdp_repos++;
                rr->state = REPO_DONE;
-               repo_done(rr, ok);
        } else {
                logx("%s: load from network failed, fallback to rsync",
                    rr->notifyuri);
                stats.rrdp_fails++;
                rr->state = REPO_FAILED;
-               remove_contents(rr->temp);
-               repo_done(rr, 0);
+               /* clear the RRDP repo since it failed */
+               remove_contents(rr->basedir);
        }
+
+       repo_done(rr, ok);
 }
 
 /*
@@ -1082,6 +997,9 @@ ta_lookup(int id, struct tal *tal)
 {
        struct repo     *rp;
 
+       if (tal->urisz == 0)
+               errx(1, "TAL %s has no URI", tal->descr);
+
        /* Look up in repository table. (Lookup should actually fail here) */
        SLIST_FOREACH(rp, &repos, entry) {
                if (strcmp(rp->repouri, tal->descr) == 0)
@@ -1089,11 +1007,24 @@ ta_lookup(int id, struct tal *tal)
        }
 
        rp = repo_alloc(id);
+       rp->basedir = repo_dir(tal->descr, "ta", 0);
        if ((rp->repouri = strdup(tal->descr)) == NULL)
                err(1, NULL);
 
+       /* try to create base directory */
+       if (mkpath(rp->basedir) == -1)
+               warn("mkpath %s", rp->basedir);
+
+       /* check if sync disabled ... */
+       if (noop) {
+               logx("ta/%s: using cache", rp->repouri);
+               entityq_flush(&rp->queue, rp);
+               return rp;
+       }
+
        rp->ta = ta_get(tal);
 
+       /* need to check if it was already loaded */
        if (repo_state(rp) != REPO_LOADING)
                entityq_flush(&rp->queue, rp);
 
@@ -1130,6 +1061,7 @@ repo_lookup(int talid, const char *uri, 
        }
 
        rp = repo_alloc(talid);
+       rp->basedir = repo_dir(repouri, "valid", 0);
        rp->repouri = repouri;
        if (notify != NULL)
                if ((rp->notifyuri = strdup(notify)) == NULL)
@@ -1141,12 +1073,24 @@ repo_lookup(int talid, const char *uri, 
                nofetch = 1;
        }
 
-       /* try RRDP first if available */
+       /* try to create base directory */
+       if (mkpath(rp->basedir) == -1)
+               warn("mkpath %s", rp->basedir);
+
+       /* check if sync disabled ... */
+       if (noop || nofetch) {
+               logx("%s: using cache", rp->basedir);
+               entityq_flush(&rp->queue, rp);
+               return rp;
+       }
+
+       /* ... else try RRDP first if available then rsync */
        if (notify != NULL)
-               rp->rrdp = rrdp_get(notify, nofetch);
+               rp->rrdp = rrdp_get(notify);
        if (rp->rrdp == NULL)
-               rp->rsync = rsync_get(uri, nofetch);
+               rp->rsync = rsync_get(uri, rp->basedir);
 
+       /* need to check if it was already loaded */
        if (repo_state(rp) != REPO_LOADING)
                entityq_flush(&rp->queue, rp);
 
@@ -1169,24 +1113,29 @@ repo_byid(unsigned int id)
 }
 
 /*
- * Return the repository base directory.
+ * Return the repository base or alternate directory.
  * Returned string must be freed by caller.
  */
 char *
-repo_basedir(const struct repo *rp)
+repo_basedir(const struct repo *rp, int wantvalid)
 {
-       char *path;
+       char *path = NULL;
 
-       if (rp->ta) {
-               if ((path = strdup(rp->ta->basedir)) == NULL)
-                       err(1, NULL);
-       } else if (rp->rsync) {
-               if ((path = strdup(rp->rsync->basedir)) == NULL)
+       if (wantvalid == 0) {
+               if (rp->ta) {
+                       if ((path = strdup(rp->ta->basedir)) == NULL)
+                               err(1, NULL);
+               } else if (rp->rsync) {
+                       if ((path = strdup(rp->rsync->basedir)) == NULL)
+                               err(1, NULL);
+               } else if (rp->rrdp) {
+                       path = rrdp_filename(rp->rrdp, rp->repouri, 0);
+               } else
+                       path = NULL;    /* only valid repo available */
+       } else if (rp->basedir != NULL) {
+               if ((path = strdup(rp->basedir)) == NULL)
                        err(1, NULL);
-       } else if (rp->rrdp) {
-               path = rrdp_filename(rp->rrdp, rp->repouri, 0);
-       } else
-               errx(1, "%s: bad repo", rp->repouri);
+       }
 
        return path;
 }
@@ -1289,64 +1238,173 @@ add_to_del(char **del, size_t *dsz, char
        return del;
 }
 
+/*
+ * Delayed delete of files from RRDP. Since RRDP has no security built-in
+ * this code needs to check if this RRDP repository is actually allowed to
+ * remove the file referenced by the URI.
+ */
 static char **
-repo_rrdp_cleanup(struct filepath_tree *tree, struct rrdprepo *rr,
-    char **del, size_t *delsz)
+repo_cleanup_rrdp(struct filepath_tree *tree, char **del, size_t *delsz)
 {
+       struct rrdprepo *rr;
        struct filepath *fp, *nfp;
        char *fn;
-
-       RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
-               fn = rrdp_filename(rr, fp->file, 0);
-               /* temp dir will be cleaned up by repo_cleanup() */
-
-               if (fn == NULL)
-                       errx(1, "bad filepath");        /* should not happen */
-
-               if (!filepath_exists(tree, fn))
+       
+       SLIST_FOREACH(rr, &rrdprepos, entry) {
+               RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
+                       if (!rrdp_uri_valid(rr, fp->file)) {
+                               warnx("%s: external URI %s", rr->notifyuri,
+                                   fp->file);
+                               filepath_put(&rr->deleted, fp);
+                               continue;
+                       }
+                       /* try to remove file from rrdp repo ... */
+                       fn = rrdp_filename(rr, fp->file, 0);
                        del = add_to_del(del, delsz, fn);
-               else
-                       warnx("%s: referenced file supposed to be deleted", fn);
+                       free(fn);
 
-               free(fn);
-               filepath_put(&rr->deleted, fp);
+                       /* ... and from the valid repository if unused. */
+                       fn = rrdp_filename(rr, fp->file, 1);
+                       if (!filepath_exists(tree, fn))
+                               del = add_to_del(del, delsz, fn);
+                       else
+                               warnx("%s: referenced file supposed to be "
+                                   "deleted", fn);
+
+                       free(fn);
+                       filepath_put(&rr->deleted, fp);
+               }
        }
 
        return del;
 }
 
+/*
+ * All files in tree are valid and should be moved to the valid repository
+ * if not already there. Rename the files to the new path and readd the
+ * filepath entry with the new path if successful.
+ */
+static void
+repo_move_valid(struct filepath_tree *tree)
+{
+       struct filepath *fp, *nfp;
+       size_t rsyncsz = strlen("rsync/");
+       size_t rrdpsz = strlen("rrdp/");
+       char *fn, *base;
+
+       RB_FOREACH_SAFE(fp, filepath_tree, tree, nfp) {
+               if (strncmp(fp->file, "rsync/", rsyncsz) != 0 &&
+                   strncmp(fp->file, "rrdp/", rrdpsz) != 0)
+                       continue; /* not a temporary file path */
+
+               if (strncmp(fp->file, "rsync/", rsyncsz) == 0) {
+                       if (asprintf(&fn, "valid/%s", fp->file + rsyncsz) == -1)
+                               err(1, NULL);
+               } else {
+                       base = strchr(fp->file + rrdpsz, '/');
+                       assert(base != NULL);
+                       if (asprintf(&fn, "valid/%s", base + 1) == -1)
+                               err(1, NULL);
+               }
+
+               if (repo_mkpath(fn) == -1) {
+                       free(fn);
+                       continue;
+               }
+
+               if (rename(fp->file, fn) == -1) {
+                       warn("rename %s", fn);
+                       free(fn);
+                       continue;
+               }
+
+               /* switch filepath node to new path */
+               RB_REMOVE(filepath_tree, tree, fp);
+               free(fp->file);
+               fp->file = fn;
+               if (RB_INSERT(filepath_tree, tree, fp) != NULL)
+                       errx(1, "both possibilities of file present");
+       }
+}
+
+#define        BASE_DIR        (void *)0x62617365
+#define        RSYNC_DIR       (void *)0x73796e63
+#define        RRDP_DIR        (void *)0x52524450
+
 void
 repo_cleanup(struct filepath_tree *tree)
 {
        size_t i, cnt, delsz = 0, dirsz = 0;
        char **del = NULL, **dir = NULL;
-       char *argv[4] = { "ta", "rsync", "rrdp", NULL };
-       struct rrdprepo *rr;
+       char *argv[5] = { "ta", "rsync", "rrdp", "valid", NULL };
        FTS *fts;
        FTSENT *e;
 
+       /* first move temp files which have been used to valid dir */
+       repo_move_valid(tree);
+       /* then delete files requested by rrdp */ 
+       del = repo_cleanup_rrdp(tree, del, &delsz);
+
        if ((fts = fts_open(argv, FTS_PHYSICAL | FTS_NOSTAT, NULL)) == NULL)
                err(1, "fts_open");
        errno = 0;
        while ((e = fts_read(fts)) != NULL) {
                switch (e->fts_info) {
                case FTS_NSOK:
-                       if (!filepath_exists(tree, e->fts_path))
+                       /* handle rrdp .state files explicitly */
+                       if (e->fts_parent->fts_pointer == RRDP_DIR &&
+                           e->fts_level == 2 &&
+                           strcmp(e->fts_name, ".state") == 0) {
+                               e->fts_parent->fts_number++;
+                       } else if (filepath_exists(tree, e->fts_path)) {
+                               e->fts_parent->fts_number++;
+                       } else if (e->fts_parent->fts_pointer == RRDP_DIR) {
+                               /* can't delete these extra files */
+                               e->fts_parent->fts_number++;
+                               stats.extra_files++;
+                               if (verbose > 1)
+                                       logx("superfluous %s", e->fts_path);
+                       } else {
+                               if (e->fts_parent->fts_pointer == RSYNC_DIR) {
+                                       /* no need to keep rsync files */
+                                       stats.extra_files++;
+                                       if (verbose > 1)
+                                               logx("superfluous %s",
+                                                   e->fts_path);
+                               }
                                del = add_to_del(del, &delsz,
                                    e->fts_path);
+                       }
                        break;
                case FTS_D:
-                       /* special cleanup for rrdp directories */
-                       if ((rr = rrdp_basedir(e->fts_path)) != NULL) {
-                               del = repo_rrdp_cleanup(tree, rr, del, &delsz);
-                               if (fts_set(fts, e, FTS_SKIP) == -1)
-                                       err(1, "fts_set");
+                       if (e->fts_level == FTS_ROOTLEVEL) {
+                               if (strcmp("rsync", e->fts_path) == 0)
+                                       e->fts_pointer = RSYNC_DIR;
+                               else if (strcmp("rrdp", e->fts_path) == 0)
+                                       e->fts_pointer = RRDP_DIR;
+                               else
+                                       e->fts_pointer = BASE_DIR;
+                       } else
+                               e->fts_pointer = e->fts_parent->fts_pointer;
+
+                       /*
+                        * special handling for rrdp directories,
+                        * clear them if they are not used anymore but
+                        * only if rrdp is active.
+                        */
+                       if (e->fts_pointer == RRDP_DIR && !noop && rrdpon &&
+                           e->fts_level == 1) {
+                               if (!rrdp_is_active(e->fts_path))
+                                       e->fts_pointer = NULL;
                        }
                        break;
                case FTS_DP:
-                       if (!filepath_dir_exists(tree, e->fts_path))
-                               dir = add_to_del(dir, &dirsz,
-                                   e->fts_path);
+                       if (e->fts_level == FTS_ROOTLEVEL)
+                               break;
+                       if (e->fts_number == 0)
+                               dir = add_to_del(dir, &dirsz, e->fts_path);
+
+                       e->fts_parent->fts_number += e->fts_number;
                        break;
                case FTS_SL:
                case FTS_SLNONE:
@@ -1356,8 +1414,7 @@ repo_cleanup(struct filepath_tree *tree)
                case FTS_NS:
                case FTS_ERR:
                        if (e->fts_errno == ENOENT &&
-                           (strcmp(e->fts_path, "rsync") == 0 ||
-                           strcmp(e->fts_path, "rrdp") == 0))
+                           e->fts_level == FTS_ROOTLEVEL)
                                continue;
                        warnx("fts_read %s: %s", e->fts_path,
                            strerror(e->fts_errno));
@@ -1388,7 +1445,7 @@ repo_cleanup(struct filepath_tree *tree)
                free(del[i]);
        }
        free(del);
-       stats.del_files = cnt;
+       stats.del_files += cnt;
 
        cnt = 0;
        for (i = 0; i < dirsz; i++) {
@@ -1399,7 +1456,7 @@ repo_cleanup(struct filepath_tree *tree)
                free(dir[i]);
        }
        free(dir);
-       stats.del_dirs = cnt;
+       stats.del_dirs += cnt;
 }
 
 void
@@ -1418,6 +1475,9 @@ repo_free(void)
        rsync_free();
 }
 
+/*
+ * Remove all files and directories under base but do not remove base itself.
+ */
 static void
 remove_contents(char *base)
 {

Reply via email to