Hello Paul,
thank you for all your advice. I made a fix that allows absolute
paths to --one-top-level again and confines extraction to the
--one-top-level directory (instead of to the current or -C directory).
The patch leverages the existing -C code. In order to do that, every
entry in the wd[] table gets another companion entry in the table that
represents the --one-top-level directory. There is one additional field
in each entry that allows skipping the companion entries if they are not
desired.
The actual directory is created lazily by chdir_do() if needed, as you
requested, to avoid empty "a/foo" after --one-top-level=foo -C a -C b.
As a side effect, the patch also fixes extraction of hard links with
--one-top-level, which is currently broken in the typical case (the
transform is not applied to the link target, so the hard link is wrong).
The patch does not yet handle the --show-transformed case with
--one-top-level that you discussed in another subthread. As a result, two
tests now fail (onetop02.at and onetop04.at). I suppose that this would
be quite easy to fix.
Another open question: I am not sure whether repair_delayed_set_stat
and/or delay_set_stat should be called on the newly created
directories, as extract_dir() does (the whole delay_set code is a bit
mysterious to me).
Use of --create together with --one-top-level should probably be
forbidden, as unlink.c uses wd[] in a way that will likely break in the
presence of companion entries (the chdir_do call in
flush_deferred_unlinks).
Other than that, I believe that the patch is fairly complete, although
of course it needs better comments and the documentation needs to be
updated.
Please have a look.
Best regards, Pavel
diff --git a/src/common.h b/src/common.h
index 032c0a1f..6a197d6b 100644
--- a/src/common.h
+++ b/src/common.h
@@ -559,6 +559,7 @@ void verify_volume (void);
extern dev_t root_device;
void extr_init (void);
+bool create_dir (char const *file_name);
void extract_archive (void);
void extract_finish (void);
bool rename_directory (char *src, char *dst);
@@ -765,8 +766,8 @@ idx_t blocking_write (int fd, void const *buf, idx_t count);
enum { BADFD = AT_FDCWD == -1 ? -2 : -1 };
extern idx_t chdir_current;
-idx_t chdir_arg (char const *dir);
-void chdir_do (idx_t dir);
+idx_t chdir_arg (char const *dir, bool one_top_level);
+void chdir_do (idx_t dir, bool create);
struct chdir_id { int err; dev_t st_dev; ino_t st_ino; } chdir_id (void);
struct fdbase { int fd; char const *base; } fdbase (char const *);
struct fdbase fdbase1 (char const *);
diff --git a/src/extract.c b/src/extract.c
index ab83a650..93a78b91 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -1049,7 +1049,7 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool metadata_set)
&& memeq (file_name, data->file_name, data->file_name_len)))
break;
- chdir_do (data->change_dir);
+ chdir_do (data->change_dir, false);
if (check_for_renamed_directories)
{
@@ -1129,6 +1129,73 @@ safe_dir_mode (struct stat const *st)
| (we_are_root ? 0 : MODE_WXUSR));
}
+/* Trimmed version of extract_dir, to create a dir that is not in the
+ archive, including parents. Should behave like extract_dir when
+ NO_OVERWRITE_DIR_OLD_FILES is set in order to avoid changing existing
+ paths if they are in the way.
+*/
+bool
+create_dir (char const *file_name)
+{
+ int status;
+ mode_t mode;
+ bool interdir_made = false;
+ /* exists only to avoid passing a const pointer to make_directories */
+ char *unconst_file_name;
+
+ mode = MODE_RWX & ~ newdir_umask;
+
+ for (;;)
+ {
+ struct fdbase f = fdbase (file_name);
+ status = f.fd == BADFD ? -1 : mkdirat (f.fd, f.base, mode);
+ if (status == 0)
+ {
+ return true;
+ }
+
+ if (errno == EEXIST)
+ {
+ struct stat st;
+ st.st_mode = 0;
+
+ if (is_directory_link (file_name, &st))
+ return true;
+
+ if ((st.st_mode != 0 && fstatat_flags == 0)
+ || deref_stat (file_name, &st) == 0)
+ {
+ if (S_ISDIR (st.st_mode))
+ {
+ return true;
+ }
+ }
+ errno = EEXIST;
+ break;
+ }
+ else if (errno != ENOENT || interdir_made)
+ {
+ /* The error is not due to missing parent, or we already
+ tried to make the parent directories and succeeded, so
+ there must be another problem. No point in retrying. */
+ break;
+ }
+ unconst_file_name = xstrdup (file_name);
+ if (make_directories (unconst_file_name, &interdir_made) == 0)
+ {
+ free (unconst_file_name);
+ continue;
+ }
+ else
+ {
+ free (unconst_file_name);
+ break;
+ }
+ }
+ mkdir_error (file_name);
+ return false;
+}
+
/* Extractor functions for various member types */
static bool
@@ -1900,7 +1967,7 @@ extract_archive (void)
{
idx_t dir = chdir_current;
apply_nonancestor_delayed_set_stat (current_stat_info.file_name, false);
- chdir_do (dir);
+ chdir_do (dir, false);
}
/* Take a safety backup of a previously existing file. */
@@ -1923,6 +1990,8 @@ extract_archive (void)
typeflag);
if (fun)
{
+ /* create one_top_level dir if it does not exist */
+ chdir_do (chdir_current, one_top_level_option);
if (fun (current_stat_info.file_name, typeflag))
return;
}
@@ -1939,7 +2008,7 @@ apply_delayed_link (struct delayed_link *ds)
{
char const *valid_source = NULL;
- chdir_do (ds->change_dir);
+ chdir_do (ds->change_dir, false);
for (struct string_list *sources = ds->sources;
sources;
diff --git a/src/list.c b/src/list.c
index d541cf26..6567a4a8 100644
--- a/src/list.c
+++ b/src/list.c
@@ -128,16 +128,27 @@ enforce_one_top_level (char **pfile_name)
idx_t pos = strlen (one_top_level_dir);
if (strncmp (p, one_top_level_dir, pos) == 0)
{
- if (ISSLASH (p[pos]) || p[pos] == 0)
- return;
+ /* remove the one_top_level_dir prefix if it ends at component boundary. */
+ if (ISSLASH (p[pos]))
+ {
+ *pfile_name = xstrdup (p[pos+1] ? &p[pos+1] : ".");
+ free (file_name);
+ return;
+ }
+ else if (p[pos] == 0)
+ {
+ *pfile_name = xstrdup (".");
+ free (file_name);
+ return;
+ }
}
-
- *pfile_name = make_file_name (one_top_level_dir, file_name);
- normalize_filename_x (*pfile_name);
+ /* if the prefix does not match, do nothing */
}
else
- *pfile_name = xstrdup (one_top_level_dir);
- free (file_name);
+ {
+ *pfile_name = xstrdup (".");
+ free (file_name);
+ }
}
bool
@@ -163,7 +174,14 @@ transform_stat_info (char typeflag, struct tar_stat_info *stat_info)
}
if (one_top_level_option)
- enforce_one_top_level (&stat_info->file_name);
+ {
+ enforce_one_top_level (&stat_info->file_name);
+ /* Hardlinks are interpreted relative to cwd, and --one-top-level
+ works by means of a hidden change of cwd to the requested directory.
+ Adjust hardlink targets as well. */
+ if (typeflag == LNKTYPE)
+ enforce_one_top_level (&stat_info->link_name);
+ }
return true;
}
diff --git a/src/misc.c b/src/misc.c
index 02dfbcb4..e6bf5a36 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -24,6 +24,7 @@
#include <xgetcwd.h>
#include <unlinkdir.h>
#include <utimens.h>
+#include <assert.h>
#ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT
# define DOUBLE_SLASH_IS_DISTINCT_ROOT 0
@@ -969,6 +970,7 @@ struct wd
to be used. */
int fd;
+ bool one_top_level;
/* If ID.err is zero, the directory's identity;
if positive, a failure indication with errno = ID.err;
if negative, no attempt has been made yet to get the identity. */
@@ -1000,7 +1002,17 @@ static idx_t wdcache_count;
idx_t
chdir_count (void)
{
- return wd_count - !!wd_count;
+ idx_t count = 0;
+ if (wd_count)
+ {
+ /* Do not count the initial CWD entry -> start at 1. */
+ for (idx_t i = 1; i < wd_count; i++)
+ {
+ if (! wd[i].one_top_level)
+ count++;
+ }
+ }
+ return count;
}
/* Grow the WD table by at least one entry. */
@@ -1015,15 +1027,27 @@ grow_wd (void)
wd[wd_count].abspath = NULL;
wd[wd_count].fd = AT_FDCWD;
wd[wd_count].id.err = -1;
+ wd[wd_count].one_top_level = false;
wd_count++;
+ if (one_top_level_option)
+ {
+ wd[wd_count].name = one_top_level_dir;
+ wd[wd_count].abspath = NULL;
+ wd[wd_count].fd = 0;
+ wd[wd_count].id.err = -1;
+ wd[wd_count].one_top_level = true;
+ wd_count++;
+ }
}
}
/* DIR is the operand of a -C option; add it to vector of chdir targets,
and return the index of its location. */
idx_t
-chdir_arg (char const *dir)
+chdir_arg (char const *dir, bool one_top_level)
{
+ if (one_top_level)
+ chdir_arg (dir, false);
if (wd_count == wd_alloc)
grow_wd ();
@@ -1033,13 +1057,22 @@ chdir_arg (char const *dir)
{
dir += dotslashlen (dir);
if (! dir[dir[0] == '.'])
- return wd_count - 1;
+ {
+ if (wd[wd_count - 1].one_top_level == one_top_level)
+ return wd_count - 1;
+ else
+ return wd_count - 2;
+ }
}
+ if (one_top_level)
+ dir = one_top_level_dir;
+
wd[wd_count].name = dir;
wd[wd_count].abspath = NULL;
wd[wd_count].fd = 0;
wd[wd_count].id.err = -1;
+ wd[wd_count].one_top_level = one_top_level;
return wd_count++;
}
@@ -1058,21 +1091,63 @@ static int chdir_fd = AT_FDCWD;
working directory; otherwise, I must be a value returned by
chdir_arg. */
void
-chdir_do (idx_t i)
+chdir_do (idx_t i, bool create)
{
- if (chdir_current != i)
- {
- struct wd *curr = &wd[i];
- int fd = curr->fd;
+ struct wd *curr = &wd[i];
+ int fd = curr->fd;
+
+ /* nothing to create unless we are at the one_top_level dir that has not been
+ created yet */
+ create = create && curr->one_top_level && (fd == BADFD || fd == 0);
- if (! fd)
+ if (chdir_current != i || create)
+ {
+ if (! fd || create)
{
if (! IS_ABSOLUTE_FILE_NAME (curr->name))
- chdir_do (i - 1);
+ {
+ idx_t j = i - 1;
+ if (wd[j].one_top_level)
+ {
+ j--;
+ assert (! wd[j].one_top_level);
+ }
+ chdir_do (j, false);
+ }
fd = openat (chdir_fd, curr->name,
open_searchdir_how.flags & ~O_NOFOLLOW);
if (fd < 0)
- open_fatal (curr->name);
+ {
+ if (create)
+ {
+ struct open_how saved_open_searchdir_how = open_searchdir_how;
+ /* Don't use O_BENEATH during creation of the
+ directory. The one-top-level directory is
+ allowed to be given as an absolute path. */
+ open_searchdir_how.resolve = 0;
+ if (create_dir (curr->name))
+ /* Directory created, retry */
+ fd = openat (chdir_fd, curr->name,
+ open_searchdir_how.flags & ~O_NOFOLLOW);
+ open_searchdir_how = saved_open_searchdir_how;
+ /* Either the creation or open failed */
+ if (fd < 0)
+ open_fatal (curr->name);
+ }
+ else if (errno == ENOENT && curr->one_top_level)
+ {
+ /* We are requested to not create the directory now. Mark it
+ as to be created later when called with create == true. */
+ chdir_fd = curr->fd = BADFD;
+ chdir_current = i;
+ /* Do not add it to the cache */
+ return;
+ }
+ else
+ {
+ open_fatal (curr->name);
+ }
+ }
curr->fd = fd;
@@ -1090,7 +1165,7 @@ chdir_do (idx_t i)
}
}
- if (0 < fd)
+ if (0 < fd && /* no assumption about sign of BADFD */ fd != BADFD)
{
/* Move the i value to the front of the cache. This is
O(CHDIR_CACHE_SIZE), but the cache is small. */
@@ -1194,6 +1269,14 @@ fdbase_opendir (char const *file_name, bool alternate)
{
char const *name = file_name;
+ if (chdir_fd == BADFD && ! IS_ABSOLUTE_FILE_NAME (file_name))
+ {
+ /* BADFD is a sentinel value meaning that the chdir directory
+ needs to be created lazily, therefore if we encounter it, the
+ directory does not exist yet. */
+ errno = ENOENT;
+ return (struct fdbase) { .fd = chdir_fd, .base = name };
+ }
/* Skip past leading "./"s,
but not past the last "./" if that ends the name. */
idx_t dslen = dotslashlen (name);
@@ -1323,12 +1406,13 @@ tar_getcdpath (idx_t idx)
if (!wd[idx].abspath)
{
idx_t save_cwdi = chdir_current, i = idx;
- while (0 < i && !wd[i - 1].abspath)
+ while (0 < i && (!wd[i - 1].abspath || wd[i - 1].one_top_level))
i--;
for (; i <= idx; i++)
{
- chdir_do (i);
+ if (!wd[i].one_top_level)
+ chdir_do (i, false);
if (i == 0)
{
if ((wd[i].abspath = xgetcwd ()) == NULL)
@@ -1341,13 +1425,19 @@ tar_getcdpath (idx_t idx)
wd[i].abspath = xstrdup (wd[i].name);
else
{
- namebuf_t nbuf = namebuf_create (wd[i - 1].abspath);
+ idx_t j = i - 1;
+ if (wd[j].one_top_level)
+ {
+ j--;
+ assert (! wd[j].one_top_level);
+ }
+ namebuf_t nbuf = namebuf_create (wd[j].abspath);
namebuf_add_dir (nbuf, wd[i].name);
wd[i].abspath = namebuf_finish (nbuf);
}
}
- chdir_do (save_cwdi);
+ chdir_do (save_cwdi, false);
}
return wd[idx].abspath;
diff --git a/src/names.c b/src/names.c
index 1b8131c5..ea05b5f8 100644
--- a/src/names.c
+++ b/src/names.c
@@ -875,6 +875,7 @@ static idx_t name_buffer_length; /* allocated length of name_buffer */
void
name_init (void)
{
+ chdir_do (chdir_arg (".", one_top_level_option), false);
name_list_adjust ();
}
@@ -1118,7 +1119,7 @@ name_next_elt (bool change_dirs)
case NELT_CHDIR:
if (change_dirs)
{
- chdir_do (chdir_arg (xstrdup (ep->v.name)));
+ chdir_do (chdir_arg (xstrdup (ep->v.name), one_top_level_option), false);
name_list_advance ();
break;
}
@@ -1181,7 +1182,7 @@ name_gather (void)
static idx_t change_dir;
while ((ep = name_next_elt (false)) && ep->type == NELT_CHDIR)
- change_dir = chdir_arg (xstrdup (ep->v.name));
+ change_dir = chdir_arg (xstrdup (ep->v.name), one_top_level_option);
if (ep)
{
@@ -1210,7 +1211,7 @@ name_gather (void)
{
idx_t change_dir0 = change_dir;
while ((ep = name_next_elt (false)) && ep->type == NELT_CHDIR)
- change_dir = chdir_arg (xstrdup (ep->v.name));
+ change_dir = chdir_arg (xstrdup (ep->v.name), one_top_level_option);
if (ep)
addname (ep->v.name, change_dir, true, NULL);
@@ -1339,7 +1340,7 @@ name_match (const char *file_name)
if (cursor->name[0] == 0)
{
- chdir_do (cursor->change_dir);
+ chdir_do (cursor->change_dir, false);
namelist = NULL;
nametail = NULL;
return true;
@@ -1383,7 +1384,7 @@ name_match (const char *file_name)
return false;
/* We got a match. */
- chdir_do (found->change_dir);
+ chdir_do (found->change_dir, false);
return true;
}
@@ -1785,7 +1786,7 @@ collect_and_sort_names (void)
/* NOTE: EXCLUDE_ANCHORED is not relevant here */
/* FIXME: just skip regexps for now */
continue;
- chdir_do (name->change_dir);
+ chdir_do (name->change_dir, false);
if (name->name[0] == 0)
continue;
@@ -1931,7 +1932,7 @@ name_from_list (void)
{
if (!gnu_list_name->is_wildcard)
gnu_list_name->found_count++;
- chdir_do (gnu_list_name->change_dir);
+ chdir_do (gnu_list_name->change_dir, false);
return gnu_list_name;
}
return NULL;
diff --git a/src/tar.c b/src/tar.c
index 9376b59b..13f678e4 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -2688,8 +2688,7 @@ decode_options (int argc, char **argv)
"please set it explicitly with --one-top-level=DIR"));
}
- if (one_top_level_dir && !IS_RELATIVE_FILE_NAME (one_top_level_dir))
- paxusage(_("--one-top-level=DIR must use a relative file name"));
+ normalize_filename_x (one_top_level_dir);
}
/* If ready to unlink hierarchies, so we are for simpler files. */
diff --git a/src/unlink.c b/src/unlink.c
index 58187415..d809ad44 100644
--- a/src/unlink.c
+++ b/src/unlink.c
@@ -92,7 +92,7 @@ flush_deferred_unlinks (bool force)
if (force
|| p->records_written < records_written)
{
- chdir_do (p->dir_idx);
+ chdir_do (p->dir_idx, false);
if (p->is_dir)
{
const char *fname;
@@ -163,11 +163,11 @@ flush_deferred_unlinks (bool force)
struct deferred_unlink *next = p->next;
const char *fname;
- chdir_do (p->dir_idx);
+ chdir_do (p->dir_idx, false);
if (p->dir_idx && is_cwd (p))
{
fname = tar_dirname ();
- chdir_do (p->dir_idx - 1);
+ chdir_do (p->dir_idx - 1, false);
}
else
fname = p->file_name;
@@ -184,7 +184,7 @@ flush_deferred_unlinks (bool force)
dunlink_head = dunlink_tail = NULL;
}
- chdir_do (saved_chdir);
+ chdir_do (saved_chdir, false);
}
void
diff --git a/src/update.c b/src/update.c
index 872e701f..fa4883b0 100644
--- a/src/update.c
+++ b/src/update.c
@@ -133,7 +133,7 @@ update_archive (void)
{
struct stat s;
- chdir_do (name->change_dir);
+ chdir_do (name->change_dir, false);
if (deref_stat (current_stat_info.file_name, &s) == 0)
{
if (S_ISDIR (s.st_mode))