Thanks a lot for the great bug report and test case! That is indeed a bug. Here's a patch:
* src/copy.c (copy_internal): Move the block that sets `earlier_file' down to just before the first use of that variable. Otherwise, it was possible to make mv (and probably cp, too) malfunction when copying hard-linked files into a directory containing at least one of the source file names. Call forget_created everywhere thereafter where this function returns without creating a destination file that might subsequently be linked. Reported by Iida Yosiaki. * src/cp-hash.c (forget_created): New function. * src/cp-hash.h (forget_created): Prototype. Index: copy.c =================================================================== RCS file: /fetish/fileutils/src/copy.c,v retrieving revision 1.132 diff -u -p -r1.132 copy.c --- copy.c 2002/03/19 08:49:28 1.132 +++ copy.c 2002/03/30 07:07:23 @@ -815,38 +815,6 @@ copy_internal (const char *src_path, con src_type = src_sb.st_mode; - /* Associate the destination path with the source device and inode - so that if we encounter a matching dev/ino pair in the source tree - we can arrange to create a hard link between the corresponding names - in the destination tree. - - Sometimes, when preserving links, we have to record dev/ino even - though st_nlink == 1: - - when using -H and processing a command line argument; - that command line argument could be a symlink pointing to another - command line argument. With `cp -H --preserve=link', we hard-link - those two destination files. - - likewise for -L except that it applies to all files, not just - command line arguments. - - Also record directory dev/ino when using --recursive. We'll use that - info to detect this problem: cp -R dir dir. FIXME-maybe: ideally, - directory info would be recorded in a separate hash table, since - such entries are useful only while a single command line hierarchy - is being copied -- so that separate table could be cleared between - command line args. Using the same hash table to preserve hard - links means that it may not be cleared. */ - - if ((x->preserve_links - && (1 < src_sb.st_nlink - || (command_line_arg - && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) - || x->dereference == DEREF_ALWAYS)) - || (x->recursive && S_ISDIR (src_type))) - { - earlier_file = remember_copied (dst_path, src_sb.st_ino, src_sb.st_dev); - } - src_mode = src_sb.st_mode; if (S_ISDIR (src_type) && !x->recursive) @@ -1084,6 +1052,38 @@ copy_internal (const char *src_path, con putchar ('\n'); } + /* Associate the destination path with the source device and inode + so that if we encounter a matching dev/ino pair in the source tree + we can arrange to create a hard link between the corresponding names + in the destination tree. + + Sometimes, when preserving links, we have to record dev/ino even + though st_nlink == 1: + - when using -H and processing a command line argument; + that command line argument could be a symlink pointing to another + command line argument. With `cp -H --preserve=link', we hard-link + those two destination files. + - likewise for -L except that it applies to all files, not just + command line arguments. + + Also record directory dev/ino when using --recursive. We'll use that + info to detect this problem: cp -R dir dir. FIXME-maybe: ideally, + directory info would be recorded in a separate hash table, since + such entries are useful only while a single command line hierarchy + is being copied -- so that separate table could be cleared between + command line args. Using the same hash table to preserve hard + links means that it may not be cleared. */ + + if ((x->preserve_links + && (1 < src_sb.st_nlink + || (command_line_arg + && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) + || x->dereference == DEREF_ALWAYS)) + || (x->recursive && S_ISDIR (src_type))) + { + earlier_file = remember_copied (dst_path, src_sb.st_ino, src_sb.st_dev); + } + /* Did we copy this inode somewhere else (in this command line argument) and therefore this is a second hard link to the inode? */ @@ -1172,6 +1172,11 @@ copy_internal (const char *src_path, con error (0, 0, _("cannot move %s to a subdirectory of itself, %s"), quote_n (0, top_level_src_path), quote_n (1, top_level_dst_path)); + + /* Note that there is no need to call forget_created here, + (compare with the other calls in this file) since the + destination directory didn't exist before. */ + *copy_into_self = 1; /* FIXME-cleanup: Don't return zero here; adjust mv.c accordingly. The only caller that uses this code (mv.c) ends up setting its @@ -1209,6 +1214,7 @@ copy_internal (const char *src_path, con error (0, errno, _("cannot move %s to %s"), quote_n (0, src_path), quote_n (1, dst_path)); + forget_created (src_sb.st_ino, src_sb.st_dev); return 1; } @@ -1217,10 +1223,10 @@ copy_internal (const char *src_path, con the rename syscall. */ if (unlink (dst_path) && errno != ENOENT) { - /* Use the value of errno from the failed rename. */ error (0, errno, _("inter-device move failed: %s to %s; unable to remove target"), quote_n (0, src_path), quote_n (1, dst_path)); + forget_created (src_sb.st_ino, src_sb.st_dev); return 1; } @@ -1522,6 +1528,13 @@ copy_internal (const char *src_path, con return delayed_fail; un_backup: + + /* We didn't create the destination. + Remove the entry associating the source dev/ino with the + destination file name, so we don't try to `preserve' a link + to a file we didn't create. */ + forget_created (src_sb.st_ino, src_sb.st_dev); + if (dst_backup) { if (rename (dst_backup, dst_path)) Index: cp-hash.c =================================================================== RCS file: /fetish/fileutils/src/cp-hash.c,v retrieving revision 1.25 diff -u -p -r1.25 cp-hash.c --- cp-hash.c 2001/11/23 08:11:08 1.25 +++ cp-hash.c 2002/03/30 07:07:23 @@ -1,5 +1,5 @@ /* cp-hash.c -- file copying (hash search routines) - Copyright (C) 89, 90, 91, 1995-2001 Free Software Foundation. + Copyright (C) 89, 90, 91, 1995-2002 Free Software Foundation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -82,6 +82,23 @@ src_to_dest_free (void *x) struct Src_to_dest *a = x; free ((char *) (a->name)); free (x); +} + +/* Remove the entry matching INO/DEV from the table + that maps source ino/dev to destination file name. */ +void +forget_created (ino_t ino, dev_t dev) +{ + struct Src_to_dest probe; + struct Src_to_dest *ent; + + probe.st_ino = ino; + probe.st_dev = dev; + probe.name = NULL; + + ent = hash_delete (src_to_dest, &probe); + if (ent) + src_to_dest_free (ent); } /* Add PATH to the list of files that we have created. Index: cp-hash.h =================================================================== RCS file: /fetish/fileutils/src/cp-hash.h,v retrieving revision 1.4 diff -u -p -r1.4 cp-hash.h --- cp-hash.h 2001/10/06 17:24:58 1.4 +++ cp-hash.h 2002/03/30 07:07:23 @@ -1,4 +1,5 @@ void hash_init PARAMS ((void)); void forget_all PARAMS ((void)); +void forget_created PARAMS ((ino_t ino, dev_t dev)); char *remember_copied PARAMS ((const char *node, ino_t ino, dev_t dev)); int remember_created PARAMS ((const char *path)); _______________________________________________ Bug-fileutils mailing list [EMAIL PROTECTED] http://mail.gnu.org/mailman/listinfo/bug-fileutils