The branch, master has been updated
       via  ad9f1571 Add hashtable to delete_in_dir() to fix -x deletions
       via  f8005578 Tweak the hashtable routines to be a little clearer and easier.
      from  13f81f4a Tweak a usage message.
https://git.samba.org/?p=rsync.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit ad9f1571cee2a29232d83fcb13c6924722f492bf Author: Wayne Davison <wa...@opencoder.net> Date: Fri Jun 12 16:52:54 2020 -0700 Add hashtable to delete_in_dir() to fix -x deletions commit f800557824bbb616f68451a9d5919b982c146ac8 Author: Wayne Davison <wa...@opencoder.net> Date: Fri Jun 12 16:39:29 2020 -0700 Tweak the hashtable routines to be a little clearer and easier. ----------------------------------------------------------------------- Summary of changes: flist.c | 8 ++++---- generator.c | 13 ++++++++++--- hashtable.c | 29 ++++++++++++++++++++++------- hlink.c | 26 +++++++++++++------------- rsync.h | 3 +++ xattrs.c | 17 ++++++----------- 6 files changed, 58 insertions(+), 38 deletions(-) Changeset truncated at 500 lines: diff --git a/flist.c b/flist.c index dbb0f921..bbc028ba 100644 --- a/flist.c +++ b/flist.c @@ -491,9 +491,9 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file, if (tmp_dev != -1) { if (protocol_version >= 30) { struct ht_int64_node *np = idev_find(tmp_dev, tmp_ino); - first_hlink_ndx = (int32)(long)np->data - 1; + first_hlink_ndx = (int32)(long)np->data; /* is -1 when new */ if (first_hlink_ndx < 0) { - np->data = (void*)(long)(first_ndx + ndx + 1); + np->data = (void*)(long)(first_ndx + ndx); xflags |= XMIT_HLINK_FIRST; } if (DEBUG_GTE(HLINK, 1)) { @@ -1101,10 +1101,10 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x ino = read_longint(f); } np = idev_find(dev, ino); - ndx = (int32)(long)np->data - 1; + ndx = (int32)(long)np->data; /* is -1 when new */ if (ndx < 0) { - ndx = cnt++; np->data = (void*)(long)cnt; + ndx = cnt++; } F_HL_GNUM(file) = ndx; } diff --git a/generator.c b/generator.c index dba97d85..210e5f31 100644 --- a/generator.c +++ b/generator.c @@ -277,6 +277,7 @@ static void do_delayed_deletions(char *delbuf) static void 
delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) { static int already_warned = 0; + static struct hashtable *dev_tbl; struct file_list *dirlist; char delbuf[MAXPATHLEN]; int dlen, i; @@ -305,10 +306,16 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) change_local_filter_dir(fbuf, dlen, F_DEPTH(file)); if (one_file_system) { - if (file->flags & FLAG_TOP_DIR) + if (!dev_tbl) + dev_tbl = hashtable_create(16, HT_KEY64); + if (file->flags & FLAG_TOP_DIR) { + hashtable_find(dev_tbl, *fs_dev+1, ""); filesystem_dev = *fs_dev; - else if (filesystem_dev != *fs_dev) - return; + } else if (filesystem_dev != *fs_dev) { + if (!hashtable_find(dev_tbl, *fs_dev+1, NULL)) + return; + filesystem_dev = *fs_dev; /* it's a prior top-dir dev */ + } } dirlist = get_dirlist(fbuf, dlen, 0); diff --git a/hashtable.c b/hashtable.c index 00c057db..52f0fa07 100644 --- a/hashtable.c +++ b/hashtable.c @@ -66,9 +66,19 @@ void hashtable_destroy(struct hashtable *tbl) free(tbl); } -/* This returns the node for the indicated key, either newly created or - * already existing. Returns NULL if not allocating and not found. */ -void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) +/* Returns the node that holds the indicated key if it exists. When it does not + * exist, it returns either NULL (when data_when_new is NULL), or it returns a + * new node with its node->data set to the indicated value. + * + * If your code doesn't know the data value for a new node in advance (usually + * because it doesn't know if a node is new or not) you should pass in a unique + * (non-0) value that you can use to check if the returned node is new. You can + * then overwrite the data with any value you want (even 0) since it only needs + * to be different than whatever data_when_new value you use later on. 
+ * + * This return is a void* just because it might be pointing at a ht_int32_node + * or a ht_int64_node, and that makes the caller's assignment a little easier. */ +void *hashtable_find(struct hashtable *tbl, int64 key, void *data_when_new) { int key64 = tbl->key64; struct ht_int32_node *node; @@ -79,7 +89,7 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) exit_cleanup(RERR_MESSAGEIO); } - if (allocate_if_missing && tbl->entries > HASH_LOAD_LIMIT(tbl->size)) { + if (data_when_new && tbl->entries > HASH_LOAD_LIMIT(tbl->size)) { void *old_nodes = tbl->nodes; int size = tbl->size * 2; int i; @@ -99,8 +109,12 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) int64 move_key = HT_KEY(move_node, key64); if (move_key == 0) continue; - node = hashtable_find(tbl, move_key, 1); - node->data = move_node->data; + if (move_node->data) + hashtable_find(tbl, move_key, move_node->data); + else { + node = hashtable_find(tbl, move_key, ""); + node->data = 0; + } } free(old_nodes); @@ -155,7 +169,7 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) if (nkey == key) return node; if (nkey == 0) { - if (!allocate_if_missing) + if (!data_when_new) return NULL; break; } @@ -167,6 +181,7 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) ((struct ht_int64_node*)node)->key = key; else node->key = (int32)key; + node->data = data_when_new; tbl->entries++; return node; } diff --git a/hlink.c b/hlink.c index 6c5ea61a..29927166 100644 --- a/hlink.c +++ b/hlink.c @@ -48,6 +48,8 @@ extern struct file_list *cur_flist; * we can avoid the pool of dev+inode data. For incremental recursion mode, * the receiver will use a ndx hash to remember old pathnames. 
*/ +static void *data_when_new = ""; + static struct hashtable *dev_tbl; static struct hashtable *prior_hlinks; @@ -57,32 +59,30 @@ static struct file_list *hlink_flist; void init_hard_links(void) { if (am_sender || protocol_version < 30) - dev_tbl = hashtable_create(16, 1); + dev_tbl = hashtable_create(16, HT_KEY64); else if (inc_recurse) - prior_hlinks = hashtable_create(1024, 0); + prior_hlinks = hashtable_create(1024, HT_KEY32); } struct ht_int64_node *idev_find(int64 dev, int64 ino) { static struct ht_int64_node *dev_node = NULL; - struct hashtable *tbl; /* Note that some OSes have a dev == 0, so increment to avoid storing a 0. */ if (!dev_node || dev_node->key != dev+1) { /* We keep a separate hash table of inodes for every device. */ - dev_node = hashtable_find(dev_tbl, dev+1, 1); - if (!(tbl = dev_node->data)) { - tbl = dev_node->data = hashtable_create(512, 1); + dev_node = hashtable_find(dev_tbl, dev+1, data_when_new); + if (dev_node->data == data_when_new) { + dev_node->data = hashtable_create(512, HT_KEY64); if (DEBUG_GTE(HLINK, 3)) { rprintf(FINFO, "[%s] created hashtable for dev %s\n", who_am_i(), big_num(dev)); } } - } else - tbl = dev_node->data; + } - return hashtable_find(tbl, ino, 1); + return hashtable_find(dev_node->data, ino, (void*)-1L); } void idev_destroy(void) @@ -125,8 +125,8 @@ static void match_gnums(int32 *ndx_list, int ndx_count) file = hlink_flist->sorted[ndx_list[from]]; gnum = F_HL_GNUM(file); if (inc_recurse) { - node = hashtable_find(prior_hlinks, gnum, 1); - if (!node->data) { + node = hashtable_find(prior_hlinks, gnum, data_when_new); + if (node->data == data_when_new) { if (!(node->data = new_array0(char, 5))) out_of_memory("match_gnums"); assert(gnum >= hlink_flist->ndx_start); @@ -269,7 +269,7 @@ static char *check_prior(struct file_struct *file, int gnum, } if (inc_recurse - && (node = hashtable_find(prior_hlinks, gnum, 0)) != NULL) { + && (node = hashtable_find(prior_hlinks, gnum, NULL)) != NULL) { assert(node->data != 
NULL); if (CVAL(node->data, 0) != 0) { *prev_ndx_p = -1; @@ -528,7 +528,7 @@ void finish_hard_link(struct file_struct *file, const char *fname, int fin_ndx, if (inc_recurse) { int gnum = F_HL_GNUM(file); - struct ht_int32_node *node = hashtable_find(prior_hlinks, gnum, 0); + struct ht_int32_node *node = hashtable_find(prior_hlinks, gnum, NULL); if (node == NULL) { rprintf(FERROR, "Unable to find a hlink node for %d (%s)\n", gnum, f_name(file, prev_name)); exit_cleanup(RERR_MESSAGEIO); diff --git a/rsync.h b/rsync.h index 06f6d338..f5350da8 100644 --- a/rsync.h +++ b/rsync.h @@ -631,6 +631,9 @@ typedef unsigned int size_t; # define SIZEOF_INT64 SIZEOF_OFF_T #endif +#define HT_KEY32 0 +#define HT_KEY64 1 + struct hashtable { void *nodes; int32 size, entries; diff --git a/xattrs.c b/xattrs.c index 2afa3473..9016aa4c 100644 --- a/xattrs.c +++ b/xattrs.c @@ -415,7 +415,7 @@ static int find_matching_xattr(const item_list *xalp) key = xattr_lookup_hash(xalp); - node = hashtable_find(rsync_xal_h, key, 0); + node = hashtable_find(rsync_xal_h, key, NULL); if (node == NULL) return -1; @@ -478,21 +478,17 @@ static int rsync_xal_store(item_list *xalp) new_list->key = xattr_lookup_hash(&new_list->xa_items); if (rsync_xal_h == NULL) - rsync_xal_h = hashtable_create(512, 1); + rsync_xal_h = hashtable_create(512, HT_KEY64); if (rsync_xal_h == NULL) out_of_memory("rsync_xal_h hashtable_create()"); - node = hashtable_find(rsync_xal_h, new_list->key, 1); - if (node == NULL) - out_of_memory("rsync_xal_h hashtable_find()"); - new_ref = new0(rsync_xa_list_ref); if (new_ref == NULL) out_of_memory("new0(rsync_xa_list_ref)"); - new_ref->ndx = ndx; - if (node->data != NULL) { + node = hashtable_find(rsync_xal_h, new_list->key, new_ref); + if (node->data != (void*)new_ref) { rsync_xa_list_ref *ref = node->data; while (ref != NULL) { @@ -504,8 +500,7 @@ static int rsync_xal_store(item_list *xalp) ref->next = new_ref; break; } - } else - node->data = new_ref; + } return ndx; } @@ -926,7 +921,7 
@@ void uncache_tmp_xattrs(void) if (rsync_xal_h == NULL) continue; - node = hashtable_find(rsync_xal_h, xa_list_item->key, 0); + node = hashtable_find(rsync_xal_h, xa_list_item->key, NULL); if (node == NULL) continue; -- The rsync repository. _______________________________________________ rsync-cvs mailing list rsync-cvs@lists.samba.org https://lists.samba.org/mailman/listinfo/rsync-cvs