Writing large files is slow

Ivan Krasilnikov Thu, 16 Dec 2010 20:02:21 -0800

Hello,

Recently I was using vim to edit large, multi-gigabyte text files and
I noticed that writing a file back to disk takes significantly longer
time than reading it.


Here's the amount of time it took vim (v7.3.081) on my machine (24G RAM)
to open a file with N 20-byte long lines and execute :wq command with
default options and disabled swap files via -n flag. Also shown is
the number of entries inserted into mf_hash table (explained below).

        N             real     user    sys  items in mf_hash at peak
  1000000 (19Mb)      0.58     0.33   0.16  5967
  2000000 (38Mb)      1.02     0.57   0.19  11931
  4000000 (76Mb)      2.33     1.48   0.39  23860
  8000000 (153Mb)     5.48     3.31   1.23  47716
 16000000 (305Mb)    13.79     9.77   1.40  95429
 32000000 (610Mb)    66.10    59.55   2.64  190855
 64000000 (1.2Gb)   329.30   311.52   7.45  381707
128000000 (2.4Gb)  1366.13  1224.87  17.96  763410

Just reading a large file without writing it back is much faster:

        N   real   user   sys
  1000000   0.23   0.17  0.05
  2000000   0.44   0.31  0.12
  4000000   0.88   0.70  0.17
  8000000   1.72   1.35  0.36
 16000000   4.13   2.69  1.41
 32000000   7.22   5.58  1.60
 64000000  14.62  11.11  3.44
128000000  27.49  22.13  5.24

Profiler shows that much of the time during write is spent in
mf_find_hash(),
which implements a fixed 64-bucket chained hash table mf_hash, and which is
called once for every line written. This number of buckets is inadequate for
the number of items inserted into this hash table for large files.

Attached is a patch which makes mf_hash a dynamically growing hashtable
with a fixed maximum load factor. Here's the runtime of my write test
case with this patch:

             max load factor = 1   max load factor = 64
        N    real   user    sys     real   user    sys
  1000000    0.46   0.25   0.11     0.52   0.34   0.08
  2000000    0.98   0.50   0.24     0.93   0.56   0.18
  4000000    1.98   1.06   0.39     1.81   1.08   0.42
  8000000    4.08   2.17   0.71     3.97   2.25   0.81
 16000000    8.24   4.29   1.47     7.83   4.74   1.53
 32000000   16.85   8.60   3.05    16.78   9.45   3.26
 64000000   32.62  17.16   5.55    32.98  19.08   5.94
128000000   66.71  34.38  11.27    66.51  38.09  12.40

Other values of load factor up to ~ 256 seem also good to me.

--
Ivan Krasilnikov

-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

diff -r 916c90b37ea9 src/memfile.c
--- a/src/memfile.c	Fri Dec 10 20:35:50 2010 +0100
+++ b/src/memfile.c	Fri Dec 17 06:41:18 2010 +0300
@@ -88,6 +88,13 @@
 static int  mf_write_block __ARGS((memfile_T *mfp, bhdr_T *hp, off_t offset, unsigned size));
 static int  mf_trans_add __ARGS((memfile_T *, bhdr_T *));
 static void mf_do_open __ARGS((memfile_T *, char_u *, int));
+static void mf_hash_init __ARGS((mf_hashtab_T *));
+static void mf_hash_clear __ARGS((mf_hashtab_T *));
+static void mf_hash_clear_all __ARGS((mf_hashtab_T *));
+static mf_hashitem_T *mf_hash_find __ARGS((mf_hashtab_T *, blocknr_T));
+static void mf_hash_add_item __ARGS((mf_hashtab_T *, mf_hashitem_T *));
+static void mf_hash_rem_item __ARGS((mf_hashtab_T *, mf_hashitem_T *));
+static int mf_hash_grow __ARGS((mf_hashtab_T *));
 
 /*
  * The functions for using a memfile:
@@ -123,7 +130,6 @@
     int		flags;
 {
     memfile_T		*mfp;
-    int			i;
     off_t		size;
 #if defined(STATFS) && defined(UNIX) && !defined(__QNX__)
 # define USE_FSTATFS
@@ -156,11 +162,8 @@
     mfp->mf_used_last = NULL;
     mfp->mf_dirty = FALSE;
     mfp->mf_used_count = 0;
-    for (i = 0; i < MEMHASHSIZE; ++i)
-    {
-	mfp->mf_hash[i] = NULL;		/* hash lists are empty */
-	mfp->mf_trans[i] = NULL;	/* trans lists are empty */
-    }
+    mf_hash_init(&mfp->mf_hash);
+    mf_hash_init(&mfp->mf_trans);
     mfp->mf_page_size = MEMFILE_PAGE_SIZE;
 #ifdef FEAT_CRYPT
     mfp->mf_old_key = NULL;
@@ -246,8 +249,6 @@
     int		del_file;
 {
     bhdr_T	*hp, *nextp;
-    NR_TRANS	*tp, *tpnext;
-    int		i;
 
     if (mfp == NULL)		    /* safety check */
 	return;
@@ -267,12 +268,8 @@
     }
     while (mfp->mf_free_first != NULL)	    /* free entries in free list */
 	vim_free(mf_rem_free(mfp));
-    for (i = 0; i < MEMHASHSIZE; ++i)	    /* free entries in trans lists */
-	for (tp = mfp->mf_trans[i]; tp != NULL; tp = tpnext)
-	{
-	    tpnext = tp->nt_next;
-	    vim_free(tp);
-	}
+    mf_hash_clear(&mfp->mf_hash);
+    mf_hash_clear_all(&mfp->mf_trans);	    /* free hashtable and its items */
     vim_free(mfp->mf_fname);
     vim_free(mfp->mf_ffname);
     vim_free(mfp);
@@ -747,16 +744,7 @@
     memfile_T	*mfp;
     bhdr_T	*hp;
 {
-    bhdr_T	*hhp;
-    int		hash;
-
-    hash = MEMHASH(hp->bh_bnum);
-    hhp = mfp->mf_hash[hash];
-    hp->bh_hash_next = hhp;
-    hp->bh_hash_prev = NULL;
-    if (hhp != NULL)
-	hhp->bh_hash_prev = hp;
-    mfp->mf_hash[hash] = hp;
+    mf_hash_add_item(&mfp->mf_hash, (mf_hashitem_T *)hp);
 }
 
 /*
@@ -767,13 +755,7 @@
     memfile_T	*mfp;
     bhdr_T	*hp;
 {
-    if (hp->bh_hash_prev == NULL)
-	mfp->mf_hash[MEMHASH(hp->bh_bnum)] = hp->bh_hash_next;
-    else
-	hp->bh_hash_prev->bh_hash_next = hp->bh_hash_next;
-
-    if (hp->bh_hash_next)
-	hp->bh_hash_next->bh_hash_prev = hp->bh_hash_prev;
+    mf_hash_rem_item(&mfp->mf_hash, (mf_hashitem_T *)hp);
 }
 
 /*
@@ -784,12 +766,7 @@
     memfile_T	*mfp;
     blocknr_T	nr;
 {
-    bhdr_T	*hp;
-
-    for (hp = mfp->mf_hash[MEMHASH(nr)]; hp != NULL; hp = hp->bh_hash_next)
-	if (hp->bh_bnum == nr)
-	    break;
-    return hp;
+    return (bhdr_T *)mf_hash_find(&mfp->mf_hash, nr);
 }
 
 /*
@@ -1191,7 +1168,6 @@
 {
     bhdr_T	*freep;
     blocknr_T	new_bnum;
-    int		hash;
     NR_TRANS	*np;
     int		page_count;
 
@@ -1239,12 +1215,8 @@
     hp->bh_bnum = new_bnum;
     mf_ins_hash(mfp, hp);		    /* insert in new hash list */
 
-    hash = MEMHASH(np->nt_old_bnum);	    /* insert in trans list */
-    np->nt_next = mfp->mf_trans[hash];
-    mfp->mf_trans[hash] = np;
-    if (np->nt_next != NULL)
-	np->nt_next->nt_prev = np;
-    np->nt_prev = NULL;
+    /* Insert "np" into "mf_trans" hashtable with key "np->nt_old_bnum" */
+    mf_hash_add_item(&mfp->mf_trans, (mf_hashitem_T *)np);
 
     return OK;
 }
@@ -1259,25 +1231,20 @@
     memfile_T	*mfp;
     blocknr_T	old_nr;
 {
-    int		hash;
     NR_TRANS	*np;
     blocknr_T	new_bnum;
 
-    hash = MEMHASH(old_nr);
-    for (np = mfp->mf_trans[hash]; np != NULL; np = np->nt_next)
-	if (np->nt_old_bnum == old_nr)
-	    break;
-    if (np == NULL)		/* not found */
+    np = (NR_TRANS *)mf_hash_find(&mfp->mf_trans, old_nr);
+
+    if (np == NULL)	                    /* not found */
 	return old_nr;
 
     mfp->mf_neg_count--;
     new_bnum = np->nt_new_bnum;
-    if (np->nt_prev != NULL)		/* remove entry from the trans list */
-	np->nt_prev->nt_next = np->nt_next;
-    else
-	mfp->mf_trans[hash] = np->nt_next;
-    if (np->nt_next != NULL)
-	np->nt_next->nt_prev = np->nt_prev;
+
+    /* remove entry from the trans list */
+    mf_hash_rem_item(&mfp->mf_trans, (mf_hashitem_T *)np);
+
     vim_free(np);
 
     return new_bnum;
@@ -1401,3 +1368,196 @@
 	mch_hide(mfp->mf_fname);    /* try setting the 'hidden' flag */
     }
 }
+
+/*
+ * Implementation of mf_hashtab_T follows.
+ */
+
+/*
+ * Number of buckets in hashtable is increased by a factor of
+ * MHT_GROWTH_FACTOR when average number of items per bucket
+ * exceeds 2^MHT_LOG_LOAD_FACTOR.
+ */
+#define MHT_LOG_LOAD_FACTOR 6
+#define MHT_GROWTH_FACTOR   2   /* must be a power of two */
+
+/*
+ * Initialize an empty hash table.
+ */
+    static void
+mf_hash_init(mht)
+    mf_hashtab_T *mht;
+{
+    vim_memset(mht, 0, sizeof(mf_hashtab_T));
+    mht->mht_buckets = mht->mht_small_buckets;
+    mht->mht_mask = MHT_INIT_SIZE - 1;
+}
+
+/*
+ * Free the array of a hash table.  Does not free the items it contains!
+ * Hash table may not be used again without another mf_hash_init() call.
+ */
+    static void
+mf_hash_clear(mht)
+    mf_hashtab_T *mht;
+{
+    if (mht->mht_buckets != mht->mht_small_buckets)
+	vim_free(mht->mht_buckets);
+}
+
+/*
+ * Free the array of a hash table and all the items it contains
+ * using vim_free().
+ */
+    static void
+mf_hash_clear_all(mht)
+    mf_hashtab_T    *mht;
+{
+    long_u	    idx;
+    mf_hashitem_T   *mhi;
+    mf_hashitem_T   *next;
+
+    for (idx = 0; idx <= mht->mht_mask; idx++)
+    {
+	mhi = mht->mht_buckets[idx];
+	while (mhi != NULL)
+	{
+	    next = mhi->mhi_next;
+	    vim_free(mhi);
+	    mhi = next;
+	}
+    }
+
+    mf_hash_clear(mht);
+}
+
+/*
+ * Find "key" in hashtable "mht".
+ * Returns a pointer to a mf_hashitem_T or NULL if the item was not found.
+ */
+    static mf_hashitem_T *
+mf_hash_find(mht, key)
+    mf_hashtab_T    *mht;
+    blocknr_T	    key;
+{
+    mf_hashitem_T   *mhi;
+
+    mhi = mht->mht_buckets[key & mht->mht_mask];
+
+    while (mhi != NULL && mhi->mhi_key != key)
+	mhi = mhi->mhi_next;
+
+    return mhi;
+}
+
+/*
+ * Add item "mhi" to hashtable "mht".
+ * "mhi" must not be NULL.
+ */
+    static void
+mf_hash_add_item(mht, mhi)
+    mf_hashtab_T    *mht;
+    mf_hashitem_T   *mhi;
+{
+    long_u	    idx;
+
+    idx = mhi->mhi_key & mht->mht_mask;
+    mhi->mhi_next = mht->mht_buckets[idx];
+    mhi->mhi_prev = NULL;
+    if (mhi->mhi_next != NULL)
+	mhi->mhi_next->mhi_prev = mhi;
+    mht->mht_buckets[idx] = mhi;
+
+    mht->mht_count++;
+
+    /* Expand when each bucket has more than 2^MHT_LOG_LOAD_FACTOR items on average */
+    if (!mht->mht_fixed && (mht->mht_count >> MHT_LOG_LOAD_FACTOR) > mht->mht_mask) {
+        if (mf_hash_grow(mht) == FAIL)
+            mht->mht_fixed = 1;     /* stop trying to expand after first failure */
+    }
+}
+
+/*
+ * Remove item "mhi" from hashtable "mht".
+ * "mhi" must not be NULL and must have been inserted into "mht".
+ */
+    static void
+mf_hash_rem_item(mht, mhi)
+    mf_hashtab_T    *mht;
+    mf_hashitem_T   *mhi;
+{
+    if (mhi->mhi_prev == NULL)
+	mht->mht_buckets[mhi->mhi_key & mht->mht_mask] = mhi->mhi_next;
+    else
+	mhi->mhi_prev->mhi_next = mhi->mhi_next;
+
+    if (mhi->mhi_next != NULL)
+	mhi->mhi_next->mhi_prev = mhi->mhi_prev;
+
+    mht->mht_count--;
+
+    /* TODO: can shrink the table here, but it takes little memory anyway */
+}
+
+/*
+ * Increase number of buckets in the hashtable by MHT_GROWTH_FACTOR and rehash items.
+ * Returns FAIL when out of memory.
+ */
+    static int
+mf_hash_grow(mht)
+    mf_hashtab_T    *mht;
+{
+    long_u	    i, j;
+    int		    shift;
+    mf_hashitem_T   *mhi;
+    mf_hashitem_T   *tails[MHT_GROWTH_FACTOR];
+    mf_hashitem_T   **buckets;
+
+    buckets = (mf_hashitem_T **)lalloc_clear((mht->mht_mask + 1) * MHT_GROWTH_FACTOR * sizeof(void *), FALSE);
+    if (buckets == NULL)
+	return FAIL;
+
+    shift = 0;
+    while ((mht->mht_mask >> shift) != 0)
+	shift++;
+
+    for (i = 0; i <= mht->mht_mask; i++) {
+        /*
+         * Shuffle items from i-th original bucket into MHT_GROWTH_FACTOR new buckets,
+         * preserving their relative order.
+         */
+
+        vim_memset(tails, 0, sizeof(tails));
+
+	for (mhi = mht->mht_buckets[i]; mhi != NULL; mhi = mhi->mhi_next)
+	{
+	    j = (mhi->mhi_key >> shift) & (MHT_GROWTH_FACTOR - 1);
+	    if (tails[j] == NULL)
+	    {
+		buckets[i + (j << shift)] = mhi;
+		tails[j] = mhi;
+		mhi->mhi_prev = NULL;
+	    }
+	    else
+	    {
+		tails[j]->mhi_next = mhi;
+		mhi->mhi_prev = tails[j];
+		tails[j] = mhi;
+	    }
+	}
+
+	for (j = 0; j < MHT_GROWTH_FACTOR; j++)
+	{
+	    if (tails[j] != NULL)
+		tails[j]->mhi_next = NULL;
+	}
+    }
+
+    if (mht->mht_buckets != mht->mht_small_buckets)
+	vim_free(mht->mht_buckets);
+
+    mht->mht_buckets = buckets;
+    mht->mht_mask = (mht->mht_mask + 1) * MHT_GROWTH_FACTOR - 1;
+
+    return OK;
+}
diff -r 916c90b37ea9 src/structs.h
--- a/src/structs.h	Fri Dec 10 20:35:50 2010 +0100
+++ b/src/structs.h	Fri Dec 17 06:41:18 2010 +0300
@@ -378,6 +378,35 @@
 typedef long		    blocknr_T;
 
 /*
+ * mf_hashtab_T is a chained hashtable with blocknr_T key and arbitrary
+ * structures as items.  This is an intrusive data structure: we require
+ * that items begin with mf_hashitem_T which contains the key and linked
+ * list pointers.  List of items in each bucket is doubly-linked.
+ */
+
+typedef struct mf_hashitem_S mf_hashitem_T;
+
+struct mf_hashitem_S
+{
+    mf_hashitem_T   *mhi_next;
+    mf_hashitem_T   *mhi_prev;
+    blocknr_T	    mhi_key;
+};
+
+#define MHT_INIT_SIZE   64
+
+typedef struct mf_hashtab_S
+{
+    long_u	    mht_mask;	/* mask used for hash value (nr of items in
+				   array is "mht_mask" + 1) */
+    long_u	    mht_count;	/* number of items inserted into hashtable */
+    mf_hashitem_T   **mht_buckets;  /* points to mht_small_buckets or
+				      dynamically allocated array */
+    mf_hashitem_T   *mht_small_buckets[MHT_INIT_SIZE];   /* initial buckets */
+    char            mht_fixed;  /* non-zero values forbids growth */
+} mf_hashtab_T;
+
+/*
  * for each (previously) used block in the memfile there is one block header.
  *
  * The block may be linked in the used list OR in the free list.
@@ -394,11 +423,14 @@
 
 struct block_hdr
 {
-    bhdr_T	*bh_next;	    /* next block_hdr in free or used list */
-    bhdr_T	*bh_prev;	    /* previous block_hdr in used list */
+    /* Note: it is important the following three fields be at the head
+     * of this structure and in this particular order so that pointer to
+     * this structure can be used as a mf_hashitem_T pointer. */
     bhdr_T	*bh_hash_next;	    /* next block_hdr in hash list */
     bhdr_T	*bh_hash_prev;	    /* previous block_hdr in hash list */
     blocknr_T	bh_bnum;	    /* block number */
+    bhdr_T	*bh_next;	    /* next block_hdr in free or used list */
+    bhdr_T	*bh_prev;	    /* previous block_hdr in used list */
     char_u	*bh_data;	    /* pointer to memory (for used block) */
     int		bh_page_count;	    /* number of pages in this block */
 
@@ -417,9 +449,13 @@
 
 struct nr_trans
 {
+    /* Note: it is important the following three fields be at the head
+     * of this structure and in this particular order so that pointer to
+     * this structure can be used as a mf_hashitem_T pointer. */
     NR_TRANS	*nt_next;		/* next nr_trans in hash list */
     NR_TRANS	*nt_prev;		/* previous nr_trans in hash list */
-    blocknr_T	nt_old_bnum;		/* old, negative, number */
+    blocknr_T	nt_old_bnum;		/* old, negative, number.
+					   Used as a key in mf_hashtab_T. */
     blocknr_T	nt_new_bnum;		/* new, positive, number */
 };
 
@@ -499,12 +535,6 @@
 
 typedef struct file_buffer buf_T;  /* forward declaration */
 
-/*
- * Simplistic hashing scheme to quickly locate the blocks in the used list.
- * 64 blocks are found directly (64 * 4K = 256K, most files are smaller).
- */
-#define MEMHASHSIZE	64
-#define MEMHASH(nr)	((nr) & (MEMHASHSIZE - 1))
 #define MF_SEED_LEN	8
 
 struct memfile
@@ -517,8 +547,8 @@
     bhdr_T	*mf_used_last;		/* lru block_hdr in used list */
     unsigned	mf_used_count;		/* number of pages in used list */
     unsigned	mf_used_count_max;	/* maximum number of pages in memory */
-    bhdr_T	*mf_hash[MEMHASHSIZE];	/* array of hash lists */
-    NR_TRANS	*mf_trans[MEMHASHSIZE];	/* array of trans lists */
+    mf_hashtab_T mf_hash;		/* hash lists */
+    mf_hashtab_T mf_trans;		/* trans lists */
     blocknr_T	mf_blocknr_max;		/* highest positive block number + 1*/
     blocknr_T	mf_blocknr_min;		/* lowest negative block number - 1 */
     blocknr_T	mf_neg_count;		/* number of negative blocks numbers */

Writing large files is slow

Raspunde prin e-mail lui