Nice work :)

On 06/11/2014 11:08, Andriy Gapon wrote:
Author: avg
Date: Thu Nov  6 11:08:02 2014
New Revision: 274172
URL: https://svnweb.freebsd.org/changeset/base/274172

Log:
   fix l2arc compression buffers leak
We have observed that arc_release() can be called concurrently with a
   l2arc in-flight write.
   Also, we have observed that arc_hdr_destroy() can be called from
   arc_write_done() for a zio with ZIO_FLAG_IO_REWRITE flag in similar
   circumstances.
Previously the l2arc headers would be freed while leaking their
   associated compression buffers.  Now the buffers are placed on
   l2arc_free_on_write list for delayed freeing.  This is similar to what
   was already done to arc buffers that were supposed to be freed
   concurrently with in-flight writes of those buffers.
In addition to fixing the discovered leaks this change also adds some
   protective code to assert that a compression buffer associated with a
   l2arc header is never leaked.
A new kstat l2_cdata_free_on_write is added. It keeps a count of
   delayed compression buffer frees which previously would have been leaks.
Tested by: Vitalij Satanivskij <sa...@ukr.net> et al
   Requested by:        many
   MFC after:   2 weeks
   Sponsored by:        HybridCluster / ClusterHQ

Modified:
   head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c   Thu Nov  6 
10:30:10 2014        (r274171)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c   Thu Nov  6 
11:08:02 2014        (r274172)
@@ -387,6 +387,7 @@ typedef struct arc_stats {
        kstat_named_t arcstat_l2_evict_lock_retry;
        kstat_named_t arcstat_l2_evict_reading;
        kstat_named_t arcstat_l2_free_on_write;
+       kstat_named_t arcstat_l2_cdata_free_on_write;
        kstat_named_t arcstat_l2_abort_lowmem;
        kstat_named_t arcstat_l2_cksum_bad;
        kstat_named_t arcstat_l2_io_error;
@@ -464,6 +465,7 @@ static arc_stats_t arc_stats = {
        { "l2_evict_lock_retry",      KSTAT_DATA_UINT64 },
        { "l2_evict_reading",         KSTAT_DATA_UINT64 },
        { "l2_free_on_write",         KSTAT_DATA_UINT64 },
+       { "l2_cdata_free_on_write",   KSTAT_DATA_UINT64 },
        { "l2_abort_lowmem",          KSTAT_DATA_UINT64 },
        { "l2_cksum_bad",             KSTAT_DATA_UINT64 },
        { "l2_io_error",              KSTAT_DATA_UINT64 },
@@ -1655,6 +1657,21 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
            data, metadata, hits);
  }
+static void
+arc_buf_free_on_write(void *data, size_t size,
+    void (*free_func)(void *, size_t))
+{
+       l2arc_data_free_t *df;
+
+       df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
+       df->l2df_data = data;
+       df->l2df_size = size;
+       df->l2df_func = free_func;
+       mutex_enter(&l2arc_free_on_write_mtx);
+       list_insert_head(l2arc_free_on_write, df);
+       mutex_exit(&l2arc_free_on_write_mtx);
+}
+
  /*
   * Free the arc data buffer.  If it is an l2arc write in progress,
   * the buffer is placed on l2arc_free_on_write to be freed later.
@@ -1665,14 +1682,7 @@ arc_buf_data_free(arc_buf_t *buf, void (
        arc_buf_hdr_t *hdr = buf->b_hdr;
if (HDR_L2_WRITING(hdr)) {
-               l2arc_data_free_t *df;
-               df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
-               df->l2df_data = buf->b_data;
-               df->l2df_size = hdr->b_size;
-               df->l2df_func = free_func;
-               mutex_enter(&l2arc_free_on_write_mtx);
-               list_insert_head(l2arc_free_on_write, df);
-               mutex_exit(&l2arc_free_on_write_mtx);
+               arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
                ARCSTAT_BUMP(arcstat_l2_free_on_write);
        } else {
                free_func(buf->b_data, hdr->b_size);
@@ -1684,6 +1694,23 @@ arc_buf_data_free(arc_buf_t *buf, void (
   * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
   */
  static void
+arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
+{
+       l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+
+       ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
+
+       if (l2hdr->b_tmp_cdata == NULL)
+               return;
+
+       ASSERT(HDR_L2_WRITING(hdr));
+       arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
+           zio_data_buf_free);
+       ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
+       l2hdr->b_tmp_cdata = NULL;
+}
+
+static void
  arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
  {
        arc_buf_t **bufp;
@@ -1782,6 +1809,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
                        trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
                            hdr->b_size, 0);
                        list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
+                       arc_buf_l2_cdata_free(hdr);
                        ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
                        ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
                        vdev_space_update(l2hdr->b_dev->l2ad_vdev,
@@ -3675,6 +3703,7 @@ arc_release(arc_buf_t *buf, void *tag)
        l2hdr = hdr->b_l2hdr;
        if (l2hdr) {
                mutex_enter(&l2arc_buflist_mtx);
+               arc_buf_l2_cdata_free(hdr);
                hdr->b_l2hdr = NULL;
                list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
        }
@@ -4964,6 +4993,11 @@ top:
                                ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
                                bytes_evicted += abl2->b_asize;
                                ab->b_l2hdr = NULL;
+                               /*
+                                * We are destroying l2hdr, so ensure that
+                                * its compressed buffer, if any, is not leaked.
+                                */
+                               ASSERT(abl2->b_tmp_cdata == NULL);
                                kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
                                ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
                        }
@@ -5202,6 +5236,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
                buf_data = l2hdr->b_tmp_cdata;
                buf_sz = l2hdr->b_asize;
+ /*
+                * If the data has not been compressed, then clear b_tmp_cdata
+                * to make sure that it points only to a temporary compression
+                * buffer.
+                */
+               if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
+                       l2hdr->b_tmp_cdata = NULL;
+
                /* Compression may have squashed the buffer to zero length. */
                if (buf_sz != 0) {
                        uint64_t buf_p_sz;
@@ -5392,15 +5434,18 @@ l2arc_release_cdata_buf(arc_buf_hdr_t *a
  {
        l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
- if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
+       ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
+       if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
                /*
                 * If the data was compressed, then we've allocated a
                 * temporary buffer for it, so now we need to release it.
                 */
                ASSERT(l2hdr->b_tmp_cdata != NULL);
                zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
+               l2hdr->b_tmp_cdata = NULL;
+       } else {
+               ASSERT(l2hdr->b_tmp_cdata == NULL);
        }
-       l2hdr->b_tmp_cdata = NULL;
  }
/*


_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to