Author: delphij
Date: Sat Oct  4 08:05:39 2014
New Revision: 272504
URL: https://svnweb.freebsd.org/changeset/base/272504

Log:
  MFV r272494:
  
  Make space_map_truncate() always do space_map_reallocate().  Without
  this, setting space_map_max_blksz would cause panic for existing pool,
  as dmu_objset_set_blocksize would fail if the object have multiple blocks.
  
  Illumos issues:
     5164 space_map_max_blksz causes panic, does not work
     5165 zdb fails assertion when run on pool with recently-enabled
        spacemap_histogram feature
  
  MFC after:    2 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c      Sat Oct 
 4 08:03:52 2014        (r272503)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c      Sat Oct 
 4 08:05:39 2014        (r272504)
@@ -75,7 +75,7 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_
 /*
  * Condensing a metaslab is not guaranteed to actually reduce the amount of
  * space used on disk. In particular, a space map uses data in increments of
- * MAX(1 << ashift, SPACE_MAP_INITIAL_BLOCKSIZE), so a metaslab might use the
+ * MAX(1 << ashift, space_map_blksize), so a metaslab might use the
  * same number of blocks after condensing. Since the goal of condensing is to
  * reduce the number of IOPs required to read the space map, we only want to
  * condense when we can be sure we will reduce the number of blocks used by the
@@ -1470,10 +1470,12 @@ metaslab_fragmentation(metaslab_t *msp)
                uint64_t txg = spa_syncing_txg(spa);
                vdev_t *vd = msp->ms_group->mg_vd;
 
-               msp->ms_condense_wanted = B_TRUE;
-               vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
-               spa_dbgmsg(spa, "txg %llu, requesting force condense: "
-                   "msp %p, vd %p", txg, msp, vd);
+               if (spa_writeable(spa)) {
+                       msp->ms_condense_wanted = B_TRUE;
+                       vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
+                       spa_dbgmsg(spa, "txg %llu, requesting force condense: "
+                           "msp %p, vd %p", txg, msp, vd);
+               }
                return (ZFS_FRAG_INVALID);
        }
 
@@ -1917,6 +1919,15 @@ metaslab_sync(metaslab_t *msp, uint64_t 
 
        mutex_enter(&msp->ms_lock);
 
+       /*
+        * Note: metaslab_condense() clears the space_map's histogram.
+        * Therefore we must verify and remove this histogram before
+        * condensing.
+        */
+       metaslab_group_histogram_verify(mg);
+       metaslab_class_histogram_verify(mg->mg_class);
+       metaslab_group_histogram_remove(mg, msp);
+
        if (msp->ms_loaded && spa_sync_pass(spa) == 1 &&
            metaslab_should_condense(msp)) {
                metaslab_condense(msp, txg, tx);
@@ -1925,9 +1936,6 @@ metaslab_sync(metaslab_t *msp, uint64_t 
                space_map_write(msp->ms_sm, *freetree, SM_FREE, tx);
        }
 
-       metaslab_group_histogram_verify(mg);
-       metaslab_class_histogram_verify(mg->mg_class);
-       metaslab_group_histogram_remove(mg, msp);
        if (msp->ms_loaded) {
                /*
                 * When the space map is loaded, we have an accruate

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c     Sat Oct 
 4 08:03:52 2014        (r272503)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c     Sat Oct 
 4 08:05:39 2014        (r272504)
@@ -38,15 +38,12 @@
 #include <sys/zfeature.h>
 
 /*
- * This value controls how the space map's block size is allowed to grow.
- * If the value is set to the same size as SPACE_MAP_INITIAL_BLOCKSIZE then
- * the space map block size will remain fixed. Setting this value to something
- * greater than SPACE_MAP_INITIAL_BLOCKSIZE will allow the space map to
- * increase its block size as needed. To maintain backwards compatibilty the
- * space map's block size must be a power of 2 and SPACE_MAP_INITIAL_BLOCKSIZE
- * or larger.
+ * The data for a given space map can be kept on blocks of any size.
+ * Larger blocks entail fewer i/o operations, but they also cause the
+ * DMU to keep more data in-core, and also to waste more i/o bandwidth
+ * when only a few blocks have changed since the last transaction group.
  */
-int space_map_max_blksz = (1 << 12);
+int space_map_blksz = (1 << 12);
 
 /*
  * Load the space map disk into the specified range tree. Segments of maptype
@@ -233,58 +230,6 @@ space_map_entries(space_map_t *sm, range
        return (entries);
 }
 
-void
-space_map_set_blocksize(space_map_t *sm, uint64_t size, dmu_tx_t *tx)
-{
-       uint32_t blksz;
-       u_longlong_t blocks;
-
-       ASSERT3U(sm->sm_blksz, !=, 0);
-       ASSERT3U(space_map_object(sm), !=, 0);
-       ASSERT(sm->sm_dbuf != NULL);
-       VERIFY(ISP2(space_map_max_blksz));
-
-       if (sm->sm_blksz >= space_map_max_blksz)
-               return;
-
-       /*
-        * The object contains more than one block so we can't adjust
-        * its size.
-        */
-       if (sm->sm_phys->smp_objsize > sm->sm_blksz)
-               return;
-
-       if (size > sm->sm_blksz) {
-               uint64_t newsz;
-
-               /*
-                * Older software versions treat space map blocks as fixed
-                * entities. The DMU is capable of handling different block
-                * sizes making it possible for us to increase the
-                * block size and maintain backwards compatibility. The
-                * caveat is that the new block sizes must be a
-                * power of 2 so that old software can append to the file,
-                * adding more blocks. The block size can grow until it
-                * reaches space_map_max_blksz.
-                */
-               newsz = ISP2(size) ? size : 1ULL << highbit64(size);
-               if (newsz > space_map_max_blksz)
-                       newsz = space_map_max_blksz;
-
-               VERIFY0(dmu_object_set_blocksize(sm->sm_os,
-                   space_map_object(sm), newsz, 0, tx));
-               dmu_object_size_from_db(sm->sm_dbuf, &blksz, &blocks);
-
-               zfs_dbgmsg("txg %llu, spa %s, increasing blksz from %d to %d",
-                   dmu_tx_get_txg(tx), spa_name(dmu_objset_spa(sm->sm_os)),
-                   sm->sm_blksz, blksz);
-
-               VERIFY3U(newsz, ==, blksz);
-               VERIFY3U(sm->sm_blksz, <, blksz);
-               sm->sm_blksz = blksz;
-       }
-}
-
 /*
  * Note: space_map_write() will drop sm_lock across dmu_write() calls.
  */
@@ -298,7 +243,7 @@ space_map_write(space_map_t *sm, range_t
        range_seg_t *rs;
        uint64_t size, total, rt_space, nodes;
        uint64_t *entry, *entry_map, *entry_map_end;
-       uint64_t newsz, expected_entries, actual_entries = 1;
+       uint64_t expected_entries, actual_entries = 1;
 
        ASSERT(MUTEX_HELD(rt->rt_lock));
        ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
@@ -324,13 +269,6 @@ space_map_write(space_map_t *sm, range_t
 
        expected_entries = space_map_entries(sm, rt);
 
-       /*
-        * Calculate the new size for the space map on-disk and see if
-        * we can grow the block size to accommodate the new size.
-        */
-       newsz = sm->sm_phys->smp_objsize + expected_entries * sizeof (uint64_t);
-       space_map_set_blocksize(sm, newsz, tx);
-
        entry_map = zio_buf_alloc(sm->sm_blksz);
        entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
        entry = entry_map;
@@ -457,46 +395,48 @@ space_map_close(space_map_t *sm)
        kmem_free(sm, sizeof (*sm));
 }
 
-static void
-space_map_reallocate(space_map_t *sm, dmu_tx_t *tx)
-{
-       ASSERT(dmu_tx_is_syncing(tx));
-
-       space_map_free(sm, tx);
-       dmu_buf_rele(sm->sm_dbuf, sm);
-
-       sm->sm_object = space_map_alloc(sm->sm_os, tx);
-       VERIFY0(space_map_open_impl(sm));
-}
-
 void
 space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
 {
        objset_t *os = sm->sm_os;
        spa_t *spa = dmu_objset_spa(os);
        dmu_object_info_t doi;
-       int bonuslen;
 
        ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
        ASSERT(dmu_tx_is_syncing(tx));
 
-       VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
        dmu_object_info_from_db(sm->sm_dbuf, &doi);
 
-       if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
-               bonuslen = sizeof (space_map_phys_t);
-               ASSERT3U(bonuslen, <=, dmu_bonus_max());
-       } else {
-               bonuslen = SPACE_MAP_SIZE_V0;
-       }
-
-       if (bonuslen != doi.doi_bonus_size ||
-           doi.doi_data_block_size != SPACE_MAP_INITIAL_BLOCKSIZE) {
+       /*
+        * If the space map has the wrong bonus size (because
+        * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
+        * the wrong block size (because space_map_blksz has changed),
+        * free and re-allocate its object with the updated sizes.
+        *
+        * Otherwise, just truncate the current object.
+        */
+       if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
+           doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
+           doi.doi_data_block_size != space_map_blksz) {
                zfs_dbgmsg("txg %llu, spa %s, reallocating: "
                    "old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
                    spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
-               space_map_reallocate(sm, tx);
-               VERIFY3U(sm->sm_blksz, ==, SPACE_MAP_INITIAL_BLOCKSIZE);
+
+               space_map_free(sm, tx);
+               dmu_buf_rele(sm->sm_dbuf, sm);
+
+               sm->sm_object = space_map_alloc(sm->sm_os, tx);
+               VERIFY0(space_map_open_impl(sm));
+       } else {
+               VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
+
+               /*
+                * If the spacemap is reallocated, its histogram
+                * will be reset.  Do the same in the common case so that
+                * bugs related to the uncommon case do not go unnoticed.
+                */
+               bzero(sm->sm_phys->smp_histogram,
+                   sizeof (sm->sm_phys->smp_histogram));
        }
 
        dmu_buf_will_dirty(sm->sm_dbuf, tx);
@@ -535,7 +475,7 @@ space_map_alloc(objset_t *os, dmu_tx_t *
        }
 
        object = dmu_object_alloc(os,
-           DMU_OT_SPACE_MAP, SPACE_MAP_INITIAL_BLOCKSIZE,
+           DMU_OT_SPACE_MAP, space_map_blksz,
            DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
 
        return (object);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h Sat Oct 
 4 08:03:52 2014        (r272503)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h Sat Oct 
 4 08:05:39 2014        (r272504)
@@ -133,17 +133,6 @@ typedef enum {
        SM_FREE
 } maptype_t;
 
-/*
- * The data for a given space map can be kept on blocks of any size.
- * Larger blocks entail fewer i/o operations, but they also cause the
- * DMU to keep more data in-core, and also to waste more i/o bandwidth
- * when only a few blocks have changed since the last transaction group.
- * Rather than having a fixed block size for all space maps the block size
- * can adjust as needed (see space_map_max_blksz). Set the initial block
- * size for the space map to 4k.
- */
-#define        SPACE_MAP_INITIAL_BLOCKSIZE     (1ULL << 12)
-
 int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype);
 
 void space_map_histogram_clear(space_map_t *sm);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to