Author: mav
Date: Wed Jul 26 16:11:08 2017
New Revision: 321527
URL: https://svnweb.freebsd.org/changeset/base/321527

Log:
  MFC r314267: MFV 314243
  
  6676 Race between unique_insert() and unique_remove() causes ZFS fsid change
  
  illumos/illumos-gate@40510e8eba18690b9a9843b26393725eeb0f1dac
  
https://github.com/illumos/illumos-gate/commit/40510e8eba18690b9a9843b26393725ee
  b0f1dac
  
  https://www.illumos.org/issues/6676
  
    The fsid of zfs filesystems might change after reboot or remount. The 
problem
  seems to
    be caused by a race between unique_insert() and unique_remove(). The 
unique_re
  move()
    is called from dsl_dataset_evict() which is now an asynchronous thread. In 
a c
  ase the
    dsl_dataset_evict() thread is very slow and calls unique_remove() too late 
we
  will end
    up with changed fsid on zfs mount.
  
    This problem is very likely caused by #5056.
  
    Steps to Reproduce
    Note: I'm able to reproduce this always on a single core (virtual) machine. 
On
   multicore
    machines it is not so easy to reproduce.
  
  # uname -a
  SunOS openindiana 5.11 illumos-633aa80 i86pc i386 i86pc Solaris
  # zfs create rpool/TEST
  # FS=$(echo ::fsinfo | mdb -k | grep TEST | awk '{print $1}')
  # echo $FS::print vfs_t vfs_fsid | mdb -k
  vfs_fsid = {
      vfs_fsid.val = [ 0x54d7028a, 0x70311508 ]
  }
  # zfs umount rpool/TEST
  # zfs mount rpool/TEST
  # FS=$(echo ::fsinfo | mdb -k | grep TEST | awk '{print $1}')
  # echo $FS::print vfs_t vfs_fsid | mdb -k
  vfs_fsid = {
      vfs_fsid.val = [ 0xd9454e49, 0x6b36d08 ]
  }
  #
  
    Impact
    The persistent fsid (filesystem id) is essential for proper NFS 
functionality.
    If the fsid of a filesystem changes on remount (or after reboot) the NFS
    clients might not be able to automatically recover from such event and the
    manual remount of the NFS filesystems on every NFS client might be needed.
  
  Author: Josef 'Jeff' Sipek <josef.si...@nexenta.com>
  Reviewed by: Saso Kiselkov <saso.kisel...@nexenta.com>
  Reviewed by: Sanjay Nadkarni <sanjay.nadka...@nexenta.com>
  Reviewed by: Dan Vatca <dan.va...@gmail.com>
  Reviewed by: Matthew Ahrens <mahr...@delphix.com>
  Reviewed by: George Wilson <george.wil...@delphix.com>
  Reviewed by: Sebastien Roy <sebastien....@delphix.com>
  Approved by: Robert Mustacchi <r...@joyent.com>

Modified:
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c     Wed Jul 
26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c     Wed Jul 
26 16:11:08 2017        (r321527)
@@ -54,7 +54,9 @@ static void dbuf_write(dbuf_dirty_record_t *dr, arc_bu
 
 #ifndef __lint
 extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
-    dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
+    dmu_buf_evict_func_t *evict_func_sync,
+    dmu_buf_evict_func_t *evict_func_async,
+    dmu_buf_t **clear_on_evict_dbufp);
 #endif /* ! __lint */
 
 /*
@@ -361,11 +363,24 @@ dbuf_evict_user(dmu_buf_impl_t *db)
 #endif
 
        /*
-        * Invoke the callback from a taskq to avoid lock order reversals
-        * and limit stack depth.
+        * There are two eviction callbacks - one that we call synchronously
+        * and one that we invoke via a taskq.  The async one is useful for
+        * avoiding lock order reversals and limiting stack depth.
+        *
+        * Note that if we have a sync callback but no async callback,
+        * it's likely that the sync callback will free the structure
+        * containing the dbu.  In that case we need to take care to not
+        * dereference dbu after calling the sync evict func.
         */
-       taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
-           &dbu->dbu_tqent);
+       boolean_t has_async = (dbu->dbu_evict_func_async != NULL);
+
+       if (dbu->dbu_evict_func_sync != NULL)
+               dbu->dbu_evict_func_sync(dbu);
+
+       if (has_async) {
+               taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func_async,
+                   dbu, 0, &dbu->dbu_tqent);
+       }
 }
 
 boolean_t

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c    Wed Jul 
26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c    Wed Jul 
26 16:11:08 2017        (r321527)
@@ -1014,7 +1014,7 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, ui
 }
 
 static void
-dnode_buf_pageout(void *dbu)
+dnode_buf_evict_async(void *dbu)
 {
        dnode_children_t *children_dnodes = dbu;
        int i;
@@ -1140,8 +1140,8 @@ dnode_hold_impl(objset_t *os, uint64_t object, int fla
                for (i = 0; i < epb; i++) {
                        zrl_init(&dnh[i].dnh_zrlock);
                }
-               dmu_buf_init_user(&children_dnodes->dnc_dbu,
-                   dnode_buf_pageout, NULL);
+               dmu_buf_init_user(&children_dnodes->dnc_dbu, NULL,
+                   dnode_buf_evict_async, NULL);
                winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
                if (winner != NULL) {
 

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c      
Wed Jul 26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c      
Wed Jul 26 16:11:08 2017        (r321527)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright (c) 2011 Martin Matuska <m...@freebsd.org>
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 RackTop Systems.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -279,17 +279,31 @@ dsl_dataset_block_freeable(dsl_dataset_t *ds, const bl
        return (B_TRUE);
 }
 
+/*
+ * We have to release the fsid syncronously or we risk that a subsequent
+ * mount of the same dataset will fail to unique_insert the fsid.  This
+ * failure would manifest itself as the fsid of this dataset changing
+ * between mounts which makes NFS clients quite unhappy.
+ */
 static void
-dsl_dataset_evict(void *dbu)
+dsl_dataset_evict_sync(void *dbu)
 {
        dsl_dataset_t *ds = dbu;
 
        ASSERT(ds->ds_owner == NULL);
 
-       ds->ds_dbuf = NULL;
-
        unique_remove(ds->ds_fsid_guid);
+}
 
+static void
+dsl_dataset_evict_async(void *dbu)
+{
+       dsl_dataset_t *ds = dbu;
+
+       ASSERT(ds->ds_owner == NULL);
+
+       ds->ds_dbuf = NULL;
+
        if (ds->ds_objset != NULL)
                dmu_objset_evict(ds->ds_objset);
 
@@ -528,7 +542,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, v
                        ds->ds_reserved = ds->ds_quota = 0;
                }
 
-               dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
+               dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,
+                   dsl_dataset_evict_async, &ds->ds_dbuf);
                if (err == 0)
                        winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
 
@@ -551,6 +566,16 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, v
                } else {
                        ds->ds_fsid_guid =
                            unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
+                       if (ds->ds_fsid_guid !=
+                           dsl_dataset_phys(ds)->ds_fsid_guid) {
+                               zfs_dbgmsg("ds_fsid_guid changed from "
+                                   "%llx to %llx for pool %s dataset id %llu",
+                                   (long long)
+                                   dsl_dataset_phys(ds)->ds_fsid_guid,
+                                   (long long)ds->ds_fsid_guid,
+                                   spa_name(dp->dp_spa),
+                                   dsobj);
+                       }
                }
        }
        ASSERT3P(ds->ds_dbuf, ==, dbuf);

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c  Wed Jul 
26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c  Wed Jul 
26 16:11:08 2017        (r321527)
@@ -22,7 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Pawel Jakub Dawidek <pa...@dawidek.net>.
  * All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
@@ -133,7 +133,7 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *
 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
 
 static void
-dsl_dir_evict(void *dbu)
+dsl_dir_evict_async(void *dbu)
 {
        dsl_dir_t *dd = dbu;
        dsl_pool_t *dp = dd->dd_pool;
@@ -240,7 +240,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
                        dmu_buf_rele(origin_bonus, FTAG);
                }
 
-               dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
+               dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
+                   &dd->dd_dbuf);
                winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
                if (winner != NULL) {
                        if (dd->dd_parent)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c       Wed Jul 
26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c       Wed Jul 
26 16:11:08 2017        (r321527)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 iXsystems, Inc
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
@@ -1299,7 +1299,7 @@ sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype
 
 /*ARGSUSED*/
 static void
-sa_evict(void *dbu)
+sa_evict_sync(void *dbu)
 {
        panic("evicting sa dbuf\n");
 }
@@ -1383,7 +1383,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, voi
                sa_handle_t *winner = NULL;
 
                handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
-               handle->sa_dbu.dbu_evict_func = NULL;
+               handle->sa_dbu.dbu_evict_func_sync = NULL;
+               handle->sa_dbu.dbu_evict_func_async = NULL;
                handle->sa_userp = userp;
                handle->sa_bonus = db;
                handle->sa_os = os;
@@ -1394,7 +1395,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, voi
                error = sa_build_index(handle, SA_BONUS);
 
                if (hdl_type == SA_HDL_SHARED) {
-                       dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
+                       dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL,
+                           NULL);
                        winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
                }
 

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h  Wed Jul 
26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h  Wed Jul 
26 16:11:08 2017        (r321527)
@@ -539,8 +539,14 @@ typedef struct dmu_buf_user {
         */
        taskq_ent_t     dbu_tqent;
 
-       /* This instance's eviction function pointer. */
-       dmu_buf_evict_func_t *dbu_evict_func;
+       /*
+        * This instance's eviction function pointers.
+        *
+        * dbu_evict_func_sync is called synchronously and then
+        * dbu_evict_func_async is executed asynchronously on a taskq.
+        */
+       dmu_buf_evict_func_t *dbu_evict_func_sync;
+       dmu_buf_evict_func_t *dbu_evict_func_async;
 #ifdef ZFS_DEBUG
        /*
         * Pointer to user's dbuf pointer.  NULL for clients that do
@@ -564,15 +570,19 @@ typedef struct dmu_buf_user {
 /* Very ugly, but it beats issuing suppression directives in many Makefiles. */
 extern void
 dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
-    dmu_buf_t **clear_on_evict_dbufp);
+    dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp);
 #else /* __lint */
 inline void
-dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
-    dmu_buf_t **clear_on_evict_dbufp)
+dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
+    dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp)
 {
-       ASSERT(dbu->dbu_evict_func == NULL);
-       ASSERT(evict_func != NULL);
-       dbu->dbu_evict_func = evict_func;
+       ASSERT(dbu->dbu_evict_func_sync == NULL);
+       ASSERT(dbu->dbu_evict_func_async == NULL);
+
+       /* must have at least one evict func */
+       IMPLY(evict_func_sync == NULL, evict_func_async != NULL);
+       dbu->dbu_evict_func_sync = evict_func_sync;
+       dbu->dbu_evict_func_async = evict_func_async;
 #ifdef ZFS_DEBUG
        dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
 #endif

Modified: 
stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h     
Wed Jul 26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h     
Wed Jul 26 16:11:08 2017        (r321527)
@@ -199,7 +199,7 @@ boolean_t zap_match(zap_name_t *zn, const char *matchn
 int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp);
 void zap_unlockdir(zap_t *zap, void *tag);
-void zap_evict(void *dbu);
+void zap_evict_sync(void *dbu);
 zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
 void zap_name_free(zap_name_t *zn);
 int zap_hashbits(zap_t *zap);

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c      Wed Jul 
26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c      Wed Jul 
26 16:11:08 2017        (r321527)
@@ -81,7 +81,8 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t fla
        ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
        zap->zap_ismicro = FALSE;
 
-       zap->zap_dbu.dbu_evict_func = zap_evict;
+       zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
+       zap->zap_dbu.dbu_evict_func_async = NULL;
 
        mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
        zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
@@ -399,7 +400,7 @@ zap_allocate_blocks(zap_t *zap, int nblocks)
 }
 
 static void
-zap_leaf_pageout(void *dbu)
+zap_leaf_evict_sync(void *dbu)
 {
        zap_leaf_t *l = dbu;
 
@@ -423,7 +424,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
        VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
            l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
            DMU_READ_NO_PREFETCH));
-       dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+       dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
        winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
        ASSERT(winner == NULL);
        dmu_buf_will_dirty(l->l_dbuf, tx);
@@ -470,13 +471,13 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
        l->l_bs = highbit64(db->db_size) - 1;
        l->l_dbuf = db;
 
-       dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+       dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
        winner = dmu_buf_set_user(db, &l->l_dbu);
 
        rw_exit(&l->l_rwlock);
        if (winner != NULL) {
                /* someone else set it first */
-               zap_leaf_pageout(&l->l_dbu);
+               zap_leaf_evict_sync(&l->l_dbu);
                l = winner;
        }
 

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c        
Wed Jul 26 15:52:51 2017        (r321526)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c        
Wed Jul 26 16:11:08 2017        (r321527)
@@ -403,7 +403,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
         * it, because zap_lockdir() checks zap_ismicro without the lock
         * held.
         */
-       dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
+       dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
        winner = dmu_buf_set_user(db, &zap->zap_dbu);
 
        if (winner != NULL)
@@ -747,7 +747,7 @@ zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *t
 }
 
 void
-zap_evict(void *dbu)
+zap_evict_sync(void *dbu)
 {
        zap_t *zap = dbu;
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to