Author: avg
Date: Thu Jul 14 11:42:53 2016
New Revision: 302837
URL: https://svnweb.freebsd.org/changeset/base/302837

Log:
  MFV r302641: 6844 dnode_next_offset can detect fictional holes
  
  illumos/illumos-gate@11ceac77ea8034bf2fe9bdd6d314f5d1e5ceeba3
  
https://github.com/illumos/illumos-gate/commit/11ceac77ea8034bf2fe9bdd6d314f5d1e5ceeba3
  
  https://www.illumos.org/issues/6844
    dnode_next_offset is used in a variety of places to iterate over the holes 
or
    allocated blocks in a dnode. It operates under the premise that it can 
iterate
    over the blockpointers of a dnode in open context while holding only the
    dn_struct_rwlock as reader. Unfortunately, this premise does not hold.
    When we create the zio for a dbuf, we pass in the actual block pointer in 
the
    indirect block above that dbuf. When we later zero the bp in
    zio_write_compress, we are directly modifying the bp. The state of the bp is
    now inconsistent from the perspective of dnode_next_offset: the bp will 
appear
    to be a hole until zio_dva_allocate finally finishes filling it in. In the
    meantime, dnode_next_offset can detect a hole in the dnode when none exists.
    I was able to experimentally demonstrate this behavior with the following
    setup:
    1. Create a file with 1 million dbufs.
    2. Create a thread that randomly dirties L2 blocks by writing to the first 
L0
    block under them.
    3. Observe dnode_next_offset, waiting for it to skip over a hole in the 
middle
    of a file.
    4. Do dnode_next_offset in a loop until we skip over such a non-existent 
hole.
    The fix is to ensure that it is valid to iterate over the indirect blocks 
in a
    dnode while holding the dn_struct_rwlock by passing the zio a copy of the BP
    and updating the actual BP in dbuf_write_ready while holding the lock.
  
  Reviewed by: Matthew Ahrens <mahr...@delphix.com>
  Reviewed by: George Wilson <george.wil...@delphix.com>
  Reviewed by: Boris Protopopov <bprotopo...@hotmail.com>
  Approved by: Dan McDonald <dan...@omniti.com>
  Author: Alex Reece <a...@delphix.com>
  MFC after:    3 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Thu Jul 14 
11:39:36 2016        (r302836)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Thu Jul 14 
11:42:53 2016        (r302837)
@@ -2883,7 +2883,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
        uint64_t fill = 0;
        int i;
 
-       ASSERT3P(db->db_blkptr, ==, bp);
+       ASSERT3P(db->db_blkptr, !=, NULL);
+       ASSERT3P(&db->db_data_pending->dr_bp_copy, ==, bp);
 
        DB_DNODE_ENTER(db);
        dn = DB_DNODE(db);
@@ -2905,7 +2906,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
 #ifdef ZFS_DEBUG
        if (db->db_blkid == DMU_SPILL_BLKID) {
                ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
-               ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
+               ASSERT(!(BP_IS_HOLE(bp)) &&
                    db->db_blkptr == &dn->dn_phys->dn_spill);
        }
 #endif
@@ -2946,6 +2947,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
                bp->blk_fill = fill;
 
        mutex_exit(&db->db_mtx);
+
+       rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+       *db->db_blkptr = *bp;
+       rw_exit(&dn->dn_struct_rwlock);
 }
 
 /*
@@ -3124,6 +3129,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
        zio_t *zio;
        int wp_flag = 0;
 
+       ASSERT(dmu_tx_is_syncing(tx));
+
        DB_DNODE_ENTER(db);
        dn = DB_DNODE(db);
        os = dn->dn_objset;
@@ -3182,6 +3189,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
        dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
        DB_DNODE_EXIT(db);
 
+       /*
+        * We copy the blkptr now (rather than when we instantiate the dirty
+        * record), because its value can change between open context and
+        * syncing context. We do not need to hold dn_struct_rwlock to read
+        * db_blkptr because we are in syncing context.
+        */
+       dr->dr_bp_copy = *db->db_blkptr;
+
        if (db->db_level == 0 &&
            dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
                /*
@@ -3191,7 +3206,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
                void *contents = (data != NULL) ? data->b_data : NULL;
 
                dr->dr_zio = zio_write(zio, os->os_spa, txg,
-                   db->db_blkptr, contents, db->db.db_size, &zp,
+                   &dr->dr_bp_copy, contents, db->db.db_size, &zp,
                    dbuf_write_override_ready, NULL, dbuf_write_override_done,
                    dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
                mutex_enter(&db->db_mtx);
@@ -3203,14 +3218,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_
                ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
                    zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
                dr->dr_zio = zio_write(zio, os->os_spa, txg,
-                   db->db_blkptr, NULL, db->db.db_size, &zp,
+                   &dr->dr_bp_copy, NULL, db->db.db_size, &zp,
                    dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
                    ZIO_PRIORITY_ASYNC_WRITE,
                    ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
        } else {
                ASSERT(arc_released(data));
                dr->dr_zio = arc_write(zio, os->os_spa, txg,
-                   db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db),
+                   &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
                    DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
                    dbuf_write_physdone, dbuf_write_done, db,
                    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h      Thu Jul 
14 11:39:36 2016        (r302836)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h      Thu Jul 
14 11:42:53 2016        (r302837)
@@ -121,6 +121,9 @@ typedef struct dbuf_dirty_record {
        /* How much space was changed to dsl_pool_dirty_space() for this? */
        unsigned int dr_accounted;
 
+       /* A copy of the bp that points to us */
+       blkptr_t dr_bp_copy;
+
        union dirty_types {
                struct dirty_indirect {
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to