Author: jpaetzel
Date: Fri Apr 28 02:11:29 2017
New Revision: 317541
URL: https://svnweb.freebsd.org/changeset/base/317541

Log:
  MFV 316905
  
  7740 fix for 6513 only works in hole punching case, not truncation
  
  illumos/illumos-gate@7de35a3ed0c2e6d4256bd2fb05b48b3798aaf553
  
  https://github.com/illumos/illumos-gate/commit/7de35a3ed0c2e6d4256bd2fb05b48b3798aaf553
  
  https://www.illumos.org/issues/7740
    The problem is that dbuf_findbp will return ENOENT if the block it's
    trying to find is beyond the end of the file. If that happens, we assume
    there is no birth time, and so we lose that information when we write
    out new blkptrs. We should teach dbuf_findbp to look for things that are
    beyond the current end, but not beyond the absolute end of the file.
    To verify, create a large file, truncate it to a short length, and then
    write beyond the end. Check with zdb to make sure that there are no
    holes with birth time zero (will appear as gaps).
  
  Reviewed by: Steve Gonczi <steve.gon...@delphix.com>
  Reviewed by: Matthew Ahrens <mahr...@delphix.com>
  Approved by: Dan McDonald <dan...@omniti.com>
  Author: Paul Dagnelie <p...@delphix.com>

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Fri Apr 28 01:54:01 2017        (r317540)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Fri Apr 28 02:11:29 2017        (r317541)
@@ -2161,8 +2161,6 @@ static int
 dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
     dmu_buf_impl_t **parentp, blkptr_t **bpp)
 {
-       int nlevels, epbs;
-
        *parentp = NULL;
        *bpp = NULL;
 
@@ -2181,17 +2179,35 @@ dbuf_findbp(dnode_t *dn, int level, uint
                return (0);
        }
 
-       if (dn->dn_phys->dn_nlevels == 0)
-               nlevels = 1;
-       else
-               nlevels = dn->dn_phys->dn_nlevels;
-
-       epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+       int nlevels =
+           (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels;
+       int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 
        ASSERT3U(level * epbs, <, 64);
        ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+       /*
+        * This assertion shouldn't trip as long as the max indirect block size
+        * is less than 1M.  The reason for this is that up to that point,
+        * the number of levels required to address an entire object with blocks
+        * of size SPA_MINBLOCKSIZE satisfies nlevels * epbs + 1 <= 64.  In
+        * other words, if N * epbs + 1 > 64, then if (N-1) * epbs + 1 > 55
+        * (i.e. we can address the entire object), objects will all use at most
+        * N-1 levels and the assertion won't overflow.  However, once epbs is
+        * 13, 4 * 13 + 1 = 53, but 5 * 13 + 1 = 66.  Then, 4 levels will not be
+        * enough to address an entire object, so objects will have 5 levels,
+        * but then this assertion will overflow.
+        *
+        * All this is to say that if we ever increase DN_MAX_INDBLKSHIFT, we
+        * need to redo this logic to handle overflows.
+        */
+       ASSERT(level >= nlevels ||
+           ((nlevels - level - 1) * epbs) +
+           highbit64(dn->dn_phys->dn_nblkptr) <= 64);
        if (level >= nlevels ||
-           (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
+           blkid >= ((uint64_t)dn->dn_phys->dn_nblkptr <<
+           ((nlevels - level - 1) * epbs)) ||
+           (fail_sparse &&
+           blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
                /* the buffer has no parent yet */
                return (SET_ERROR(ENOENT));
        } else if (level < nlevels-1) {
@@ -2209,6 +2225,8 @@ dbuf_findbp(dnode_t *dn, int level, uint
                }
                *bpp = ((blkptr_t *)(*parentp)->db.db_data) +
                    (blkid & ((1ULL << epbs) - 1));
+               if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
+                       ASSERT(BP_IS_HOLE(*bpp));
                return (0);
        } else {
                /* the block is referenced from the dnode */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h     Fri Apr 28 01:54:01 2017        (r317540)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h     Fri Apr 28 02:11:29 2017        (r317541)
@@ -58,6 +58,12 @@ extern "C" {
  */
 #define        DNODE_SHIFT             9       /* 512 bytes */
 #define        DN_MIN_INDBLKSHIFT      12      /* 4k */
+/*
+ * If we ever increase this value beyond 20, we need to revisit all logic that
+ * does x << level * epbs to handle overflow.  With a 1M indirect block size,
+ * 4 levels of indirect blocks would not be able to guarantee addressing an
+ * entire object, so 5 levels will be used, but 5 * (20 - 7) = 65.
+ */
 #define        DN_MAX_INDBLKSHIFT      17      /* 128k */
 #define        DNODE_BLOCK_SHIFT       14      /* 16k */
 #define        DNODE_CORE_SIZE         64      /* 64 bytes for dnode sans blkptrs */
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to