Author: mav
Date: Mon Aug 10 19:32:58 2015
New Revision: 286587
URL: https://svnweb.freebsd.org/changeset/base/286587

Log:
  MFV 286586: 5746 more checksumming in zfs send
  
  Reviewed by: Christopher Siden <christopher.si...@delphix.com>
  Reviewed by: George Wilson <george.wil...@delphix.com>
  Reviewed by: Bayard Bell <buffer.g.overf...@gmail.com>
  Approved by: Albert Lee <tr...@omniti.com>
  Author: Matthew Ahrens <mahr...@delphix.com>
  
  illumos/illumos-gate@98110f08fa182032082d98be2ddb9391fcd62bf1

Modified:
  head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h

Modified: head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Mon Aug 10 19:32:30 2015        (r286586)
+++ head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Mon Aug 10 19:32:58 2015        (r286587)
@@ -25,7 +25,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 #include <ctype.h>
@@ -34,6 +34,7 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <stddef.h>
 
 #include <sys/dmu.h>
 #include <sys/zfs_ioctl.h>
@@ -83,7 +84,6 @@ safe_malloc(size_t size)
  *
  * Read while computing incremental checksum
  */
-
 static size_t
 ssread(void *buf, size_t len, zio_cksum_t *cksum)
 {
@@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_
        if ((outlen = fread(buf, len, 1, send_stream)) == 0)
                return (0);
 
-       if (do_cksum && cksum) {
+       if (do_cksum) {
                if (do_byteswap)
                        fletcher_4_incremental_byteswap(buf, len, cksum);
                else
@@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_
        return (outlen);
 }
 
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+       ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+           ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+       size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
+       if (r == 0)
+               return (0);
+       zio_cksum_t saved_cksum = *cksum;
+       r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
+           sizeof (zio_cksum_t), cksum);
+       if (r == 0)
+               return (0);
+       if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
+           !ZIO_CHECKSUM_EQUAL(saved_cksum,
+           drr->drr_u.drr_checksum.drr_checksum)) {
+               fprintf(stderr, "invalid checksum\n");
+               (void) printf("Incorrect checksum in record header.\n");
+               (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+                   saved_cksum.zc_word[0],
+                   saved_cksum.zc_word[1],
+                   saved_cksum.zc_word[2],
+                   saved_cksum.zc_word[3]);
+               exit(1);
+       }
+       return (sizeof (*drr));
+}
+
 /*
  * Print part of a block in ASCII characters
  */
@@ -183,8 +211,10 @@ main(int argc, char *argv[])
        struct drr_free *drrf = &thedrr.drr_u.drr_free;
        struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
        struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+       struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
        char c;
        boolean_t verbose = B_FALSE;
+       boolean_t very_verbose = B_FALSE;
        boolean_t first = B_TRUE;
        /*
         * dump flag controls whether the contents of any modified data blocks
@@ -202,11 +232,14 @@ main(int argc, char *argv[])
                        do_cksum = B_FALSE;
                        break;
                case 'v':
+                       if (verbose)
+                               very_verbose = B_TRUE;
                        verbose = B_TRUE;
                        break;
                case 'd':
                        dump = B_TRUE;
                        verbose = B_TRUE;
+                       very_verbose = B_TRUE;
                        break;
                case ':':
                        (void) fprintf(stderr,
@@ -230,7 +263,7 @@ main(int argc, char *argv[])
 
        send_stream = stdin;
        pcksum = zc;
-       while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+       while (read_hdr(drr, &zc)) {
 
                /*
                 * If this is the first DMU record being processed, check for
@@ -432,7 +465,7 @@ main(int argc, char *argv[])
                        if (verbose) {
                                (void) printf("WRITE object = %llu type = %u "
                                    "checksum type = %u\n"
-                                   "offset = %llu length = %llu "
+                                   "    offset = %llu length = %llu "
                                    "props = %llx\n",
                                    (u_longlong_t)drrw->drr_object,
                                    drrw->drr_type,
@@ -476,9 +509,9 @@ main(int argc, char *argv[])
                        if (verbose) {
                                (void) printf("WRITE_BYREF object = %llu "
                                    "checksum type = %u props = %llx\n"
-                                   "offset = %llu length = %llu\n"
+                                   "    offset = %llu length = %llu\n"
                                    "toguid = %llx refguid = %llx\n"
-                                   "refobject = %llu refoffset = %llu\n",
+                                   "    refobject = %llu refoffset = %llu\n",
                                    (u_longlong_t)drrwbr->drr_object,
                                    drrwbr->drr_checksumtype,
                                    (u_longlong_t)drrwbr->drr_key.ddk_prop,
@@ -538,7 +571,7 @@ main(int argc, char *argv[])
                        if (verbose) {
                                (void) printf("WRITE_EMBEDDED object = %llu "
                                    "offset = %llu length = %llu\n"
-                                   "toguid = %llx comp = %u etype = %u "
+                                   "    toguid = %llx comp = %u etype = %u "
                                    "lsize = %u psize = %u\n",
                                    (u_longlong_t)drrwe->drr_object,
                                    (u_longlong_t)drrwe->drr_offset,
@@ -553,6 +586,13 @@ main(int argc, char *argv[])
                            P2ROUNDUP(drrwe->drr_psize, 8), &zc);
                        break;
                }
+               if (drr->drr_type != DRR_BEGIN && very_verbose) {
+                       (void) printf("    checksum = %llx/%llx/%llx/%llx\n",
+                           (longlong_t)drrc->drr_checksum.zc_word[0],
+                           (longlong_t)drrc->drr_checksum.zc_word[1],
+                           (longlong_t)drrc->drr_checksum.zc_word[2],
+                           (longlong_t)drrc->drr_checksum.zc_word[3]);
+               }
                pcksum = zc;
        }
        free(buf);

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c   Mon Aug 10 19:32:30 2015        (r286586)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c   Mon Aug 10 19:32:58 2015        (r286587)
@@ -188,10 +188,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_t
 }
 
 static int
-cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+    zio_cksum_t *zc, int outfd)
 {
-       fletcher_4_incremental_native(buf, len, zc);
-       return (write(outfd, buf, len));
+       ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+           ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+       fletcher_4_incremental_native(drr,
+           offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+       if (drr->drr_type != DRR_BEGIN) {
+               ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+                   drr_checksum.drr_checksum));
+               drr->drr_u.drr_checksum.drr_checksum = *zc;
+       }
+       fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+           sizeof (zio_cksum_t), zc);
+       if (write(outfd, drr, sizeof (*drr)) == -1)
+               return (errno);
+       if (payload_len != 0) {
+               fletcher_4_incremental_native(payload, payload_len, zc);
+               if (write(outfd, payload, payload_len) == -1)
+                       return (errno);
+       }
+       return (0);
 }
 
 /*
@@ -218,26 +236,18 @@ cksummer(void *arg)
        char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
        dmu_replay_record_t thedrr;
        dmu_replay_record_t *drr = &thedrr;
-       struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
-       struct drr_end *drre = &thedrr.drr_u.drr_end;
-       struct drr_object *drro = &thedrr.drr_u.drr_object;
-       struct drr_write *drrw = &thedrr.drr_u.drr_write;
-       struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
-       struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
        FILE *ofp;
        int outfd;
-       dmu_replay_record_t wbr_drr = {0};
-       struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
        dedup_table_t ddt;
        zio_cksum_t stream_cksum;
        uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
        uint64_t numbuckets;
 
        ddt.max_ddt_size =
-           MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
-           SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+           MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
+           SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
 
-       numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+       numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
 
        /*
         * numbuckets must be a power of 2.  Increase number to
@@ -253,32 +263,29 @@ cksummer(void *arg)
        ddt.numhashbits = high_order_bit(numbuckets) - 1;
        ddt.ddt_full = B_FALSE;
 
-       /* Initialize the write-by-reference block. */
-       wbr_drr.drr_type = DRR_WRITE_BYREF;
-       wbr_drr.drr_payloadlen = 0;
-
        outfd = dda->outputfd;
        ofp = fdopen(dda->inputfd, "r");
-       while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+       while (ssread(drr, sizeof (*drr), ofp) != 0) {
 
                switch (drr->drr_type) {
                case DRR_BEGIN:
                {
-                       int     fflags;
+                       struct drr_begin *drrb = &drr->drr_u.drr_begin;
+                       int fflags;
+                       int sz = 0;
                        ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
 
+                       ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
                        /* set the DEDUP feature flag for this stream */
                        fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
                        fflags |= (DMU_BACKUP_FEATURE_DEDUP |
                            DMU_BACKUP_FEATURE_DEDUPPROPS);
                        DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
 
-                       if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-                           &stream_cksum, outfd) == -1)
-                               goto out;
                        if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
                            DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
-                               int sz = drr->drr_payloadlen;
+                               sz = drr->drr_payloadlen;
 
                                if (sz > SPA_MAXBLOCKSIZE) {
                                        buf = zfs_realloc(dda->dedup_hdl, buf,
@@ -287,64 +294,60 @@ cksummer(void *arg)
                                (void) ssread(buf, sz, ofp);
                                if (ferror(stdin))
                                        perror("fread");
-                               if (cksum_and_write(buf, sz, &stream_cksum,
-                                   outfd) == -1)
-                                       goto out;
                        }
+                       if (dump_record(drr, buf, sz, &stream_cksum,
+                           outfd) != 0)
+                               goto out;
                        break;
                }
 
                case DRR_END:
                {
+                       struct drr_end *drre = &drr->drr_u.drr_end;
                        /* use the recalculated checksum */
-                       ZIO_SET_CHECKSUM(&drre->drr_checksum,
-                           stream_cksum.zc_word[0], stream_cksum.zc_word[1],
-                           stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
-                       if ((write(outfd, drr,
-                           sizeof (dmu_replay_record_t))) == -1)
+                       drre->drr_checksum = stream_cksum;
+                       if (dump_record(drr, NULL, 0, &stream_cksum,
+                           outfd) != 0)
                                goto out;
                        break;
                }
 
                case DRR_OBJECT:
                {
-                       if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-                           &stream_cksum, outfd) == -1)
-                               goto out;
+                       struct drr_object *drro = &drr->drr_u.drr_object;
                        if (drro->drr_bonuslen > 0) {
                                (void) ssread(buf,
                                    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
                                    ofp);
-                               if (cksum_and_write(buf,
-                                   P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
-                                   &stream_cksum, outfd) == -1)
-                                       goto out;
                        }
+                       if (dump_record(drr, buf,
+                           P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+                           &stream_cksum, outfd) != 0)
+                               goto out;
                        break;
                }
 
                case DRR_SPILL:
                {
-                       if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-                           &stream_cksum, outfd) == -1)
-                               goto out;
+                       struct drr_spill *drrs = &drr->drr_u.drr_spill;
                        (void) ssread(buf, drrs->drr_length, ofp);
-                       if (cksum_and_write(buf, drrs->drr_length,
-                           &stream_cksum, outfd) == -1)
+                       if (dump_record(drr, buf, drrs->drr_length,
+                           &stream_cksum, outfd) != 0)
                                goto out;
                        break;
                }
 
                case DRR_FREEOBJECTS:
                {
-                       if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-                           &stream_cksum, outfd) == -1)
+                       if (dump_record(drr, NULL, 0, &stream_cksum,
+                           outfd) != 0)
                                goto out;
                        break;
                }
 
                case DRR_WRITE:
                {
+                       struct drr_write *drrw = &drr->drr_u.drr_write;
                        dataref_t       dataref;
 
                        (void) ssread(buf, drrw->drr_length, ofp);
@@ -382,7 +385,13 @@ cksummer(void *arg)
                        if (ddt_update(dda->dedup_hdl, &ddt,
                            &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
                            &dataref)) {
+                               dmu_replay_record_t wbr_drr = {0};
+                               struct drr_write_byref *wbr_drrr =
+                                   &wbr_drr.drr_u.drr_write_byref;
+
                                /* block already present in stream */
+                               wbr_drr.drr_type = DRR_WRITE_BYREF;
+
                                wbr_drrr->drr_object = drrw->drr_object;
                                wbr_drrr->drr_offset = drrw->drr_offset;
                                wbr_drrr->drr_length = drrw->drr_length;
@@ -402,19 +411,13 @@ cksummer(void *arg)
                                wbr_drrr->drr_key.ddk_prop =
                                    drrw->drr_key.ddk_prop;
 
-                               if (cksum_and_write(&wbr_drr,
-                                   sizeof (dmu_replay_record_t), &stream_cksum,
-                                   outfd) == -1)
+                               if (dump_record(&wbr_drr, NULL, 0,
+                                   &stream_cksum, outfd) != 0)
                                        goto out;
                        } else {
                                /* block not previously seen */
-                               if (cksum_and_write(drr,
-                                   sizeof (dmu_replay_record_t), &stream_cksum,
-                                   outfd) == -1)
-                                       goto out;
-                               if (cksum_and_write(buf,
-                                   drrw->drr_length,
-                                   &stream_cksum, outfd) == -1)
+                               if (dump_record(drr, buf, drrw->drr_length,
+                                   &stream_cksum, outfd) != 0)
                                        goto out;
                        }
                        break;
@@ -422,28 +425,27 @@ cksummer(void *arg)
 
                case DRR_WRITE_EMBEDDED:
                {
-                       if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-                           &stream_cksum, outfd) == -1)
-                               goto out;
+                       struct drr_write_embedded *drrwe =
+                           &drr->drr_u.drr_write_embedded;
                        (void) ssread(buf,
                            P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
-                       if (cksum_and_write(buf,
+                       if (dump_record(drr, buf,
                            P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
-                           &stream_cksum, outfd) == -1)
+                           &stream_cksum, outfd) != 0)
                                goto out;
                        break;
                }
 
                case DRR_FREE:
                {
-                       if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-                           &stream_cksum, outfd) == -1)
+                       if (dump_record(drr, NULL, 0, &stream_cksum,
+                           outfd) != 0)
                                goto out;
                        break;
                }
 
                default:
-                       (void) printf("INVALID record type 0x%x\n",
+                       (void) fprintf(stderr, "INVALID record type 0x%x\n",
                            drr->drr_type);
                        /* should never happen, so assert */
                        assert(B_FALSE);
@@ -1470,18 +1472,11 @@ zfs_send(zfs_handle_t *zhp, const char *
                            sizeof (drr.drr_u.drr_begin.drr_toname),
                            "%s@%s", zhp->zfs_name, tosnap);
                        drr.drr_payloadlen = buflen;
-                       err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
 
-                       /* write header nvlist */
-                       if (err != -1 && packbuf != NULL) {
-                               err = cksum_and_write(packbuf, buflen, &zc,
-                                   outfd);
-                       }
+                       err = dump_record(&drr, packbuf, buflen, &zc, outfd);
                        free(packbuf);
-                       if (err == -1) {
-                               err = errno;
+                       if (err != 0)
                                goto stderr_out;
-                       }
 
                        /* write end record */
                        bzero(&drr, sizeof (drr));
@@ -1714,6 +1709,8 @@ recv_read(libzfs_handle_t *hdl, int fd, 
        int rv;
        int len = ilen;
 
+       assert(ilen <= SPA_MAXBLOCKSIZE);
+
        do {
                rv = read(fd, cp, len);
                cp += rv;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c      Mon Aug 10 19:32:30 2015        (r286586)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c      Mon Aug 10 19:32:58 2015        (r286587)
@@ -73,7 +73,6 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf
        struct iovec aiov;
        ASSERT0(len % 8);
 
-       fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
        aiov.iov_base = buf;
        aiov.iov_len = len;
        auio.uio_iov = &aiov;
@@ -99,6 +98,38 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf
        return (dsp->dsa_err);
 }
 
+/*
+ * For all record types except BEGIN, fill in the checksum (overlaid in
+ * drr_u.drr_checksum.drr_checksum).  The checksum verifies everything
+ * up to the start of the checksum itself.
+ */
+static int
+dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
+{
+       ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+           ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+       fletcher_4_incremental_native(dsp->dsa_drr,
+           offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+           &dsp->dsa_zc);
+       if (dsp->dsa_drr->drr_type != DRR_BEGIN) {
+               ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
+                   drr_checksum.drr_checksum));
+               dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
+       }
+       fletcher_4_incremental_native(&dsp->dsa_drr->
+           drr_u.drr_checksum.drr_checksum,
+           sizeof (zio_cksum_t), &dsp->dsa_zc);
+       if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+               return (SET_ERROR(EINTR));
+       if (payload_len != 0) {
+               fletcher_4_incremental_native(payload, payload_len,
+                   &dsp->dsa_zc);
+               if (dump_bytes(dsp, payload, payload_len) != 0)
+                       return (SET_ERROR(EINTR));
+       }
+       return (0);
+}
+
 static int
 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
     uint64_t length)
@@ -143,8 +174,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
         */
        if (dsp->dsa_pending_op != PENDING_NONE &&
            dsp->dsa_pending_op != PENDING_FREE) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
@@ -167,8 +197,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
                        return (0);
                } else {
                        /* not a continuation.  Push out pending record */
-                       if (dump_bytes(dsp, dsp->dsa_drr,
-                           sizeof (dmu_replay_record_t)) != 0)
+                       if (dump_record(dsp, NULL, 0) != 0)
                                return (SET_ERROR(EINTR));
                        dsp->dsa_pending_op = PENDING_NONE;
                }
@@ -181,8 +210,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
        drrf->drr_length = length;
        drrf->drr_toguid = dsp->dsa_toguid;
        if (length == -1ULL) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (SET_ERROR(EINTR));
        } else {
                dsp->dsa_pending_op = PENDING_FREE;
@@ -214,12 +242,11 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec
         * of different types.
         */
        if (dsp->dsa_pending_op != PENDING_NONE) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
-       /* write a DATA record */
+       /* write a WRITE record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_WRITE;
        drrw->drr_object = object;
@@ -245,9 +272,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec
                drrw->drr_key.ddk_cksum = bp->blk_cksum;
        }
 
-       if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-               return (SET_ERROR(EINTR));
-       if (dump_bytes(dsp, data, blksz) != 0)
+       if (dump_record(dsp, data, blksz) != 0)
                return (SET_ERROR(EINTR));
        return (0);
 }
@@ -261,8 +286,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, 
            &(dsp->dsa_drr->drr_u.drr_write_embedded);
 
        if (dsp->dsa_pending_op != PENDING_NONE) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (EINTR);
                dsp->dsa_pending_op = PENDING_NONE;
        }
@@ -282,9 +306,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, 
 
        decode_embedded_bp_compressed(bp, buf);
 
-       if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-               return (EINTR);
-       if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
+       if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
                return (EINTR);
        return (0);
 }
@@ -295,8 +317,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t 
        struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
 
        if (dsp->dsa_pending_op != PENDING_NONE) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
@@ -308,9 +329,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t 
        drrs->drr_length = blksz;
        drrs->drr_toguid = dsp->dsa_toguid;
 
-       if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
-               return (SET_ERROR(EINTR));
-       if (dump_bytes(dsp, data, blksz))
+       if (dump_record(dsp, data, blksz) != 0)
                return (SET_ERROR(EINTR));
        return (0);
 }
@@ -333,8 +352,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin
         */
        if (dsp->dsa_pending_op != PENDING_NONE &&
            dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
@@ -348,8 +366,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin
                        return (0);
                } else {
                        /* can't be aggregated.  Push out pending record */
-                       if (dump_bytes(dsp, dsp->dsa_drr,
-                           sizeof (dmu_replay_record_t)) != 0)
+                       if (dump_record(dsp, NULL, 0) != 0)
                                return (SET_ERROR(EINTR));
                        dsp->dsa_pending_op = PENDING_NONE;
                }
@@ -376,8 +393,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t 
                return (dump_freeobjects(dsp, object, 1));
 
        if (dsp->dsa_pending_op != PENDING_NONE) {
-               if (dump_bytes(dsp, dsp->dsa_drr,
-                   sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
@@ -398,11 +414,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t 
            drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
                drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
 
-       if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-               return (SET_ERROR(EINTR));
-
-       if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+       if (dump_record(dsp, DN_BONUS(dnp),
+           P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
                return (SET_ERROR(EINTR));
+       }
 
        /* Free anything past the end of the file. */
        if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
@@ -651,7 +666,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
        dsp->dsa_os = os;
        dsp->dsa_off = off;
        dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid;
-       ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
        dsp->dsa_pending_op = PENDING_NONE;
        dsp->dsa_incremental = (fromzb != NULL);
        dsp->dsa_featureflags = featureflags;
@@ -663,7 +677,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
        dsl_dataset_long_hold(ds, FTAG);
        dsl_pool_rele(dp, tag);
 
-       if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+       if (dump_record(dsp, NULL, 0) != 0) {
                err = dsp->dsa_err;
                goto out;
        }
@@ -672,7 +686,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
            backup_cb, dsp);
 
        if (dsp->dsa_pending_op != PENDING_NONE)
-               if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
+               if (dump_record(dsp, NULL, 0) != 0)
                        err = SET_ERROR(EINTR);
 
        if (err != 0) {
@@ -686,7 +700,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
        drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
        drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 
-       if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+       if (dump_record(dsp, NULL, 0) != 0) {
                err = dsp->dsa_err;
                goto out;
        }
@@ -1251,14 +1265,20 @@ dmu_recv_begin(char *tofs, char *tosnap,
 }
 
 struct restorearg {
+       objset_t *os;
        int err;
        boolean_t byteswap;
        kthread_t *td;
        struct file *fp;
-       char *buf;
        uint64_t voff;
        int bufsize; /* amount of memory allocated for buf */
+
+       dmu_replay_record_t *drr;
+       dmu_replay_record_t *next_drr;
+       char *buf;
        zio_cksum_t cksum;
+       zio_cksum_t prev_cksum;
+
        avl_tree_t *guid_to_ds_map;
 };
 
@@ -1323,14 +1343,11 @@ restore_bytes(struct restorearg *ra, voi
        return (error);
 }
 
-static void *
-restore_read(struct restorearg *ra, int len, char *buf)
+static int
+restore_read(struct restorearg *ra, int len, void *buf)
 {
        int done = 0;
 
-       if (buf == NULL)
-               buf = ra->buf;
-
        /* some things will require 8-byte alignment, so everything must */
        ASSERT0(len % 8);
        ASSERT3U(len, <=, ra->bufsize);
@@ -1346,24 +1363,21 @@ restore_read(struct restorearg *ra, int 
                ra->voff += len - done - resid;
                done = len - resid;
                if (ra->err != 0)
-                       return (NULL);
+                       return (ra->err);
        }
 
        ASSERT3U(done, ==, len);
-       if (ra->byteswap)
-               fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
-       else
-               fletcher_4_incremental_native(buf, len, &ra->cksum);
-       return (buf);
+       return (0);
 }
 
 static void
-backup_byteswap(dmu_replay_record_t *drr)
+byteswap_record(dmu_replay_record_t *drr)
 {
 #define        DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
 #define        DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
        drr->drr_type = BSWAP_32(drr->drr_type);
        drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
+
        switch (drr->drr_type) {
        case DRR_BEGIN:
                DO64(drr_begin.drr_magic);
@@ -1393,10 +1407,7 @@ backup_byteswap(dmu_replay_record_t *drr
                DO64(drr_write.drr_offset);
                DO64(drr_write.drr_length);
                DO64(drr_write.drr_toguid);
-               DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
-               DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
-               DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
-               DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
+               ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
                DO64(drr_write.drr_key.ddk_prop);
                break;
        case DRR_WRITE_BYREF:
@@ -1407,10 +1418,8 @@ backup_byteswap(dmu_replay_record_t *drr
                DO64(drr_write_byref.drr_refguid);
                DO64(drr_write_byref.drr_refobject);
                DO64(drr_write_byref.drr_refoffset);
-               DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
-               DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
-               DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
-               DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
+               ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
+                   drr_key.ddk_cksum);
                DO64(drr_write_byref.drr_key.ddk_prop);
                break;
        case DRR_WRITE_EMBEDDED:
@@ -1433,13 +1442,15 @@ backup_byteswap(dmu_replay_record_t *drr
                DO64(drr_spill.drr_toguid);
                break;
        case DRR_END:
-               DO64(drr_end.drr_checksum.zc_word[0]);
-               DO64(drr_end.drr_checksum.zc_word[1]);
-               DO64(drr_end.drr_checksum.zc_word[2]);
-               DO64(drr_end.drr_checksum.zc_word[3]);
                DO64(drr_end.drr_toguid);
+               ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
                break;
        }
+
+       if (drr->drr_type != DRR_BEGIN) {
+               ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
+       }
+
 #undef DO64
 #undef DO32
 }
@@ -1456,11 +1467,10 @@ deduce_nblkptr(dmu_object_type_t bonus_t
 }
 
 static int
-restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
+restore_object(struct restorearg *ra, struct drr_object *drro, void *data)
 {
        dmu_object_info_t doi;
        dmu_tx_t *tx;
-       void *data = NULL;
        uint64_t object;
        int err;
 
@@ -1471,23 +1481,17 @@ restore_object(struct restorearg *ra, ob
            drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
            P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
            drro->drr_blksz < SPA_MINBLOCKSIZE ||
-           drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
+           drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(ra->os)) ||
            drro->drr_bonuslen > DN_MAX_BONUSLEN) {
                return (SET_ERROR(EINVAL));
        }
 
-       err = dmu_object_info(os, drro->drr_object, &doi);
+       err = dmu_object_info(ra->os, drro->drr_object, &doi);
 
        if (err != 0 && err != ENOENT)
                return (SET_ERROR(EINVAL));
        object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
 
-       if (drro->drr_bonuslen) {
-               data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
-               if (ra->err != 0)
-                       return (ra->err);
-       }
-
        /*
         * If we are losing blkptrs or changing the block size this must
         * be a new file instance.  We must clear out the previous file
@@ -1501,14 +1505,14 @@ restore_object(struct restorearg *ra, ob
 
                if (drro->drr_blksz != doi.doi_data_block_size ||
                    nblkptr < doi.doi_nblkptr) {
-                       err = dmu_free_long_range(os, drro->drr_object,
+                       err = dmu_free_long_range(ra->os, drro->drr_object,
                            0, DMU_OBJECT_END);
                        if (err != 0)
                                return (SET_ERROR(EINVAL));
                }
        }
 
-       tx = dmu_tx_create(os);
+       tx = dmu_tx_create(ra->os);
        dmu_tx_hold_bonus(tx, object);
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
@@ -1518,7 +1522,7 @@ restore_object(struct restorearg *ra, ob
 
        if (object == DMU_NEW_OBJECT) {
                /* currently free, want to be allocated */
-               err = dmu_object_claim(os, drro->drr_object,
+               err = dmu_object_claim(ra->os, drro->drr_object,
                    drro->drr_type, drro->drr_blksz,
                    drro->drr_bonustype, drro->drr_bonuslen, tx);
        } else if (drro->drr_type != doi.doi_type ||
@@ -1526,7 +1530,7 @@ restore_object(struct restorearg *ra, ob
            drro->drr_bonustype != doi.doi_bonus_type ||
            drro->drr_bonuslen != doi.doi_bonus_size) {
                /* currently allocated, but with different properties */
-               err = dmu_object_reclaim(os, drro->drr_object,
+               err = dmu_object_reclaim(ra->os, drro->drr_object,
                    drro->drr_type, drro->drr_blksz,
                    drro->drr_bonustype, drro->drr_bonuslen, tx);
        }
@@ -1535,14 +1539,15 @@ restore_object(struct restorearg *ra, ob
                return (SET_ERROR(EINVAL));
        }
 
-       dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
-           tx);
-       dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
+       dmu_object_set_checksum(ra->os, drro->drr_object,
+           drro->drr_checksumtype, tx);
+       dmu_object_set_compress(ra->os, drro->drr_object,
+           drro->drr_compress, tx);
 
        if (data != NULL) {
                dmu_buf_t *db;
 
-               VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
+               VERIFY0(dmu_bonus_hold(ra->os, drro->drr_object, FTAG, &db));
                dmu_buf_will_dirty(db, tx);
 
                ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
@@ -1561,7 +1566,7 @@ restore_object(struct restorearg *ra, ob
 
 /* ARGSUSED */
 static int
-restore_freeobjects(struct restorearg *ra, objset_t *os,
+restore_freeobjects(struct restorearg *ra,
     struct drr_freeobjects *drrfo)
 {
        uint64_t obj;
@@ -1571,13 +1576,13 @@ restore_freeobjects(struct restorearg *r
 
        for (obj = drrfo->drr_firstobj;
            obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
-           (void) dmu_object_next(os, &obj, FALSE, 0)) {
+           (void) dmu_object_next(ra->os, &obj, FALSE, 0)) {
                int err;
 
-               if (dmu_object_info(os, obj, NULL) != 0)
+               if (dmu_object_info(ra->os, obj, NULL) != 0)
                        continue;
 
-               err = dmu_free_long_object(os, obj);
+               err = dmu_free_long_object(ra->os, obj);
                if (err != 0)
                        return (err);
        }
@@ -1585,49 +1590,37 @@ restore_freeobjects(struct restorearg *r
 }
 
 static int
-restore_write(struct restorearg *ra, objset_t *os,
-    struct drr_write *drrw)
+restore_write(struct restorearg *ra, struct drr_write *drrw, arc_buf_t *abuf)
 {
        dmu_tx_t *tx;
-       void *data;
        int err;
 
        if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
            !DMU_OT_IS_VALID(drrw->drr_type))
                return (SET_ERROR(EINVAL));
 
-       if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
+       if (dmu_object_info(ra->os, drrw->drr_object, NULL) != 0)
                return (SET_ERROR(EINVAL));
 
-       dmu_buf_t *bonus;
-       if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
-               return (SET_ERROR(EINVAL));
-
-       arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
-
-       data = restore_read(ra, drrw->drr_length, abuf->b_data);
-       if (data == NULL) {
-               dmu_return_arcbuf(abuf);
-               dmu_buf_rele(bonus, FTAG);
-               return (ra->err);
-       }
-
-       tx = dmu_tx_create(os);
+       tx = dmu_tx_create(ra->os);
 
        dmu_tx_hold_write(tx, drrw->drr_object,
            drrw->drr_offset, drrw->drr_length);
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
-               dmu_return_arcbuf(abuf);
-               dmu_buf_rele(bonus, FTAG);
                dmu_tx_abort(tx);
                return (err);
        }
        if (ra->byteswap) {
                dmu_object_byteswap_t byteswap =
                    DMU_OT_BYTESWAP(drrw->drr_type);
-               dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+               dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
+                   drrw->drr_length);
        }
+
+       dmu_buf_t *bonus;
+       if (dmu_bonus_hold(ra->os, drrw->drr_object, FTAG, &bonus) != 0)
+               return (SET_ERROR(EINVAL));
        dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
        dmu_tx_commit(tx);
        dmu_buf_rele(bonus, FTAG);
@@ -1642,8 +1635,7 @@ restore_write(struct restorearg *ra, obj
  * data from the stream to fulfill this write.
  */
 static int
-restore_write_byref(struct restorearg *ra, objset_t *os,
-    struct drr_write_byref *drrwbr)
+restore_write_byref(struct restorearg *ra, struct drr_write_byref *drrwbr)
 {
        dmu_tx_t *tx;
        int err;
@@ -1669,7 +1661,7 @@ restore_write_byref(struct restorearg *r
                if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
                        return (SET_ERROR(EINVAL));
        } else {
-               ref_os = os;
+               ref_os = ra->os;
        }
 
        err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
@@ -1677,7 +1669,7 @@ restore_write_byref(struct restorearg *r
        if (err != 0)
                return (err);
 
-       tx = dmu_tx_create(os);
+       tx = dmu_tx_create(ra->os);
 
        dmu_tx_hold_write(tx, drrwbr->drr_object,
            drrwbr->drr_offset, drrwbr->drr_length);
@@ -1686,7 +1678,7 @@ restore_write_byref(struct restorearg *r
                dmu_tx_abort(tx);
                return (err);
        }
-       dmu_write(os, drrwbr->drr_object,
+       dmu_write(ra->os, drrwbr->drr_object,
            drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
        dmu_buf_rele(dbp, FTAG);
        dmu_tx_commit(tx);
@@ -1694,12 +1686,11 @@ restore_write_byref(struct restorearg *r
 }
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to