Author: mm
Date: Mon Oct 18 08:54:33 2010
New Revision: 214013
URL: http://svn.freebsd.org/changeset/base/214013

Log:
  MFC r213197, r213198:
  
  MFC r213197:
  Enable offlining of log devices.
  
  OpenSolaris revision and Bug IDs:
  
  9701:cc5b64682e64
  6803605       should be able to offline log devices
  6726045       vdev_deflate_ratio is not set when offlining a log device
  6599442       zpool import has faults in the display
  
  MFC r213198:
  Properly handle IO with B_FAILFAST
  Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted
  
  OpenSolaris revision and Bug IDs:
  
  9725:0bf7402e8022
  6843014 ZFS B_FAILFAST handling is broken
  
  As FreeBSD does not support B_FAILFAST and does not use vdev_disk.c,
  the MFC of r213198 is functionally a no-op here; its purpose is to reduce
  the diff against the latest ZFS code and to make future updates easier.
  
  Approved by:  delphij (mentor)
  Obtained from:        OpenSolaris (Bug ID 6803605, 6726045, 6599442, 6843014)

Modified:
  stable/8/cddl/contrib/opensolaris/cmd/zinject/zinject.c
  stable/8/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
  stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
  stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
  stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
Directory Properties:
  stable/8/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/cddl/contrib/opensolaris/cmd/zinject/zinject.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/cmd/zinject/zinject.c     Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/cddl/contrib/opensolaris/cmd/zinject/zinject.c     Mon Oct 18 08:54:33 2010        (r214013)
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident  "%Z%%M% %I%     %E% SMI"
-
 /*
  * ZFS Fault Injector
  *
@@ -227,7 +225,7 @@ usage(void)
            "\t\tClear the particular record (if given a numeric ID), or\n"
            "\t\tall records if 'all' is specificed.\n"
            "\n"
-           "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
+           "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
            "\t\tInject a fault into a particular device or the device's\n"
            "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
            "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
@@ -519,7 +517,7 @@ main(int argc, char **argv)
                return (0);
        }
 
-       while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
+       while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
                switch (c) {
                case 'a':
                        flags |= ZINJECT_FLUSH_ARC;
@@ -556,6 +554,9 @@ main(int argc, char **argv)
                                return (1);
                        }
                        break;
+               case 'F':
+                       record.zi_failfast = B_TRUE;
+                       break;
                case 'h':
                        usage();
                        return (0);

Modified: stable/8/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c    Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c    Mon Oct 18 08:54:33 2010        (r214013)
@@ -980,14 +980,189 @@ max_width(zpool_handle_t *zhp, nvlist_t 
        return (max);
 }
 
+typedef struct spare_cbdata {
+       uint64_t        cb_guid;
+       zpool_handle_t  *cb_zhp;
+} spare_cbdata_t;
+
+static boolean_t
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+       uint64_t guid;
+       nvlist_t **child;
+       uint_t c, children;
+
+       if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+           search == guid)
+               return (B_TRUE);
+
+       if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+           &child, &children) == 0) {
+               for (c = 0; c < children; c++)
+                       if (find_vdev(child[c], search))
+                               return (B_TRUE);
+       }
+
+       return (B_FALSE);
+}
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+       spare_cbdata_t *cbp = data;
+       nvlist_t *config, *nvroot;
+
+       config = zpool_get_config(zhp, NULL);
+       verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+           &nvroot) == 0);
+
+       if (find_vdev(nvroot, cbp->cb_guid)) {
+               cbp->cb_zhp = zhp;
+               return (1);
+       }
+
+       zpool_close(zhp);
+       return (0);
+}
+
+/*
+ * Print out configuration state as requested by status_callback.
+ */
+void
+print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
+    int namewidth, int depth, boolean_t isspare)
+{
+       nvlist_t **child;
+       uint_t c, children;
+       vdev_stat_t *vs;
+       char rbuf[6], wbuf[6], cbuf[6], repaired[7];
+       char *vname;
+       uint64_t notpresent;
+       spare_cbdata_t cb;
+       char *state;
+
+       verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+           (uint64_t **)&vs, &c) == 0);
+
+       if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+           &child, &children) != 0)
+               children = 0;
+
+       state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
+       if (isspare) {
+               /*
+                * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
+                * online drives.
+                */
+               if (vs->vs_aux == VDEV_AUX_SPARED)
+                       state = "INUSE";
+               else if (vs->vs_state == VDEV_STATE_HEALTHY)
+                       state = "AVAIL";
+       }
+
+       (void) printf("\t%*s%-*s  %-8s", depth, "", namewidth - depth,
+           name, state);
+
+       if (!isspare) {
+               zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
+               zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
+               zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
+               (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+       }
+
+       if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+           &notpresent) == 0) {
+               char *path;
+               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+               (void) printf("  was %s", path);
+       } else if (vs->vs_aux != 0) {
+               (void) printf("  ");
+
+               switch (vs->vs_aux) {
+               case VDEV_AUX_OPEN_FAILED:
+                       (void) printf(gettext("cannot open"));
+                       break;
+
+               case VDEV_AUX_BAD_GUID_SUM:
+                       (void) printf(gettext("missing device"));
+                       break;
+
+               case VDEV_AUX_NO_REPLICAS:
+                       (void) printf(gettext("insufficient replicas"));
+                       break;
+
+               case VDEV_AUX_VERSION_NEWER:
+                       (void) printf(gettext("newer version"));
+                       break;
+
+               case VDEV_AUX_SPARED:
+                       verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+                           &cb.cb_guid) == 0);
+                       if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
+                               if (strcmp(zpool_get_name(cb.cb_zhp),
+                                   zpool_get_name(zhp)) == 0)
+                                       (void) printf(gettext("currently in "
+                                           "use"));
+                               else
+                                       (void) printf(gettext("in use by "
+                                           "pool '%s'"),
+                                           zpool_get_name(cb.cb_zhp));
+                               zpool_close(cb.cb_zhp);
+                       } else {
+                               (void) printf(gettext("currently in use"));
+                       }
+                       break;
+
+               case VDEV_AUX_ERR_EXCEEDED:
+                       (void) printf(gettext("too many errors"));
+                       break;
+
+               case VDEV_AUX_IO_FAILURE:
+                       (void) printf(gettext("experienced I/O failures"));
+                       break;
+
+               case VDEV_AUX_BAD_LOG:
+                       (void) printf(gettext("bad intent log"));
+                       break;
+
+               default:
+                       (void) printf(gettext("corrupted data"));
+                       break;
+               }
+       } else if (vs->vs_scrub_repaired != 0 && children == 0) {
+               /*
+                * Report bytes resilvered/repaired on leaf devices.
+                */
+               zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
+               (void) printf(gettext("  %s %s"), repaired,
+                   (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
+                   "resilvered" : "repaired");
+       }
+
+       (void) printf("\n");
+
+       for (c = 0; c < children; c++) {
+               uint64_t is_log = B_FALSE;
+
+               /* Don't print logs here */
+               (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+                   &is_log);
+               if (is_log)
+                       continue;
+               vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+               print_status_config(zhp, vname, child[c],
+                   namewidth, depth + 2, isspare);
+               free(vname);
+       }
+}
+
 
 /*
  * Print the configuration of an exported pool.  Iterate over all vdevs in the
  * pool, printing out the name and status for each one.
  */
 void
-print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
-    boolean_t print_logs)
+print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
 {
        nvlist_t **child;
        uint_t c, children;
@@ -1044,12 +1219,11 @@ print_import_config(const char *name, nv
 
                (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
                    &is_log);
-               if ((is_log && !print_logs) || (!is_log && print_logs))
+               if (is_log)
                        continue;
 
                vname = zpool_vdev_name(g_zfs, NULL, child[c]);
-               print_import_config(vname, child[c],
-                   namewidth, depth + 2, B_FALSE);
+               print_import_config(vname, child[c], namewidth, depth + 2);
                free(vname);
        }
 
@@ -1075,6 +1249,43 @@ print_import_config(const char *name, nv
 }
 
 /*
+ * Print log vdevs.
+ * Logs are recorded as top level vdevs in the main pool child array
+ * but with "is_log" set to 1. We use either print_status_config() or
+ * print_import_config() to print the top level logs then any log
+ * children (eg mirrored slogs) are printed recursively - which
+ * works because only the top level vdev is marked "is_log"
+ */
+static void
+print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
+{
+       uint_t c, children;
+       nvlist_t **child;
+
+       if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
+           &children) != 0)
+               return;
+
+       (void) printf(gettext("\tlogs\n"));
+
+       for (c = 0; c < children; c++) {
+               uint64_t is_log = B_FALSE;
+               char *name;
+
+               (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+                   &is_log);
+               if (!is_log)
+                       continue;
+               name = zpool_vdev_name(g_zfs, zhp, child[c]);
+               if (verbose)
+                       print_status_config(zhp, name, child[c], namewidth,
+                           2, B_FALSE);
+               else
+                       print_import_config(name, child[c], namewidth, 2);
+               free(name);
+       }
+}
+/*
  * Display the status for the given pool.
  */
 static void
@@ -1242,11 +1453,9 @@ show_import(nvlist_t *config)
        if (namewidth < 10)
                namewidth = 10;
 
-       print_import_config(name, nvroot, namewidth, 0, B_FALSE);
-       if (num_logs(nvroot) > 0) {
-               (void) printf(gettext("\tlogs\n"));
-               print_import_config(name, nvroot, namewidth, 0, B_TRUE);
-       }
+       print_import_config(name, nvroot, namewidth, 0);
+       if (num_logs(nvroot) > 0)
+               print_logs(NULL, nvroot, namewidth, B_FALSE);
 
        if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
                (void) printf(gettext("\n\tAdditional devices are known to "
@@ -2717,182 +2926,6 @@ print_scrub_status(nvlist_t *nvroot)
            (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
 }
 
-typedef struct spare_cbdata {
-       uint64_t        cb_guid;
-       zpool_handle_t  *cb_zhp;
-} spare_cbdata_t;
-
-static boolean_t
-find_vdev(nvlist_t *nv, uint64_t search)
-{
-       uint64_t guid;
-       nvlist_t **child;
-       uint_t c, children;
-
-       if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
-           search == guid)
-               return (B_TRUE);
-
-       if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-           &child, &children) == 0) {
-               for (c = 0; c < children; c++)
-                       if (find_vdev(child[c], search))
-                               return (B_TRUE);
-       }
-
-       return (B_FALSE);
-}
-
-static int
-find_spare(zpool_handle_t *zhp, void *data)
-{
-       spare_cbdata_t *cbp = data;
-       nvlist_t *config, *nvroot;
-
-       config = zpool_get_config(zhp, NULL);
-       verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-           &nvroot) == 0);
-
-       if (find_vdev(nvroot, cbp->cb_guid)) {
-               cbp->cb_zhp = zhp;
-               return (1);
-       }
-
-       zpool_close(zhp);
-       return (0);
-}
-
-/*
- * Print out configuration state as requested by status_callback.
- */
-void
-print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
-    int namewidth, int depth, boolean_t isspare)
-{
-       nvlist_t **child;
-       uint_t c, children;
-       vdev_stat_t *vs;
-       char rbuf[6], wbuf[6], cbuf[6], repaired[7];
-       char *vname;
-       uint64_t notpresent;
-       spare_cbdata_t cb;
-       char *state;
-
-       verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
-           (uint64_t **)&vs, &c) == 0);
-
-       if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-           &child, &children) != 0)
-               children = 0;
-
-       state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
-       if (isspare) {
-               /*
-                * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
-                * online drives.
-                */
-               if (vs->vs_aux == VDEV_AUX_SPARED)
-                       state = "INUSE";
-               else if (vs->vs_state == VDEV_STATE_HEALTHY)
-                       state = "AVAIL";
-       }
-
-       (void) printf("\t%*s%-*s  %-8s", depth, "", namewidth - depth,
-           name, state);
-
-       if (!isspare) {
-               zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
-               zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
-               zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
-               (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
-       }
-
-       if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
-           &notpresent) == 0) {
-               char *path;
-               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-               (void) printf("  was %s", path);
-       } else if (vs->vs_aux != 0) {
-               (void) printf("  ");
-
-               switch (vs->vs_aux) {
-               case VDEV_AUX_OPEN_FAILED:
-                       (void) printf(gettext("cannot open"));
-                       break;
-
-               case VDEV_AUX_BAD_GUID_SUM:
-                       (void) printf(gettext("missing device"));
-                       break;
-
-               case VDEV_AUX_NO_REPLICAS:
-                       (void) printf(gettext("insufficient replicas"));
-                       break;
-
-               case VDEV_AUX_VERSION_NEWER:
-                       (void) printf(gettext("newer version"));
-                       break;
-
-               case VDEV_AUX_SPARED:
-                       verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
-                           &cb.cb_guid) == 0);
-                       if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
-                               if (strcmp(zpool_get_name(cb.cb_zhp),
-                                   zpool_get_name(zhp)) == 0)
-                                       (void) printf(gettext("currently in "
-                                           "use"));
-                               else
-                                       (void) printf(gettext("in use by "
-                                           "pool '%s'"),
-                                           zpool_get_name(cb.cb_zhp));
-                               zpool_close(cb.cb_zhp);
-                       } else {
-                               (void) printf(gettext("currently in use"));
-                       }
-                       break;
-
-               case VDEV_AUX_ERR_EXCEEDED:
-                       (void) printf(gettext("too many errors"));
-                       break;
-
-               case VDEV_AUX_IO_FAILURE:
-                       (void) printf(gettext("experienced I/O failures"));
-                       break;
-
-               case VDEV_AUX_BAD_LOG:
-                       (void) printf(gettext("bad intent log"));
-                       break;
-
-               default:
-                       (void) printf(gettext("corrupted data"));
-                       break;
-               }
-       } else if (vs->vs_scrub_repaired != 0 && children == 0) {
-               /*
-                * Report bytes resilvered/repaired on leaf devices.
-                */
-               zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
-               (void) printf(gettext("  %s %s"), repaired,
-                   (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
-                   "resilvered" : "repaired");
-       }
-
-       (void) printf("\n");
-
-       for (c = 0; c < children; c++) {
-               uint64_t is_log = B_FALSE;
-
-               /* Don't print logs here */
-               (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-                   &is_log);
-               if (is_log)
-                       continue;
-               vname = zpool_vdev_name(g_zfs, zhp, child[c]);
-               print_status_config(zhp, vname, child[c],
-                   namewidth, depth + 2, isspare);
-               free(vname);
-       }
-}
-
 static void
 print_error_log(zpool_handle_t *zhp)
 {
@@ -2969,39 +3002,6 @@ print_l2cache(zpool_handle_t *zhp, nvlis
 }
 
 /*
- * Print log vdevs.
- * Logs are recorded as top level vdevs in the main pool child array but with
- * "is_log" set to 1. We use print_status_config() to print the top level logs
- * then any log children (eg mirrored slogs) are printed recursively - which
- * works because only the top level vdev is marked "is_log"
- */
-static void
-print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth)
-{
-       uint_t c, children;
-       nvlist_t **child;
-
-       if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
-           &children) != 0)
-               return;
-
-       (void) printf(gettext("\tlogs\n"));
-
-       for (c = 0; c < children; c++) {
-               uint64_t is_log = B_FALSE;
-               char *name;
-
-               (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-                   &is_log);
-               if (!is_log)
-                       continue;
-               name = zpool_vdev_name(g_zfs, zhp, child[c]);
-               print_status_config(zhp, name, child[c], namewidth, 2, B_FALSE);
-               free(name);
-       }
-}
-
-/*
  * Display a summary of pool status.  Displays a summary such as:
  *
  *        pool: tank
@@ -3229,7 +3229,7 @@ status_callback(zpool_handle_t *zhp, voi
                    namewidth, 0, B_FALSE);
 
                if (num_logs(nvroot) > 0)
-                       print_logs(zhp, nvroot, namewidth);
+                       print_logs(zhp, nvroot, namewidth, B_TRUE);
                if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
                    &l2cache, &nl2cache) == 0)
                        print_l2cache(zhp, l2cache, nl2cache, namewidth);

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h        Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h        Mon Oct 18 08:54:33 2010        (r214013)
@@ -116,6 +116,7 @@ enum {
        EZFS_VDEVNOTSUP,        /* unsupported vdev type */
        EZFS_NOTSUP,            /* ops not supported on this dataset */
        EZFS_ACTIVE_SPARE,      /* pool has active shared spare devices */
+       EZFS_UNPLAYED_LOGS,     /* log device has unplayed logs */
        EZFS_UNKNOWN
 };
 

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c   Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c   Mon Oct 18 08:54:33 2010        (r214013)
@@ -1720,6 +1720,12 @@ zpool_vdev_fault(zpool_handle_t *zhp, ui
                 */
                return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
 
+       case EEXIST:
+               /*
+                * The log device has unplayed logs
+                */
+               return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
+
        default:
                return (zpool_standard_error(hdl, errno, msg));
        }

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c   Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c   Mon Oct 18 08:54:33 2010        (r214013)
@@ -210,6 +210,9 @@ libzfs_error_description(libzfs_handle_t
        case EZFS_ACTIVE_SPARE:
                return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
                    "device"));
+       case EZFS_UNPLAYED_LOGS:
+               return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
+                   "logs"));
        case EZFS_UNKNOWN:
                return (dgettext(TEXT_DOMAIN, "unknown error"));
        default:

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c Mon Oct 18 08:54:33 2010        (r214013)
@@ -351,7 +351,7 @@ traverse_zil_block(zilog_t *zilog, blkpt
                return;
 
        /*
-        * One block ("stumpy") can be allocated a long time ago; we
+        * One block ("stubby") can be allocated a long time ago; we
         * want to visit that one because it has been allocated
         * (on-disk) even if it hasn't been claimed (even though for
         * plain scrub there's nothing to do to it).

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c       Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c       Mon Oct 18 08:54:33 2010        (r214013)
@@ -1110,6 +1110,33 @@ spa_check_removed(vdev_t *vd)
 }
 
 /*
+ * Load the slog device state from the config object since it's possible
+ * that the label does not contain the most up-to-date information.
+ */
+void
+spa_load_log_state(spa_t *spa)
+{
+       nvlist_t *nv, *nvroot, **child;
+       uint64_t is_log;
+       uint_t children, c;
+       vdev_t *rvd = spa->spa_root_vdev;
+
+       VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
+       VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+       VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+           &child, &children) == 0);
+
+       for (c = 0; c < children; c++) {
+               vdev_t *tvd = rvd->vdev_child[c];
+
+               if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+                   &is_log) == 0 && is_log)
+                       vdev_load_log_state(tvd, child[c]);
+       }
+       nvlist_free(nv);
+}
+
+/*
  * Check for missing log devices
  */
 int
@@ -1125,13 +1152,7 @@ spa_check_logs(spa_t *spa)
                        return (1);
                }
                break;
-
-       case SPA_LOG_CLEAR:
-               (void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL,
-                   DS_FIND_CHILDREN);
-               break;
        }
-       spa->spa_log_state = SPA_LOG_GOOD;
        return (0);
 }
 
@@ -1455,6 +1476,8 @@ spa_load(spa_t *spa, nvlist_t *config, s
                spa_config_exit(spa, SCL_ALL, FTAG);
        }
 
+       spa_load_log_state(spa);
+
        if (spa_check_logs(spa)) {
                vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
                    VDEV_AUX_BAD_LOG);
@@ -1542,6 +1565,7 @@ spa_load(spa_t *spa, nvlist_t *config, s
                    zil_claim, tx, DS_FIND_CHILDREN);
                dmu_tx_commit(tx);
 
+               spa->spa_log_state = SPA_LOG_GOOD;
                spa->spa_sync_on = B_TRUE;
                txg_sync_start(spa->spa_dsl_pool);
 
@@ -4222,10 +4246,16 @@ spa_sync(spa_t *spa, uint64_t txg)
                                if (svdcount == SPA_DVAS_PER_BP)
                                        break;
                        }
-                       error = vdev_config_sync(svd, svdcount, txg);
+                       error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
+                       if (error != 0)
+                               error = vdev_config_sync(svd, svdcount, txg,
+                                   B_TRUE);
                } else {
                        error = vdev_config_sync(rvd->vdev_child,
-                           rvd->vdev_children, txg);
+                           rvd->vdev_children, txg, B_FALSE);
+                       if (error != 0)
+                               error = vdev_config_sync(rvd->vdev_child,
+                                   rvd->vdev_children, txg, B_TRUE);
                }
 
                spa_config_exit(spa, SCL_STATE, FTAG);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h  Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h  Mon Oct 18 08:54:33 2010        (r214013)
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zi
 
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
+extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
+    boolean_t);
 
 extern void vdev_state_dirty(vdev_t *vd);
 extern void vdev_state_clean(vdev_t *vd);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h     Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h     Mon Oct 18 08:54:33 2010        (r214013)
@@ -259,6 +259,7 @@ extern void vdev_remove_parent(vdev_t *c
 /*
  * vdev sync load and sync
  */
+extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
 extern void vdev_load(vdev_t *vd);
 extern void vdev_sync(vdev_t *vd, uint64_t txg);
 extern void vdev_sync_done(vdev_t *vd, uint64_t txg);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h     Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h     Mon Oct 18 08:54:33 2010        (r214013)
@@ -118,7 +118,7 @@ typedef struct zinject_record {
        uint32_t        zi_error;
        uint64_t        zi_type;
        uint32_t        zi_freq;
-       uint32_t        zi_pad; /* pad out to 64 bit alignment */
+       uint32_t        zi_failfast;
 } zinject_record_t;
 
 #define        ZINJECT_NULL            0x1

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h   Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h   Mon Oct 18 08:54:33 2010        (r214013)
@@ -366,9 +366,9 @@ extern uint64_t zil_itx_assign(zilog_t *
 
 extern void    zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
 
+extern int     zil_vdev_offline(char *osname, void *txarg);
 extern int     zil_claim(char *osname, void *txarg);
 extern int     zil_check_log_chain(char *osname, void *txarg);
-extern int     zil_clear_log_chain(char *osname, void *txarg);
 extern void    zil_sync(zilog_t *zilog, dmu_tx_t *tx);
 extern void    zil_clean(zilog_t *zilog);
 extern int     zil_is_committed(zilog_t *zilog);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h   Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h   Mon Oct 18 08:54:33 2010        (r214013)
@@ -117,31 +117,33 @@ enum zio_compress {
 #define        ZIO_PRIORITY_SCRUB              (zio_priority_table[10])
 #define        ZIO_PRIORITY_TABLE_SIZE         11
 
-#define        ZIO_FLAG_MUSTSUCCEED            0x00000
-#define        ZIO_FLAG_CANFAIL                0x00001
-#define        ZIO_FLAG_SPECULATIVE            0x00002
-#define        ZIO_FLAG_CONFIG_WRITER          0x00004
-#define        ZIO_FLAG_DONT_RETRY             0x00008
-
-#define        ZIO_FLAG_DONT_CACHE             0x00010
-#define        ZIO_FLAG_DONT_QUEUE             0x00020
-#define        ZIO_FLAG_DONT_AGGREGATE         0x00040
-#define        ZIO_FLAG_DONT_PROPAGATE         0x00080
-
-#define        ZIO_FLAG_IO_BYPASS              0x00100
-#define        ZIO_FLAG_IO_REPAIR              0x00200
-#define        ZIO_FLAG_IO_RETRY               0x00400
-#define        ZIO_FLAG_IO_REWRITE             0x00800
-
-#define        ZIO_FLAG_SELF_HEAL              0x01000
-#define        ZIO_FLAG_RESILVER               0x02000
-#define        ZIO_FLAG_SCRUB                  0x04000
-#define        ZIO_FLAG_SCRUB_THREAD           0x08000
-
-#define        ZIO_FLAG_PROBE                  0x10000
-#define        ZIO_FLAG_GANG_CHILD             0x20000
-#define        ZIO_FLAG_RAW                    0x40000
-#define        ZIO_FLAG_GODFATHER              0x80000
+#define        ZIO_FLAG_MUSTSUCCEED            0x000000
+#define        ZIO_FLAG_CANFAIL                0x000001
+#define        ZIO_FLAG_SPECULATIVE            0x000002
+#define        ZIO_FLAG_CONFIG_WRITER          0x000004
+#define        ZIO_FLAG_DONT_RETRY             0x000008
+
+#define        ZIO_FLAG_DONT_CACHE             0x000010
+#define        ZIO_FLAG_DONT_QUEUE             0x000020
+#define        ZIO_FLAG_DONT_AGGREGATE         0x000040
+#define        ZIO_FLAG_DONT_PROPAGATE         0x000080
+
+#define        ZIO_FLAG_IO_BYPASS              0x000100
+#define        ZIO_FLAG_IO_REPAIR              0x000200
+#define        ZIO_FLAG_IO_RETRY               0x000400
+#define        ZIO_FLAG_IO_REWRITE             0x000800
+
+#define        ZIO_FLAG_SELF_HEAL              0x001000
+#define        ZIO_FLAG_RESILVER               0x002000
+#define        ZIO_FLAG_SCRUB                  0x004000
+#define        ZIO_FLAG_SCRUB_THREAD           0x008000
+
+#define        ZIO_FLAG_PROBE                  0x010000
+#define        ZIO_FLAG_GANG_CHILD             0x020000
+#define        ZIO_FLAG_RAW                    0x040000
+#define        ZIO_FLAG_GODFATHER              0x080000
+
+#define        ZIO_FLAG_TRYHARD                0x100000
 
 #define        ZIO_FLAG_GANG_INHERIT           \
        (ZIO_FLAG_CANFAIL |             \
@@ -159,7 +161,8 @@ enum zio_compress {
        (ZIO_FLAG_GANG_INHERIT |        \
        ZIO_FLAG_IO_REPAIR |            \
        ZIO_FLAG_IO_RETRY |             \
-       ZIO_FLAG_PROBE)
+       ZIO_FLAG_PROBE |                \
+       ZIO_FLAG_TRYHARD)
 
 #define        ZIO_FLAG_AGG_INHERIT            \
        (ZIO_FLAG_DONT_AGGREGATE |      \
@@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id,
     struct zinject_record *record);
 extern int zio_clear_fault(int id);
 extern int zio_handle_fault_injection(zio_t *zio, int error);
-extern int zio_handle_device_injection(vdev_t *vd, int error);
+extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
 extern int zio_handle_label_injection(zio_t *zio, int error);
 
 #ifdef __cplusplus

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c      Mon Oct 18 08:36:03 2010        (r214012)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c      Mon Oct 18 08:54:33 2010        (r214013)
@@ -39,6 +39,7 @@
 #include <sys/zap.h>
 #include <sys/fs/zfs.h>
 #include <sys/arc.h>
+#include <sys/zil.h>
 
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
@@ -765,6 +766,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t 
        if (vd->vdev_ms_shift == 0)     /* not being allocated from yet */
                return (0);
 
+       /*
+        * Compute the raidz-deflation ratio.  Note, we hard-code
+        * in 128k (1 << 17) because it is the current "typical" blocksize.
+        * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
+        * or we will inconsistently account for existing bp's.
+        */
+       vd->vdev_deflate_ratio = (1 << 17) /
+           (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
+
        ASSERT(oldc <= newc);
 
        if (vd->vdev_islog)
@@ -918,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
 
                vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
                    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
-                   ZIO_FLAG_DONT_RETRY;
+                   ZIO_FLAG_TRYHARD;
 
                if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
                        /*
@@ -998,6 +1008,8 @@ vdev_open(vdev_t *vd)
            vd->vdev_state == VDEV_STATE_OFFLINE);
 
        vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
+       vd->vdev_cant_read = B_FALSE;
+       vd->vdev_cant_write = B_FALSE;
 
        if (!vd->vdev_removed && vd->vdev_faulted) {
                ASSERT(vd->vdev_children == 0);
@@ -1013,7 +1025,7 @@ vdev_open(vdev_t *vd)
        error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
 
        if (zio_injection_enabled && error == 0)
-               error = zio_handle_device_injection(vd, ENXIO);
+               error = zio_handle_device_injection(vd, NULL, ENXIO);
 
        if (error) {
                if (vd->vdev_removed &&
@@ -1113,18 +1125,6 @@ vdev_open(vdev_t *vd)
        }
 
        /*
-        * If this is a top-level vdev, compute the raidz-deflation
-        * ratio.  Note, we hard-code in 128k (1<<17) because it is the
-        * current "typical" blocksize.  Even if SPA_MAXBLOCKSIZE
-        * changes, this algorithm must never change, or we will
-        * inconsistently account for existing bp's.
-        */
-       if (vd->vdev_top == vd) {
-               vd->vdev_deflate_ratio = (1<<17) /
-                   (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
-       }
-
-       /*
         * If a leaf vdev has a DTL, and seems healthy, then kick off a
         * resilver.  But don't do this if we are doing a reopen for a scrub,
         * since this would just restart the scrub we are already doing.
@@ -1937,7 +1937,8 @@ vdev_online(spa_t *spa, uint64_t guid, u
 int
 vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
 {
-       vdev_t *vd;
+       vdev_t *vd, *tvd;
+       int error;
 
        spa_vdev_state_enter(spa);
 
@@ -1947,34 +1948,58 @@ vdev_offline(spa_t *spa, uint64_t guid, 
        if (!vd->vdev_ops->vdev_op_leaf)
                return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
 
+       tvd = vd->vdev_top;
+
        /*
         * If the device isn't already offline, try to offline it.
         */
        if (!vd->vdev_offline) {
                /*
                 * If this device has the only valid copy of some data,
-                * don't allow it to be offlined.
+                * don't allow it to be offlined. Log devices are always
+                * expendable.
                 */
-               if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
+               if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
+                   vdev_dtl_required(vd))
                        return (spa_vdev_state_exit(spa, NULL, EBUSY));
 
                /*
                 * Offline this device and reopen its top-level vdev.
-                * If this action results in the top-level vdev becoming
-                * unusable, undo it and fail the request.
+                * If the top-level vdev is a log device then just offline
+                * it. Otherwise, if this action results in the top-level
+                * vdev becoming unusable, undo it and fail the request.
                 */
                vd->vdev_offline = B_TRUE;
-               vdev_reopen(vd->vdev_top);
-               if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
+               vdev_reopen(tvd);
+
+               if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
+                   vdev_is_dead(tvd)) {
                        vd->vdev_offline = B_FALSE;
-                       vdev_reopen(vd->vdev_top);
+                       vdev_reopen(tvd);
                        return (spa_vdev_state_exit(spa, NULL, EBUSY));
                }
        }
 
        vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
 
-       return (spa_vdev_state_exit(spa, vd, 0));
+       if (!tvd->vdev_islog || !vdev_is_dead(tvd))
+               return (spa_vdev_state_exit(spa, vd, 0));
+
+       (void) spa_vdev_state_exit(spa, vd, 0);
+
+       error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
+           NULL, DS_FIND_CHILDREN);
+       if (error) {
+               (void) vdev_online(spa, guid, 0, NULL);
+               return (error);
+       }
+       /*
+        * If we successfully offlined the log device then we need to
+        * sync out the current txg so that the "stubby" block can be
+        * removed by zil_sync().
+        */
+       txg_wait_synced(spa->spa_dsl_pool, 0);
+       return (0);
 }
 
 /*
@@ -2182,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t ps
        if (flags & ZIO_FLAG_SPECULATIVE)
                return;
 
+       /*
+        * If this is an I/O error that is going to be retried, then ignore the
+        * error.  Otherwise, the user may interpret B_FAILFAST I/O errors as
+        * hard errors, when in reality they can happen for any number of
+        * innocuous reasons (bus resets, MPxIO link failure, etc).
+        */
+       if (zio->io_error == EIO &&
+           !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+               return;
+
        mutex_enter(&vd->vdev_stat_lock);
        if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
                if (zio->io_error == ECKSUM)
@@ -2279,6 +2314,7 @@ vdev_space_update(vdev_t *vd, int64_t sp
         * childrens', thus not accurate enough for us.
         */
        ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
+       ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
        dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
            vd->vdev_deflate_ratio;
 
@@ -2631,11 +2667,7 @@ vdev_set_state(vdev_t *vd, boolean_t iso
 boolean_t
 vdev_is_bootable(vdev_t *vd)
 {
-#ifdef __FreeBSD_version
-       return (B_TRUE);
-#else
-       int c;
-
+#ifdef sun
        if (!vd->vdev_ops->vdev_op_leaf) {
                char *vdev_type = vd->vdev_ops->vdev_op_type;
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to