Author: avg Date: Mon Nov 18 09:38:35 2019 New Revision: 354804 URL: https://svnweb.freebsd.org/changeset/base/354804
Log: MFV r354378,r354379,r354386: 10499 Multi-modifier protection (MMP) 10499 Multi-modifier protection (MMP) illumos/illumos-gate@e0f1c0afa46cc84d4b1e40124032a9a87310386e https://github.com/illumos/illumos-gate/commit/e0f1c0afa46cc84d4b1e40124032a9a87310386e https://www.illumos.org/issues/10499 Port the following ZFS commits from ZoL to illumos. 379ca9cf2 Multi-modifier protection (MMP) bbffb59ef Fix multihost stale cache file import 0d398b256 Do not initiate MMP writes while pool is suspended 10701 Correct lock ASSERTs in vdev_label_read/write illumos/illumos-gate@58447f688d5e308373ab16a3b129bc0ba0fbc154 https://github.com/illumos/illumos-gate/commit/58447f688d5e308373ab16a3b129bc0ba0fbc154 https://www.illumos.org/issues/10701 Port of ZoL commit: 0091d66f4e Correct lock ASSERTs in vdev_label_read/write At a minimum, this fixes a blown assert during an MMP test run when running on a DEBUG build. 11770 additional mmp fixes illumos/illumos-gate@4348eb901228d2f8fa50bb132a34248e8662074e https://github.com/illumos/illumos-gate/commit/4348eb901228d2f8fa50bb132a34248e8662074e https://www.illumos.org/issues/11770 Port a few additional MMP fixes from ZoL that came in after our initial MMP port. 4ca457b065 ZTS: Fix mmp_interval failure ca95f70dff zpool import progress kstat (only minimal changes from above can be pulled in right now) 060f0226e6 MMP interval and fail_intervals in uberblock Note from the committer (me). I do not have any use for this feature and I have not tested it. I only did smoke testing with multihost=off. Please be aware. I merged the code only to make future merges easier. Portions contributed by: Jerry Jelinek <jerry.jeli...@joyent.com> Portions contributed by: Tim Chase <t...@chase2k.com> Portions contributed by: sanjeevbagewadi <sanjeev.bagew...@gmail.com> Portions contributed by: John L. 
Hammond <john.hamm...@intel.com> Portions contributed by: Giuseppe Di Natale <dinata...@llnl.gov> Portions contributed by: Prakash Surya <sur...@llnl.gov> Portions contributed by: Brian Behlendorf <behlendo...@llnl.gov> Author: Olaf Faaland <faala...@llnl.gov> MFC after: 4 weeks Added: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/mmp.c - copied, changed from r354378, vendor-sys/illumos/dist/uts/common/fs/zfs/mmp.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/mmp.h - copied, changed from r354378, vendor-sys/illumos/dist/uts/common/fs/zfs/sys/mmp.h Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.c head/cddl/contrib/opensolaris/cmd/zhack/zhack.c head/cddl/contrib/opensolaris/cmd/zpool/zpool.8 head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c head/cddl/contrib/opensolaris/cmd/ztest/ztest.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h head/cddl/lib/libzpool/Makefile head/cddl/usr.bin/ztest/Makefile head/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h head/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h 
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c head/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h Directory Properties: head/cddl/contrib/opensolaris/ (props changed) head/cddl/contrib/opensolaris/cmd/zdb/ (props changed) head/cddl/contrib/opensolaris/lib/libzfs/ (props changed) head/sys/cddl/contrib/opensolaris/ (props changed) Modified: head/cddl/contrib/opensolaris/cmd/zdb/zdb.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Mon Nov 18 07:04:59 2019 (r354803) +++ head/cddl/contrib/opensolaris/cmd/zdb/zdb.c Mon Nov 18 09:38:35 2019 (r354804) @@ -24,6 +24,7 @@ * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Nexenta Systems, Inc. + * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. * Copyright 2017 RackTop Systems. 
*/ @@ -2436,6 +2437,26 @@ dump_uberblock(uberblock_t *ub, const char *header, co (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); (void) printf("\ttimestamp = %llu UTC = %s", (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp))); + + (void) printf("\tmmp_magic = %016llx\n", + (u_longlong_t)ub->ub_mmp_magic); + if (MMP_VALID(ub)) { + (void) printf("\tmmp_delay = %0llu\n", + (u_longlong_t)ub->ub_mmp_delay); + if (MMP_SEQ_VALID(ub)) + (void) printf("\tmmp_seq = %u\n", + (unsigned int) MMP_SEQ(ub)); + if (MMP_FAIL_INT_VALID(ub)) + (void) printf("\tmmp_fail = %u\n", + (unsigned int) MMP_FAIL_INT(ub)); + if (MMP_INTERVAL_VALID(ub)) + (void) printf("\tmmp_write = %u\n", + (unsigned int) MMP_INTERVAL(ub)); + /* After MMP_* to make summarize_uberblock_mmp cleaner */ + (void) printf("\tmmp_valid = %x\n", + (unsigned int) ub->ub_mmp_config & 0xFF); + } + if (dump_opt['u'] >= 3) { char blkbuf[BP_SPRINTF_LEN]; snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); @@ -2534,6 +2555,12 @@ dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashi if (uberblock_verify(ub)) continue; + + if ((dump_opt['u'] < 4) && + (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && + (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) + continue; + (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, "Uberblock[%d]\n", i); dump_uberblock(ub, header, ""); @@ -4173,6 +4200,22 @@ verify_device_removal_feature_counts(spa_t *spa) return (ret); } +static void +zdb_set_skip_mmp(char *target) +{ + spa_t *spa; + + /* + * Disable the activity check to allow examination of + * active pools. 
+ */ + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(target)) != NULL) { + spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; + } + mutex_exit(&spa_namespace_lock); +} + #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" /* * Import the checkpointed state of the pool specified by the target @@ -4207,6 +4250,7 @@ import_checkpointed_state(char *target, nvlist_t *cfg, } if (cfg == NULL) { + zdb_set_skip_mmp(poolname); error = spa_get_stats(poolname, &cfg, NULL, 0); if (error != 0) { fatal("Tried to read config of pool \"%s\" but " @@ -4219,7 +4263,8 @@ import_checkpointed_state(char *target, nvlist_t *cfg, fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name); error = spa_import(bogus_name, cfg, NULL, - ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT); + ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT | + ZFS_IMPORT_SKIP_MMP); if (error != 0) { fatal("Tried to import pool \"%s\" but spa_import() failed " "with error %d\n", bogus_name, error); @@ -5222,90 +5267,6 @@ zdb_embedded_block(char *thing) free(buf); } -static boolean_t -pool_match(nvlist_t *cfg, char *tgt) -{ - uint64_t v, guid = strtoull(tgt, NULL, 0); - char *s; - - if (guid != 0) { - if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) - return (v == guid); - } else { - if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) - return (strcmp(s, tgt) == 0); - } - return (B_FALSE); -} - -static char * -find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv) -{ - nvlist_t *pools; - nvlist_t *match = NULL; - char *name = NULL; - char *sepp = NULL; - char sep = '\0'; - int count = 0; - importargs_t args; - - bzero(&args, sizeof (args)); - args.paths = dirc; - args.path = dirv; - args.can_be_active = B_TRUE; - - if ((sepp = strpbrk(*target, "/@")) != NULL) { - sep = *sepp; - *sepp = '\0'; - } - - pools = zpool_search_import(g_zfs, &args); - - if (pools != NULL) { - nvpair_t *elem = NULL; - while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { - 
verify(nvpair_value_nvlist(elem, configp) == 0); - if (pool_match(*configp, *target)) { - count++; - if (match != NULL) { - /* print previously found config */ - if (name != NULL) { - (void) printf("%s\n", name); - dump_nvlist(match, 8); - name = NULL; - } - (void) printf("%s\n", - nvpair_name(elem)); - dump_nvlist(*configp, 8); - } else { - match = *configp; - name = nvpair_name(elem); - } - } - } - } - if (count > 1) - (void) fatal("\tMatched %d pools - use pool GUID " - "instead of pool name or \n" - "\tpool name part of a dataset name to select pool", count); - - if (sepp) - *sepp = sep; - /* - * If pool GUID was specified for pool id, replace it with pool name - */ - if (name && (strstr(*target, name) != *target)) { - int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0); - - *target = umem_alloc(sz, UMEM_NOFAIL); - (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : ""); - } - - *configp = name ? match : NULL; - - return (name); -} - int main(int argc, char **argv) { @@ -5318,7 +5279,7 @@ main(int argc, char **argv) int error = 0; char **searchdirs = NULL; int nsearch = 0; - char *target; + char *target, *target_pool; nvlist_t *policy = NULL; uint64_t max_txg = UINT64_MAX; int flags = ZFS_IMPORT_MISSING_LOG; @@ -5526,22 +5487,48 @@ main(int argc, char **argv) error = 0; target = argv[0]; + if (strpbrk(target, "/@") != NULL) { + size_t targetlen; + + target_pool = strdup(target); + *strpbrk(target_pool, "/@") = '\0'; + + target_is_spa = B_FALSE; + targetlen = strlen(target); + if (targetlen && target[targetlen - 1] == '/') + target[targetlen - 1] = '\0'; + } else { + target_pool = target; + } + if (dump_opt['e']) { - char *name = find_zpool(&target, &cfg, nsearch, searchdirs); + importargs_t args = { 0 }; - error = ENOENT; - if (name) { - if (dump_opt['C'] > 1) { - (void) printf("\nConfiguration for import:\n"); - dump_nvlist(cfg, 8); - } + args.paths = nsearch; + args.path = searchdirs; + args.can_be_active = B_TRUE; + error = zpool_tryimport(g_zfs, 
target_pool, &cfg, &args); + + if (error == 0) { + if (nvlist_add_nvlist(cfg, ZPOOL_LOAD_POLICY, policy) != 0) { fatal("can't open '%s': %s", target, strerror(ENOMEM)); } - error = spa_import(name, cfg, NULL, flags); + + if (dump_opt['C'] > 1) { + (void) printf("\nConfiguration for import:\n"); + dump_nvlist(cfg, 8); + } + + /* + * Disable the activity check to allow examination of + * active pools. + */ + error = spa_import(target_pool, cfg, NULL, + flags | ZFS_IMPORT_SKIP_MMP); } } @@ -5556,21 +5543,6 @@ main(int argc, char **argv) } - if (strpbrk(target, "/@") != NULL) { - size_t targetlen; - - target_is_spa = B_FALSE; - /* - * Remove any trailing slash. Later code would get confused - * by it, but we want to allow it so that "pool/" can - * indicate that we want to dump the topmost filesystem, - * rather than the whole pool. - */ - targetlen = strlen(target); - if (targetlen != 0 && target[targetlen - 1] == '/') - target[targetlen - 1] = '\0'; - } - if (error == 0) { if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) { ASSERT(checkpoint_pool != NULL); @@ -5584,6 +5556,7 @@ main(int argc, char **argv) } } else if (target_is_spa || dump_opt['R']) { + zdb_set_skip_mmp(target); error = spa_open_rewind(target, &spa, FTAG, policy, NULL); if (error) { @@ -5606,6 +5579,7 @@ main(int argc, char **argv) } } } else { + zdb_set_skip_mmp(target); error = open_objset(target, DMU_OST_ANY, FTAG, &os); } } Modified: head/cddl/contrib/opensolaris/cmd/zhack/zhack.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zhack/zhack.c Mon Nov 18 07:04:59 2019 (r354803) +++ head/cddl/contrib/opensolaris/cmd/zhack/zhack.c Mon Nov 18 09:38:35 2019 (r354804) @@ -121,16 +121,11 @@ space_delta_cb(dmu_object_type_t bonustype, void *data * Target is the dataset whose pool we want to open. 
*/ static void -import_pool(const char *target, boolean_t readonly) +zhack_import(char *target, boolean_t readonly) { nvlist_t *config; - nvlist_t *pools; - int error; - char *sepp; - spa_t *spa; - nvpair_t *elem; nvlist_t *props; - const char *name; + int error; kernel_init(readonly ? FREAD : (FREAD | FWRITE)); g_zfs = libzfs_init(); @@ -139,68 +134,40 @@ import_pool(const char *target, boolean_t readonly) dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); g_readonly = readonly; - - /* - * If we only want readonly access, it's OK if we find - * a potentially-active (ie, imported into the kernel) pool from the - * default cachefile. - */ - if (readonly && spa_open(target, &spa, FTAG) == 0) { - spa_close(spa, FTAG); - return; - } - g_importargs.unique = B_TRUE; g_importargs.can_be_active = readonly; g_pool = strdup(target); - if ((sepp = strpbrk(g_pool, "/@")) != NULL) - *sepp = '\0'; - g_importargs.poolname = g_pool; - pools = zpool_search_import(g_zfs, &g_importargs); - if (nvlist_empty(pools)) { - if (!g_importargs.can_be_active) { - g_importargs.can_be_active = B_TRUE; - if (zpool_search_import(g_zfs, &g_importargs) != NULL || - spa_open(target, &spa, FTAG) == 0) { - fatal(spa, FTAG, "cannot import '%s': pool is " - "active; run " "\"zpool export %s\" " - "first\n", g_pool, g_pool); - } - } + error = zpool_tryimport(g_zfs, target, &config, &g_importargs); + if (error) + fatal(NULL, FTAG, "cannot import '%s': %s", target, + libzfs_error_description(g_zfs)); - fatal(NULL, FTAG, "cannot import '%s': no such pool " - "available\n", g_pool); - } - - elem = nvlist_next_nvpair(pools, NULL); - name = nvpair_name(elem); - verify(nvpair_value_nvlist(elem, &config) == 0); - props = NULL; if (readonly) { - verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_uint64(props, + VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0); } zfeature_checks_disable = B_TRUE; - 
error = spa_import(name, config, props, ZFS_IMPORT_NORMAL); + error = spa_import(target, config, props, + (readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL)); zfeature_checks_disable = B_FALSE; if (error == EEXIST) error = 0; if (error) - fatal(NULL, FTAG, "can't import '%s': %s", name, + fatal(NULL, FTAG, "can't import '%s': %s", target, strerror(error)); } static void -zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa) +zhack_spa_open(char *target, boolean_t readonly, void *tag, spa_t **spa) { int err; - import_pool(target, readonly); + zhack_import(target, readonly); zfeature_checks_disable = B_TRUE; err = spa_open(target, spa, tag); Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool.8 ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zpool/zpool.8 Mon Nov 18 07:04:59 2019 (r354803) +++ head/cddl/contrib/opensolaris/cmd/zpool/zpool.8 Mon Nov 18 09:38:35 2019 (r354804) @@ -481,6 +481,11 @@ If a pool has a shared spare that is currently being u exported since other pools may use this shared spare, which may lead to potential data corruption. .Pp +Shared spares add some risk. +If the pools are imported on different hosts, and both pools suffer a device +failure at the same time, both could attempt to use the spare at the same time. +This may not be detected, resulting in data corruption. +.Pp An in-progress spare replacement can be cancelled by detaching the hot spare. If the original faulted device is detached, then the hot spare assumes its place in the configuration, and is removed from the spare list of all active @@ -806,7 +811,7 @@ to the enabled state. See .Xr zpool-features 7 for details on feature states. -.It Sy listsnaps Ns = Ns Cm on No | Cm off +.It Sy listsnapshots Ns = Ns Cm on No | Cm off Controls whether information about snapshots associated with this pool is output when .Qq Nm zfs Cm list @@ -814,6 +819,31 @@ is run without the .Fl t option. 
The default value is .Cm off . +This property can also be referred to by its shortened name, +.Sy listsnaps . +.It Sy multihost Ns = Ns Sy on No | Sy off +Controls whether a pool activity check should be performed during +.Nm zpool Cm import . +When a pool is determined to be active it cannot be imported, even with the +.Fl f +option. +This property is intended to be used in failover configurations +where multiple hosts have access to a pool on shared storage. +.sp +Multihost provides protection on import only. +It does not protect against an +individual device being used in multiple pools, regardless of the type of vdev. +See the discussion under +.Sy zpool create. +.sp +When this property is on, periodic writes to storage occur to show the pool is +in use. +See +.Sy vfs.zfs.multihost_interval +sysctl. +In order to enable this property each host must set a unique hostid. +The default value is +.Sy off . .It Sy version Ns = Ns Ar version The current on-disk version of the pool. This can be increased, but never decreased. The preferred method of updating pools is with the @@ -958,8 +988,22 @@ discarded transactions is irretrievably lost. Used in combination with the .Fl F flag. Check whether discarding transactions would make the pool openable, but +<<<<<<< do not actually discard any transactions. .El +||||||| +If no arguments are specified, all device errors within the pool are cleared. +If one or more devices is specified, only those errors associated with the +specified device or devices are cleared. +======= +If no arguments are specified, all device errors within the pool are cleared. +If one or more devices is specified, only those errors associated with the +specified device or devices are cleared. +If multihost is enabled, and the pool has been suspended, this will not +resume I/O. +While the pool was suspended, it may have been imported on +another host, and resuming I/O could result in pool damage. 
+>>>>>>> .It Xo .Nm .Cm create @@ -984,7 +1028,37 @@ specification is described in the .Qq Sx Virtual Devices section. .Pp +<<<<<<< The command verifies that each device specified is accessible and not currently +||||||| +The command verifies that each device specified is accessible and not currently +in use by another subsystem. +There are some uses, such as being currently mounted, or specified as the +dedicated dump device, that prevents a device from ever being used by ZFS. +Other uses, such as having a preexisting UFS file system, can be overridden with +======= +The command attempts to verify that each device specified is accessible and not +currently in use by another subsystem. +However this check is not robust enough +to detect simultaneous attempts to use a new device in different pools, even if +.Sy multihost +is +.Sy enabled. +The +administrator must ensure that simultaneous invocations of any combination of +.Sy zpool replace , +.Sy zpool create , +.Sy zpool add , +or +.Sy zpool labelclear , +do not refer to the same device. +Using the same device in two pools will +result in pool corruption. +.sp +There are some uses, such as being currently mounted, or specified as the +dedicated dump device, that prevents a device from ever being used by ZFS. +Other uses, such as having a preexisting UFS file system, can be overridden with +>>>>>>> in use by another subsystem. 
There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevents a device from ever being used by Modified: head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c Mon Nov 18 07:04:59 2019 (r354803) +++ head/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c Mon Nov 18 09:38:35 2019 (r354804) @@ -53,6 +53,7 @@ #include <zfs_prop.h> #include <sys/fs/zfs.h> #include <sys/stat.h> +#include <sys/debug.h> #include <libzfs.h> @@ -1635,6 +1636,10 @@ print_status_config(zpool_handle_t *zhp, const char *n (void) printf(gettext("split into new pool")); break; + case VDEV_AUX_ACTIVE: + (void) printf(gettext("currently in use")); + break; + case VDEV_AUX_CHILDREN_OFFLINE: (void) printf(gettext("all children offline")); break; @@ -1769,6 +1774,10 @@ print_import_config(const char *name, nvlist_t *nv, in (void) printf(gettext("too many errors")); break; + case VDEV_AUX_ACTIVE: + (void) printf(gettext("currently in use")); + break; + case VDEV_AUX_CHILDREN_OFFLINE: (void) printf(gettext("all children offline")); break; @@ -1866,8 +1875,10 @@ show_import(nvlist_t *config) vdev_stat_t *vs; char *name; uint64_t guid; + uint64_t hostid = 0; char *msgid; - nvlist_t *nvroot; + char *hostname = "unknown"; + nvlist_t *nvroot, *nvinfo; int reason; const char *health; uint_t vsc; @@ -1954,6 +1965,17 @@ show_import(nvlist_t *config) zpool_print_unsup_feat(config); break; + case ZPOOL_STATUS_HOSTID_ACTIVE: + (void) printf(gettext(" status: The pool is currently " + "imported by another system.\n")); + break; + + case ZPOOL_STATUS_HOSTID_REQUIRED: + (void) printf(gettext(" status: The pool has the " + "multihost property on. 
It cannot\n\tbe safely imported " + "when the system hostid is not set.\n")); + break; + case ZPOOL_STATUS_HOSTID_MISMATCH: (void) printf(gettext(" status: The pool was last accessed by " "another system.\n")); @@ -2040,6 +2062,27 @@ show_import(nvlist_t *config) "imported. Attach the missing\n\tdevices and try " "again.\n")); break; + case ZPOOL_STATUS_HOSTID_ACTIVE: + VERIFY0(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_LOAD_INFO, &nvinfo)); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) + hostname = fnvlist_lookup_string(nvinfo, + ZPOOL_CONFIG_MMP_HOSTNAME); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) + hostid = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_HOSTID); + + (void) printf(gettext(" action: The pool must be " + "exported from %s (hostid=%lx)\n\tbefore it " + "can be safely imported.\n"), hostname, + (unsigned long) hostid); + break; + case ZPOOL_STATUS_HOSTID_REQUIRED: + (void) printf(gettext(" action: Check the SMF " + "svc:/system/hostid service.\n")); + break; default: (void) printf(gettext(" action: The pool cannot be " "imported due to damaged devices or data.\n")); @@ -2087,6 +2130,31 @@ show_import(nvlist_t *config) } } +static boolean_t +zfs_force_import_required(nvlist_t *config) +{ + uint64_t state; + uint64_t hostid = 0; + nvlist_t *nvinfo; + + state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); + + if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid()) + return (B_TRUE); + + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) { + mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_STATE); + + if (mmp_state != MMP_STATE_INACTIVE) + return (B_TRUE); + } + + return (B_FALSE); +} + /* * Perform the import for the given configuration. 
This passes the heavy * lifting off to zpool_import_props(), and then mounts the datasets contained @@ -2098,53 +2166,73 @@ do_import(nvlist_t *config, const char *newname, const { zpool_handle_t *zhp; char *name; - uint64_t state; uint64_t version; - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); + name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); + version = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_POOL_STATE, &state) == 0); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_VERSION, &version) == 0); if (!SPA_VERSION_IS_SUPPORTED(version)) { (void) fprintf(stderr, gettext("cannot import '%s': pool " "is formatted using an unsupported ZFS version\n"), name); return (1); - } else if (state != POOL_STATE_EXPORTED && + } else if (zfs_force_import_required(config) && !(flags & ZFS_IMPORT_ANY_HOST)) { - uint64_t hostid; + mmp_state_t mmp_state = MMP_STATE_INACTIVE; + nvlist_t *nvinfo; - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, - &hostid) == 0) { - if ((unsigned long)hostid != gethostid()) { - char *hostname; - uint64_t timestamp; - time_t t; + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) + mmp_state = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_STATE); - verify(nvlist_lookup_string(config, - ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_TIMESTAMP, &timestamp) == 0); - t = timestamp; - (void) fprintf(stderr, gettext("cannot import " - "'%s': pool may be in use from other " - "system, it was last accessed by %s " - "(hostid: 0x%lx) on %s"), name, hostname, - (unsigned long)hostid, - asctime(localtime(&t))); - (void) fprintf(stderr, gettext("use '-f' to " - "import anyway\n")); - return (1); - } + if (mmp_state == MMP_STATE_ACTIVE) { + char *hostname = "<unknown>"; + uint64_t hostid = 0; + + if (nvlist_exists(nvinfo, 
ZPOOL_CONFIG_MMP_HOSTNAME)) + hostname = fnvlist_lookup_string(nvinfo, + ZPOOL_CONFIG_MMP_HOSTNAME); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) + hostid = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_HOSTID); + + (void) fprintf(stderr, gettext("cannot import '%s': " + "pool is imported on %s (hostid: " + "0x%lx)\nExport the pool on the other system, " + "then run 'zpool import'.\n"), + name, hostname, (unsigned long) hostid); + } else if (mmp_state == MMP_STATE_NO_HOSTID) { + (void) fprintf(stderr, gettext("Cannot import '%s': " + "pool has the multihost property on and the\n" + "system's hostid is not set.\n"), name); } else { + char *hostname = "<unknown>"; + uint64_t timestamp = 0; + uint64_t hostid = 0; + + if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) + hostname = fnvlist_lookup_string(config, + ZPOOL_CONFIG_HOSTNAME); + + if (nvlist_exists(config, ZPOOL_CONFIG_TIMESTAMP)) + timestamp = fnvlist_lookup_uint64(config, + ZPOOL_CONFIG_TIMESTAMP); + + if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) + hostid = fnvlist_lookup_uint64(config, + ZPOOL_CONFIG_HOSTID); + (void) fprintf(stderr, gettext("cannot import '%s': " - "pool may be in use from other system\n"), name); - (void) fprintf(stderr, gettext("use '-f' to import " - "anyway\n")); - return (1); + "pool was previously in use from another system.\n" + "Last accessed by %s (hostid=%lx) at %s" + "The pool can be imported, use 'zpool import -f' " + "to import the pool.\n"), name, hostname, + (unsigned long)hostid, ctime((time_t *)&timestamp)); + } + + return (1); } if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) @@ -5106,6 +5194,15 @@ status_callback(zpool_handle_t *zhp, void *data) "from a backup source. 
Manually marking the device\n" "\trepaired using 'zpool clear' may allow some data " "to be recovered.\n")); + break; + + case ZPOOL_STATUS_IO_FAILURE_MMP: + (void) printf(gettext("status: The pool is suspended because " + "multihost writes failed or were delayed;\n\tanother " + "system could import the pool undetected.\n")); + (void) printf(gettext("action: Make sure the pool's devices " + "are connected, then reboot your system and\n\timport the " + "pool.\n")); break; case ZPOOL_STATUS_IO_FAILURE_WAIT: Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c Mon Nov 18 07:04:59 2019 (r354803) +++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c Mon Nov 18 09:38:35 2019 (r354804) @@ -128,6 +128,7 @@ #include <errno.h> #include <sys/fs/zfs.h> #include <libnvpair.h> +#include <libzfs.h> #include <libcmdutils.h> static int ztest_fd_data = -1; @@ -166,6 +167,7 @@ typedef struct ztest_shared_opts { uint64_t zo_time; uint64_t zo_maxloops; uint64_t zo_metaslab_force_ganging; + int zo_mmp_test; } ztest_shared_opts_t; static const ztest_shared_opts_t ztest_opts_defaults = { @@ -184,6 +186,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = .zo_passtime = 60, /* 60 seconds */ .zo_killrate = 70, /* 70% kill rate */ .zo_verbose = 0, + .zo_mmp_test = 0, .zo_init = 1, .zo_time = 300, /* 5 minutes */ .zo_maxloops = 50, /* max loops during spa_freeze() */ @@ -343,6 +346,7 @@ ztest_func_t ztest_spa_create_destroy; ztest_func_t ztest_fault_inject; ztest_func_t ztest_ddt_repair; ztest_func_t ztest_dmu_snapshot_hold; +ztest_func_t ztest_mmp_enable_disable; ztest_func_t ztest_scrub; ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_vdev_attach_detach; @@ -388,6 +392,7 @@ ztest_info_t ztest_info[] = { { ztest_fault_inject, 1, &zopt_incessant }, { ztest_ddt_repair, 1, &zopt_sometimes }, { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 
+ { ztest_mmp_enable_disable, 1, &zopt_sometimes }, { ztest_reguid, 1, &zopt_rarely }, { ztest_scrub, 1, &zopt_often }, { ztest_spa_upgrade, 1, &zopt_rarely }, @@ -601,6 +606,7 @@ usage(boolean_t requested) "\t[-k kill_percentage (default: %llu%%)]\n" "\t[-p pool_name (default: %s)]\n" "\t[-f dir (default: %s)] file directory for vdev files\n" + "\t[-M] Multi-host simulate pool imported on remote host\n" "\t[-V] verbose (use multiple times for ever more blather)\n" "\t[-E] use existing pool instead of creating new one\n" "\t[-T time (default: %llu sec)] total run time\n" @@ -644,7 +650,7 @@ process_options(int argc, char **argv) bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); while ((opt = getopt(argc, argv, - "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) { + "v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:o:")) != EOF) { value = 0; switch (opt) { case 'v': @@ -713,6 +719,9 @@ process_options(int argc, char **argv) sizeof (zo->zo_dir)); } break; + case 'M': + zo->zo_mmp_test = 1; + break; case 'V': zo->zo_verbose++; break; @@ -2480,6 +2489,9 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) spa_t *spa; nvlist_t *nvroot; + if (zo->zo_mmp_test) + return; + /* * Attempt to create using a bad file. */ @@ -2511,8 +2523,58 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) rw_exit(&ztest_name_lock); } +/* + * Start and then stop the MMP threads to ensure the startup and shutdown code + * works properly. Actual protection and property-related code tested via ZTS. + */ /* ARGSUSED */ void +ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id) +{ + ztest_shared_opts_t *zo = &ztest_opts; + spa_t *spa = ztest_spa; + + if (zo->zo_mmp_test) + return; + + /* + * Since enabling MMP involves setting a property, it could not be done + * while the pool is suspended. 
+ */ + if (spa_suspended(spa)) + return; + + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + mutex_enter(&spa->spa_props_lock); + + zfs_multihost_fail_intervals = 0; + + if (!spa_multihost(spa)) { + spa->spa_multihost = B_TRUE; + mmp_thread_start(spa); + } + + mutex_exit(&spa->spa_props_lock); + spa_config_exit(spa, SCL_CONFIG, FTAG); + + txg_wait_synced(spa_get_dsl(spa), 0); + mmp_signal_all_threads(); + txg_wait_synced(spa_get_dsl(spa), 0); + + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + mutex_enter(&spa->spa_props_lock); + + if (spa_multihost(spa)) { + mmp_thread_stop(spa); + spa->spa_multihost = B_FALSE; + } + + mutex_exit(&spa->spa_props_lock); + spa_config_exit(spa, SCL_CONFIG, FTAG); +} + +/* ARGSUSED */ +void ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) { spa_t *spa; @@ -2521,6 +2583,9 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot, *props; char *name; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); @@ -2689,6 +2754,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot; int error; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; @@ -2771,6 +2839,9 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) uint64_t guid = 0; int error; + if (ztest_opts.zo_mmp_test) + return; + if (ztest_random(2) == 0) { sav = &spa->spa_spares; aux = ZPOOL_CONFIG_SPARES; @@ -2866,6 +2937,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); /* ensure we have a useable config; mirrors of raidz aren't supported */ @@ -2972,6 +3046,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) int oldvd_is_log; int error, expected_error; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); leaves = 
MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; @@ -5564,6 +5641,9 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) uint64_t orig, load; int error; + if (ztest_opts.zo_mmp_test) + return; + orig = spa_guid(spa); load = spa_load_guid(spa); @@ -6249,7 +6329,7 @@ ztest_run(ztest_shared_t *zs) * Verify that we can export the pool and reimport it under a * different name. */ - if (ztest_random(2) == 0) { + if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) { char name[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(name, sizeof (name), "%s_import", ztest_opts.zo_pool); @@ -6398,6 +6478,56 @@ make_random_props() } /* + * Import a storage pool with the given name. + */ +static void +ztest_import(ztest_shared_t *zs) +{ + libzfs_handle_t *hdl; + importargs_t args = { 0 }; + spa_t *spa; + nvlist_t *cfg = NULL; + int nsearch = 1; + char *searchdirs[nsearch]; + char *name = ztest_opts.zo_pool; + int flags = ZFS_IMPORT_MISSING_LOG; + int error; + + mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL); + + kernel_init(FREAD | FWRITE); + hdl = libzfs_init(); + + searchdirs[0] = ztest_opts.zo_dir; + args.paths = nsearch; + args.path = searchdirs; + args.can_be_active = B_FALSE; + + error = zpool_tryimport(hdl, name, &cfg, &args); + if (error) + (void) fatal(0, "No pools found\n"); + + VERIFY0(spa_import(name, cfg, NULL, flags)); + VERIFY0(spa_open(name, &spa, FTAG)); + zs->zs_metaslab_sz = + 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + spa_close(spa, FTAG); + + libzfs_fini(hdl); + kernel_fini(); + + if (!ztest_opts.zo_mmp_test) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"