This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 6908620 [consensus] force fsync metadata on XFS 6908620 is described below commit 6908620821ab93cb964f0f7a3674abf9843a9b5f Author: Andrew Wong <aw...@cloudera.com> AuthorDate: Tue Dec 14 11:48:44 2021 -0800 [consensus] force fsync metadata on XFS We've seen issues like KUDU-2195 rear their heads more prominently on XFS, sometimes requiring costly surgery to fix by restoring metadata if all replicas lose their consensus metadata. This patch adds to the band-aid flag --cmeta_force_fsync, checking if the metadata is on an XFS mount, and if so, always fsyncing. This new behavior is enabled by default with the --cmeta_fsync_override_on_xfs flag. Change-Id: Ie97138e3522f20325be104d3665d887693b79a10 Reviewed-on: http://gerrit.cloudera.org:8080/18101 Reviewed-by: Alexey Serbin <aser...@cloudera.com> Tested-by: Kudu Jenkins Reviewed-by: Attila Bukor <abu...@apache.org> --- src/kudu/consensus/consensus_meta.cc | 6 +++++- src/kudu/fs/fs_manager.cc | 26 ++++++++++++++++++++++++-- src/kudu/fs/fs_manager.h | 7 +++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/kudu/consensus/consensus_meta.cc b/src/kudu/consensus/consensus_meta.cc index 18a8f47..60d3758 100644 --- a/src/kudu/consensus/consensus_meta.cc +++ b/src/kudu/consensus/consensus_meta.cc @@ -50,6 +50,8 @@ DEFINE_bool(cmeta_force_fsync, false, "Whether fsync() should be called when consensus metadata files are updated"); TAG_FLAG(cmeta_force_fsync, advanced); +DECLARE_bool(cmeta_fsync_override_on_xfs); + using std::string; using strings::Substitute; @@ -302,6 +304,8 @@ Status ConsensusMetadata::Flush(FlushMode flush_mode) { "Unable to fsync consensus parent dir " + parent_dir); } + const bool cmeta_force_fsync = + FLAGS_cmeta_force_fsync || (FLAGS_cmeta_fsync_override_on_xfs && fs_manager_->meta_on_xfs()); string meta_file_path = fs_manager_->GetConsensusMetadataPath(tablet_id_); RETURN_NOT_OK_PREPEND(pb_util::WritePBContainerToPath( fs_manager_->env(), meta_file_path, pb_, @@ -315,7 +319,7 @@ Status ConsensusMetadata::Flush(FlushMode flush_mode) { // fsync() due to periodic commit with default settings, whereas other // filesystems such as XFS will not commit as often and need the fsync to // avoid significant data loss when a crash happens. - FLAGS_log_force_fsync_all || FLAGS_cmeta_force_fsync ? pb_util::SYNC : pb_util::NO_SYNC), + FLAGS_log_force_fsync_all || cmeta_force_fsync ? pb_util::SYNC : pb_util::NO_SYNC), Substitute("Unable to write consensus meta file for tablet $0 to path $1", tablet_id_, meta_file_path)); return UpdateOnDiskSize(); diff --git a/src/kudu/fs/fs_manager.cc b/src/kudu/fs/fs_manager.cc index 03fd2cd..d84c751 100644 --- a/src/kudu/fs/fs_manager.cc +++ b/src/kudu/fs/fs_manager.cc @@ -60,6 +60,14 @@ #include "kudu/util/stopwatch.h" #include "kudu/util/timer.h" +DEFINE_bool(cmeta_fsync_override_on_xfs, true, + "Whether to ignore --cmeta_force_fsync and instead always flush if Kudu detects " + "the server is on XFS. This can prevent consensus metadata corruption in the " + "event of sudden server failure. Disabling this flag may cause data loss in " + "the event of a system crash. See KUDU-2195 for more details."); +TAG_FLAG(cmeta_fsync_override_on_xfs, experimental); +TAG_FLAG(cmeta_fsync_override_on_xfs, advanced); + DEFINE_bool(enable_data_block_fsync, true, "Whether to enable fsync() of data blocks, metadata, and their parent directories. " "Disabling this flag may cause data loss in the event of a system crash."); @@ -175,7 +183,8 @@ FsManager::FsManager(Env* env, FsManagerOpts opts) : env_(DCHECK_NOTNULL(env)), opts_(std::move(opts)), error_manager_(new FsErrorManager()), - initted_(false) { + initted_(false), + meta_on_xfs_(false) { DCHECK(opts_.update_instances == UpdateInstanceBehavior::DONT_UPDATE || !opts_.read_only) << "FsManager can only be for updated if not in read-only mode"; } @@ -361,10 +370,23 @@ Status FsManager::PartialOpen(CanonicalizedRootsList* missing_roots) { reference_instance_path, metadata_->uuid() , root.path, pb->uuid())); } } - if (!metadata_) { return Status::NotFound("could not find a healthy instance file"); } + const auto& meta_root_path = canonicalized_metadata_fs_root_.path; + const auto bad_meta_fs_msg = + Substitute("Could not determine file system of metadata directory $0", + meta_root_path); + Status s = env_->IsOnXfsFilesystem(meta_root_path, &meta_on_xfs_); + if (FLAGS_cmeta_fsync_override_on_xfs) { + // Err on the side of visibility if we expect a behavior change based on + // the file system. + RETURN_NOT_OK_PREPEND(s, bad_meta_fs_msg); + } else { + WARN_NOT_OK(s, bad_meta_fs_msg); + } + VLOG(1) << Substitute("Detected metadata directory is $0on an XFS mount: $1", + meta_on_xfs_ ? "" : "not ", meta_root_path); return Status::OK(); } diff --git a/src/kudu/fs/fs_manager.h b/src/kudu/fs/fs_manager.h index ff04514..9243e26 100644 --- a/src/kudu/fs/fs_manager.h +++ b/src/kudu/fs/fs_manager.h @@ -311,6 +311,10 @@ class FsManager { // Prints the file system trees under the file system roots. void DumpFileSystemTree(std::ostream& out); + bool meta_on_xfs() const { + return meta_on_xfs_; + } + private: FRIEND_TEST(fs::FsManagerTestBase, TestDuplicatePaths); FRIEND_TEST(fs::FsManagerTestBase, TestEIOWhileRunningUpdateDirsTool); @@ -407,6 +411,9 @@ class FsManager { bool initted_; + // Cache whether or not the metadata directory is on an XFS directory. + bool meta_on_xfs_; + DISALLOW_COPY_AND_ASSIGN(FsManager); };