Module Name: src Committed By: jmmv Date: Wed Mar 30 11:10:56 UTC 2011
Modified Files: src/external/bsd/atf/dist/atf-run: fs.cpp Log Message: Pull up upstream revision 648ed6360b2b7cda81a6079b00dc436d09c745b8: Retry calls that raise file system errors during cleanup If a test case mounts user-space (puffs/fuse) file systems or spawns server processes that create pid files, the termination of the corresponding processes does not guarantee that the file system is left in a consistent state immediately. The cleanup routines of both components (file systems and daemons) may still be running. This situation causes a race condition between the termination of the auxiliary processes and our own file system cleanup: the file system calls performed from within the cleanup routine may raise errors because the file system is still changing underneath. (E.g. we first enumerate the contents of a directory and get file X, but when we attempt to delete file X, it may be gone.) Deal with this by retrying failing file system calls a few times and ignoring "expected" errors before giving up. To generate a diff of this commit: cvs rdiff -u -r1.1.1.3 -r1.2 src/external/bsd/atf/dist/atf-run/fs.cpp Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/external/bsd/atf/dist/atf-run/fs.cpp diff -u src/external/bsd/atf/dist/atf-run/fs.cpp:1.1.1.3 src/external/bsd/atf/dist/atf-run/fs.cpp:1.2 --- src/external/bsd/atf/dist/atf-run/fs.cpp:1.1.1.3 Sun Nov 7 17:43:27 2010 +++ src/external/bsd/atf/dist/atf-run/fs.cpp Wed Mar 30 11:10:56 2011 @@ -1,7 +1,7 @@ // // Automated Testing Framework (atf) // -// Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. +// Copyright (c) 2007, 2008, 2009, 2011 The NetBSD Foundation, Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,6 +45,7 @@ #include <cstring> #include "atf-c++/detail/process.hpp" +#include "atf-c++/detail/sanity.hpp" #include "fs.hpp" #include "user.hpp" @@ -61,6 +62,19 @@ bool); static void do_unmount(const atf::fs::path&); +// The cleanup routines below are tricky: they are executed immediately after +// a test case's death, and after we have forcibly killed any stale processes. +// However, even if the processes are dead, this does not mean that the file +// system we are scanning is stable. In particular, if the test case has +// mounted file systems through fuse/puffs, the fact that the processes died +// does not mean that the file system is truly unmounted. +// +// The code below attempts to cope with this by catching errors and either +// ignoring them or retrying the actions on the same file/directory a few times +// before giving up. +static const int max_retries = 5; +static const int retry_delay_in_seconds = 1; + // The erase parameter in this routine is to control nested mount points. // We want to descend into a mount point to unmount anything that is // mounted under it, but we do not want to delete any files while doing @@ -70,19 +84,24 @@ void cleanup_aux(const atf::fs::path& p, dev_t parent_device, bool erase) { - atf::fs::file_info fi(p); + try { + atf::fs::file_info fi(p); - if (fi.get_type() == atf::fs::file_info::dir_type) - cleanup_aux_dir(p, fi, fi.get_device() == parent_device); + if (fi.get_type() == atf::fs::file_info::dir_type) + cleanup_aux_dir(p, fi, fi.get_device() == parent_device); - if (fi.get_device() != parent_device) - do_unmount(p); + if (fi.get_device() != parent_device) + do_unmount(p); - if (erase) { - if (fi.get_type() == atf::fs::file_info::dir_type) - atf::fs::rmdir(p); - else - atf::fs::remove(p); + if (erase) { + if (fi.get_type() == atf::fs::file_info::dir_type) + atf::fs::rmdir(p); + else + atf::fs::remove(p); + } + } catch (const atf::system_error& e) { + if (e.code() != ENOENT && e.code() != ENOTDIR) + throw e; } } @@ -92,15 +111,39 @@ bool erase) { if (erase && ((fi.get_mode() & S_IRWXU) != S_IRWXU)) { - if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1) - throw atf::system_error(IMPL_NAME "::cleanup(" + - p.str() + ")", "chmod(2) failed", errno); + int retries = max_retries; +retry_chmod: + if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1) { + if (retries > 0) { + retries--; + ::sleep(retry_delay_in_seconds); + goto retry_chmod; + } else { + throw atf::system_error(IMPL_NAME "::cleanup(" + + p.str() + ")", "chmod(2) failed", + errno); + } + } } std::set< std::string > subdirs; { - const atf::fs::directory d(p); - subdirs = d.names(); + bool ok = false; + int retries = max_retries; + while (!ok) { + INV(retries > 0); + try { + const atf::fs::directory d(p); + subdirs = d.names(); + ok = true; + } catch (const atf::system_error& e) { + if (retries == 0) + throw e; + retries--; + ::sleep(retry_delay_in_seconds); + } + } + INV(ok); } for (std::set< std::string >::const_iterator iter = subdirs.begin(); @@ -122,9 +165,18 @@ in_path : in_path.to_absolute(); #if defined(HAVE_UNMOUNT) - if (unmount(abs_path.c_str(), 0) == -1) - throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() + ")", - "unmount(2) failed", errno); + int retries = max_retries; +retry_unmount: + if (unmount(abs_path.c_str(), 0) == -1) { + if (errno == EBUSY && retries > 0) { + retries--; + ::sleep(retry_delay_in_seconds); + goto retry_unmount; + } else { + throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() + + ")", "unmount(2) failed", errno); + } + } #else // We could use umount(2) instead if it was available... but // trying to do so under, e.g. Linux, is a nightmare because we