alamb commented on code in PR #643:
URL:
https://github.com/apache/arrow-rs-object-store/pull/643#discussion_r3415701408
##########
src/local.rs:
##########
@@ -816,23 +840,168 @@ impl LocalFileSystem {
}
/// Creates the parent directories of `path` or returns an error based on
`source` if no parent
-fn create_parent_dirs(path: &std::path::Path, source: io::Error) -> Result<()>
{
+///
+/// When `fsync` is true, every directory created here is fsynced, up to and
including the first
+/// pre-existing ancestor (whose entry list also changed), so the new
directory entries are durable.
+fn create_parent_dirs(path: &std::path::Path, source: io::Error, fsync: bool)
-> Result<()> {
let parent = path.parent().ok_or_else(|| {
let path = path.to_path_buf();
Error::UnableToCreateFile { path, source }
})?;
+ // Record the deepest already-existing ancestor *before* creating any
directories, so that
+ // afterwards we know exactly which directories are new and need to be
fsynced.
+ let first_existing = fsync.then(|| {
+ let mut dir = parent;
+ while !dir.exists() {
+ match dir.parent() {
+ Some(p) => dir = p,
+ None => break,
+ }
+ }
+ dir.to_path_buf()
+ });
+
std::fs::create_dir_all(parent).map_err(|source| {
let path = parent.into();
Error::UnableToCreateDir { source, path }
})?;
+
+ if let Some(first_existing) = first_existing {
+ // Walk from `parent` up to `first_existing`, fsyncing each directory
whose entries changed.
+ let mut dir = parent;
+ loop {
+ fsync_dir(dir).map_err(|source| Error::UnableToSyncFile {
+ source,
+ path: dir.into(),
+ })?;
+ if dir == first_existing {
+ break;
+ }
+ dir = match dir.parent() {
+ Some(p) => p,
+ None => break,
+ };
+ }
+ }
+ Ok(())
+}
+
+/// Renames `from` to `to`, then — when `fsync` is enabled — fsyncs the
destination's parent
+/// directory (and the source's too, if it differs) so the moved directory
entries are durable.
+///
+/// The directory fsync is bundled in deliberately: every durable rename goes
through here, so the
+/// post-rename fsync can never be forgotten at an individual call site.
+fn rename(from: &std::path::Path, to: &std::path::Path, fsync: bool) ->
io::Result<()> {
+ std::fs::rename(from, to)?;
+ if fsync {
+ fsync_parent_dir(to)?;
+ // A cross-directory move also removes an entry from the source
directory.
+ if from.parent() != to.parent() {
+ fsync_parent_dir(from)?;
+ }
+ }
+ Ok(())
+}
+
+/// Hard-links `original` to `link`, then — when `fsync` is enabled — fsyncs
`link`'s parent
+/// directory so the new directory entry is durable.
+///
+/// As with [`rename`], the directory fsync is bundled in so it cannot be
forgotten at a call site.
+fn hard_link(original: &std::path::Path, link: &std::path::Path, fsync: bool)
-> io::Result<()> {
+ std::fs::hard_link(original, link)?;
+ if fsync {
+ fsync_parent_dir(link)?;
+ }
+ Ok(())
+}
+
+/// Durably publishes the freshly-written staging file `file` (located at
`src`) to `dest` via a
+/// rename.
+///
+/// When `fsync` is enabled, the file's contents are flushed before — and
`dest`'s parent
+/// directory after — the rename, so a successful return is durable. The file
is always closed
+/// before the rename (checking for close errors): required for NFS error
detection and for some
+/// FUSE filesystems (e.g. Blobfuse) that only commit the data on close.
+fn finish_staged_rename(
+ file: File,
+ src: &std::path::Path,
+ dest: &std::path::Path,
+ fsync: bool,
+) -> Result<()> {
+ sync_and_close(file, src, fsync)?;
+ rename(src, dest, fsync).map_err(|source| Error::UnableToRenameFile {
source })?;
+ Ok(())
+}
+
+/// Like [`finish_staged_rename`] but publishes via a hard link
(`PutMode::Create` semantics): the
+/// staging file is linked to `dest` and then removed. Returns
[`Error::AlreadyExists`] if `dest`
+/// already exists.
+fn finish_staged_hard_link(
+ file: File,
+ src: &std::path::Path,
+ dest: &std::path::Path,
+ fsync: bool,
+) -> Result<()> {
+ sync_and_close(file, src, fsync)?;
+ match hard_link(src, dest, fsync) {
+ Ok(()) => {
+ let _ = std::fs::remove_file(src); // Attempt to cleanup
+ Ok(())
+ }
+ Err(source) => match source.kind() {
+ ErrorKind::AlreadyExists => Err(Error::AlreadyExists {
+ path: dest.to_str().unwrap().to_string(),
+ source,
+ }
+ .into()),
+ _ => Err(Error::UnableToRenameFile { source }.into()),
+ },
+ }
+}
+
+/// Flushes the freshly-written `file`'s contents to disk (when `fsync` is
enabled) and then
+/// closes it, checking for close errors that [`File::drop`] would silently
ignore.
+fn sync_and_close(file: File, path: &std::path::Path, fsync: bool) ->
Result<()> {
+ if fsync {
+ file.sync_all().map_err(|source| Error::UnableToSyncFile {
+ source,
+ path: path.into(),
+ })?;
+ }
+ close_file(file).map_err(|source| Error::UnableToCopyDataToFile { source
})?;
Ok(())
}
+/// Fsyncs the parent directory of `path` so a change to its directory entries
(e.g. one just made
+/// by a rename or hard link) is durable. A no-op when `path` has no parent.
+fn fsync_parent_dir(path: &std::path::Path) -> io::Result<()> {
+ match path.parent() {
+ Some(parent) => fsync_dir(parent),
+ None => Ok(()),
+ }
+}
+
+/// Fsyncs `dir_path` so that changes to its directory entries are durable.
+///
+/// This is only meaningful on Unix; on other platforms (e.g. Windows)
directories cannot be
+/// portably opened as a [`File`] and synced, so this is a no-op.
+fn fsync_dir(dir_path: &std::path::Path) -> io::Result<()> {
Review Comment:
I spent some time reviewing this and reviewing the calls to `fsync` -- to be
clear the the
[File::fsync_all](https://doc.rust-lang.org/beta/std/fs/struct.File.html#method.sync_all)
docs despite the name, only flushes the contents of this file descriptor to
disk.
It also might be worthwhile pointing out that fsync_dir is actually doing
three syscalls (`open`, `fsync` and then an implicit `close`). However,
apparently this is the same pattern as sqlite uses
https://github.com/sqlite/sqlite/blob/9a5801af3a411c880f556233dda7a3097967f8af/src/os_unix.c#L3945-L3948
so looks good to me
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]