commit: 8ab5c8835931fd9ec098dbf4c5f416eb32e4a3a4 Author: Zac Medico <zmedico <AT> gentoo <DOT> org> AuthorDate: Thu Feb 2 03:14:53 2017 +0000 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> CommitDate: Thu Mar 16 02:38:37 2017 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=8ab5c883
movefile: support in-kernel file copying on Linux (bug 607868) Perform in-kernel file copying when possible, and also support reflinks and sparse files. If the optimized implementation fails at runtime, gracefully fall back to a plain read/write loop. Compile-time and run-time fallbacks are implemented, so that any incompatibilities will be handled gracefully. For example, if the code is compiled on a system that supports the copy_file_range syscall, but at run-time an older kernel that does not support this syscall is detected, it will be handled gracefully. There are similar fallbacks for lack of lseek SEEK_DATA and sendfile support. X-Gentoo-Bug: 607868 X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868 Acked-by: Brian Dolbec <dolsen <AT> gentoo.org> pym/portage/tests/util/file_copy/__init__.py | 0 pym/portage/tests/util/file_copy/__test__.py | 0 pym/portage/tests/util/file_copy/test_copyfile.py | 71 ++++ pym/portage/util/file_copy/__init__.py | 36 ++ pym/portage/util/movefile.py | 5 +- setup.py | 9 + src/portage_util_file_copy_reflink_linux.c | 385 ++++++++++++++++++++++ 7 files changed, 504 insertions(+), 2 deletions(-) diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py new file mode 100644 index 000000000..b900fdef0 --- /dev/null +++ b/pym/portage/tests/util/file_copy/test_copyfile.py @@ -0,0 +1,71 @@ +# Copyright 2017 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +import shutil +import tempfile + +from portage import os +from portage.tests import TestCase +from portage.checksum import perform_md5 +from portage.util.file_copy import copyfile + 
+ +class CopyFileTestCase(TestCase): + + def testCopyFile(self): + + tempdir = tempfile.mkdtemp() + try: + src_path = os.path.join(tempdir, 'src') + dest_path = os.path.join(tempdir, 'dest') + content = b'foo' + + with open(src_path, 'wb') as f: + f.write(content) + + copyfile(src_path, dest_path) + + self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) + finally: + shutil.rmtree(tempdir) + + +class CopyFileSparseTestCase(TestCase): + + def testCopyFileSparse(self): + + tempdir = tempfile.mkdtemp() + try: + src_path = os.path.join(tempdir, 'src') + dest_path = os.path.join(tempdir, 'dest') + content = b'foo' + + # Use seek to create some sparse blocks. Don't make these + # files too big, in case the filesystem doesn't support + # sparse files. + with open(src_path, 'wb') as f: + f.write(content) + f.seek(2**17, 1) + f.write(content) + f.seek(2**18, 1) + f.write(content) + # Test that sparse blocks are handled correctly at + # the end of the file (involves seek and truncate). + f.seek(2**17, 1) + + copyfile(src_path, dest_path) + + self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) + + # This last part of the test is expected to fail when sparse + # copy is not implemented, so set the todo flag in order + # to tolerate failures. + self.todo = True + + # If sparse blocks were preserved, then both files should + # consume the same number of blocks. 
+ self.assertEqual( + os.stat(src_path).st_blocks, + os.stat(dest_path).st_blocks) + finally: + shutil.rmtree(tempdir) diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py new file mode 100644 index 000000000..3d9b745be --- /dev/null +++ b/pym/portage/util/file_copy/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2017 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +import os +import shutil +import tempfile + +try: + from portage.util.file_copy.reflink_linux import file_copy as _file_copy +except ImportError: + _file_copy = None + + +def _optimized_copyfile(src, dst): + """ + Copy the contents (no metadata) of the file named src to a file + named dst. + + If possible, copying is done within the kernel, and uses + "copy acceleration" techniques (such as reflinks). This also + supports sparse files. + + @param src: path of source file + @type src: str + @param dst: path of destination file + @type dst: str + """ + with open(src, 'rb', buffering=0) as src_file, \ + open(dst, 'wb', buffering=0) as dst_file: + _file_copy(src_file.fileno(), dst_file.fileno()) + + +if _file_copy is None: + copyfile = shutil.copyfile +else: + copyfile = _optimized_copyfile diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py index 4be1c3b31..37c809eb5 100644 --- a/pym/portage/util/movefile.py +++ b/pym/portage/util/movefile.py @@ -8,7 +8,6 @@ __all__ = ['movefile'] import errno import fnmatch import os as _os -import shutil as _shutil import stat import sys import textwrap @@ -23,6 +22,8 @@ from portage.localization import _ from portage.process import spawn from portage.util import writemsg from portage.util._xattr import xattr +from portage.util.file_copy import copyfile + def _apply_stat(src_stat, dest): _os.chown(dest, src_stat.st_uid, src_stat.st_gid) @@ -114,7 +115,7 @@ def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, _copyfile = selinux.copyfile _rename = selinux.rename 
else: - _copyfile = _shutil.copyfile + _copyfile = copyfile _rename = _os.rename lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding) diff --git a/setup.py b/setup.py index a346bd419..b62476758 100755 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ import collections import glob import os import os.path +import platform import re import subprocess import sys @@ -54,6 +55,14 @@ x_c_helpers = { ], } +if platform.system() == 'Linux': + x_c_helpers.update({ + 'portage.util.file_copy.reflink_linux': [ + 'src/portage_util_file_copy_reflink_linux.c', + ], + }) + + class x_build(build): """ Build command with extra build_man call. """ diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c new file mode 100644 index 000000000..b031d962d --- /dev/null +++ b/src/portage_util_file_copy_reflink_linux.c @@ -0,0 +1,385 @@ +/* Copyright 2017 Gentoo Foundation + * Distributed under the terms of the GNU General Public License v2 + */ + +#include <Python.h> +#include <errno.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *); + +static PyMethodDef reflink_linuxMethods[] = { + { + "file_copy", + _reflink_linux_file_copy, + METH_VARARGS, + "Copy between two file descriptors, " + "with reflink and sparse file support." 
+ }, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "reflink_linux", /* m_name */ + "Module for reflink_linux copy operations", /* m_doc */ + -1, /* m_size */ + reflink_linuxMethods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL, /* m_free */ +}; + +PyMODINIT_FUNC +PyInit_reflink_linux(void) +{ + PyObject *m; + m = PyModule_Create(&moduledef); + return m; +} +#else +PyMODINIT_FUNC +initreflink_linux(void) +{ + Py_InitModule("reflink_linux", reflink_linuxMethods); +} +#endif + + +/** + * cfr_wrapper - A copy_file_range syscall wrapper function, having a + * function signature that is compatible with sendfile. + * @fd_out: output file descriptor + * @fd_in: input file descriptor + * @off_out: offset of the output file + * @len: number of bytes to copy between the file descriptors + * + * Return: Number of bytes written to out_fd on success, -1 on failure + * (errno is set appropriately). + */ +static ssize_t +cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len) +{ +#ifdef __NR_copy_file_range + return syscall(__NR_copy_file_range, fd_in, NULL, fd_out, + off_out, len, 0); +#else + /* This is how it fails at runtime when the syscall is not supported. */ + errno = ENOSYS; + return -1; +#endif +} + +/** + * do_lseek_data - Adjust file offsets to the next location containing + * data, creating sparse empty blocks in the output file as needed. + * @fd_in: input file descriptor + * @fd_out: output file descriptor + * @off_out: offset of the output file + * + * Use lseek SEEK_DATA to adjust the fd_in file offset to the next + * location containing data, and adjust the fd_in file offset and + * off_out to the same location (creating sparse empty blocks as + * needed). On success, both fd_in and fd_out file offsets are + * guaranteed to be exactly equal to the value that off_out points to. 
+ * + * Return: On success, the number of bytes to copy before the next hole, + * and -1 on failure (errno is set appropriately). Returns 0 when fd_in + * reaches EOF. + */ +static off_t +do_lseek_data(int fd_out, int fd_in, loff_t *off_out) { +#ifdef SEEK_DATA + /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support, + * as suggested in the copy_file_range man page. + */ + off_t offset_data, offset_hole; + + offset_data = lseek(fd_in, *off_out, SEEK_DATA); + if (offset_data < 0) { + if (errno == ENXIO) { + /* EOF - If the file ends with a hole, then use lseek SEEK_END + * to find the end offset, and create sparse empty blocks in + * the output file. It's the caller's responsibility to + * truncate the file. + */ + offset_hole = lseek(fd_in, 0, SEEK_END); + if (offset_hole < 0) { + return -1; + } else if (offset_hole != *off_out) { + if (lseek(fd_out, offset_hole, SEEK_SET) < 0) { + return -1; + } + *off_out = offset_hole; + } + return 0; + } + return -1; + } + + /* Create sparse empty blocks in the output file, up + * until the next location that will contain data. + */ + if (offset_data != *off_out) { + if (lseek(fd_out, offset_data, SEEK_SET) < 0) { + return -1; + } + *off_out = offset_data; + } + + /* Locate the next hole, so that we know when to + * stop copying. There is an implicit hole at the + * end of the file. This should never result in ENXIO + * after SEEK_DATA has succeeded above. + */ + offset_hole = lseek(fd_in, offset_data, SEEK_HOLE); + if (offset_hole < 0) { + return -1; + } + + /* Revert SEEK_HOLE offset change, since we're going + * to copy the data that comes before the hole. + */ + if (lseek(fd_in, offset_data, SEEK_SET) < 0) { + return -1; + } + + return offset_hole - offset_data; +#else + /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */ + errno = EINVAL; + return -1; +#endif +} + + +/** + * _reflink_linux_file_copy - Copy between two file descriptors, with + * reflink and sparse file support. 
+ * @fd_in: input file descriptor + * @fd_out: output file descriptor + * + * When supported, this uses copy_file_range for reflink support, + * and lseek SEEK_DATA for sparse file support. It has graceful + * fallbacks when support is unavailable for copy_file_range, lseek + * SEEK_DATA, or sendfile operations. When all else fails, it uses + * a plain read/write loop that works in any kernel version. + * + * If a syscall is interrupted by a signal, then the function will + * automatically resume copying at the appropriate location which is + * tracked internally by the offset_out variable. + * + * Return: The length of the output file on success. Raise OSError + * on failure. + */ +static PyObject * +_reflink_linux_file_copy(PyObject *self, PyObject *args) +{ + int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired; + int lseek_works, sendfile_works; + off_t offset_out, len; + ssize_t buf_bytes, buf_offset, copyfunc_ret; + struct stat stat_in, stat_out; + char* buf; + ssize_t (*copyfunc)(int, int, loff_t *, size_t); + + if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) + return NULL; + + eintr_retry = 1; + offset_out = 0; + stat_in_acquired = 0; + stat_out_acquired = 0; + buf = NULL; + buf_bytes = 0; + buf_offset = 0; + copyfunc = cfr_wrapper; + lseek_works = 1; + sendfile_works = 1; + + while (eintr_retry) { + + Py_BEGIN_ALLOW_THREADS + + /* Linux 3.1 and later support SEEK_DATA (for sparse file support). + * This code uses copy_file_range if possible, and falls back to + * sendfile for cross-device or when the copy_file_range syscall + * is not available (less than Linux 4.5). This will fail for + * Linux less than 3.1, which does not support the lseek SEEK_DATA + * parameter. 
+ */ + if (sendfile_works && lseek_works) { + error = 0; + + while (1) { + len = do_lseek_data(fd_out, fd_in, &offset_out); + if (!len) { + /* EOF */ + break; + } else if (len < 0) { + error = errno; + if (errno == EINVAL && !offset_out) { + lseek_works = 0; + } + break; + } + + /* For the copyfunc call, the fd_in file offset must be + * exactly equal to offset_out. The above do_lseek_data + * function guarantees correct state. + */ + copyfunc_ret = copyfunc(fd_out, + fd_in, + &offset_out, + len); + + if (copyfunc_ret < 0) { + error = errno; + if ((errno == EXDEV || errno == ENOSYS) && + copyfunc == cfr_wrapper) { + /* Use sendfile instead of copy_file_range for + * cross-device copies, or when the copy_file_range + * syscall is not available (less than Linux 4.5). + */ + error = 0; + copyfunc = sendfile; + copyfunc_ret = copyfunc(fd_out, + fd_in, + &offset_out, + len); + + if (copyfunc_ret < 0) { + error = errno; + /* On Linux, if lseek succeeded above, then + * sendfile should have worked here too, so + * don't bother to fallback for EINVAL here. + */ + break; + } + } else { + break; + } + } + } + } + + /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range, + * so just use sendfile for in-kernel copy. This will fail for Linux + * versions from 2.6.0 to 2.6.32, because sendfile does not support + * writing to regular files. + */ + if (sendfile_works && !lseek_works) { + error = 0; + + if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) { + error = errno; + } else { + stat_in_acquired = 1; + + /* For the sendfile call, the fd_in file offset must be + * exactly equal to offset_out. Use lseek to ensure + * correct state, in case an EINTR retry caused it to + * get out of sync somewhow. 
+ */ + if (lseek(fd_in, offset_out, SEEK_SET) < 0) { + error = errno; + } else { + while (offset_out < stat_in.st_size) { + copyfunc_ret = sendfile(fd_out, + fd_in, + &offset_out, + stat_in.st_size - offset_out); + + if (copyfunc_ret < 0) { + error = errno; + if (errno == EINVAL && !offset_out) { + sendfile_works = 0; + } + break; + } + } + } + } + } + + /* This implementation will work on any kernel. */ + if (!sendfile_works) { + error = 0; + + if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) { + error = errno; + } else { + stat_out_acquired = 1; + if (buf == NULL) + buf = malloc(stat_out.st_blksize); + if (buf == NULL) { + error = errno; + + /* For the read call, the fd_in file offset must be + * exactly equal to offset_out. Use lseek to ensure + * correct state, in case an EINTR retry caused it to + * get out of sync somewhow. + */ + } else if (lseek(fd_in, offset_out, SEEK_SET) < 0) { + error = errno; + } else { + while (1) { + /* Some bytes may still be buffered from the + * previous iteration of the outer loop. + */ + if (!buf_bytes) { + buf_offset = 0; + buf_bytes = read(fd_in, buf, stat_out.st_blksize); + + if (!buf_bytes) { + /* EOF */ + break; + + } else if (buf_bytes < 0) { + error = errno; + break; + } + } + + copyfunc_ret = write(fd_out, + buf + buf_offset, + buf_bytes); + + if (copyfunc_ret < 0) { + error = errno; + break; + } + + buf_bytes -= copyfunc_ret; + buf_offset += copyfunc_ret; + offset_out += copyfunc_ret; + } + } + } + } + + if (!error && ftruncate(fd_out, offset_out) < 0) + error = errno; + + Py_END_ALLOW_THREADS + + if (!(error == EINTR && PyErr_CheckSignals() == 0)) + eintr_retry = 0; + } + + if (buf != NULL) + free(buf); + + if (error) + return PyErr_SetFromErrno(PyExc_OSError); + + return Py_BuildValue("i", offset_out); +}