Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package virtiofsd for openSUSE:Factory checked in at 2024-01-31 23:53:23
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/virtiofsd (Old)
 and      /work/SRC/openSUSE:Factory/.virtiofsd.new.1815 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "virtiofsd"

Wed Jan 31 23:53:23 2024 rev:4 rq:1142838 version:1.10.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/virtiofsd/virtiofsd.changes      2023-09-02 22:07:22.914682372 +0200
+++ /work/SRC/openSUSE:Factory/.virtiofsd.new.1815/virtiofsd.changes    2024-01-31 23:53:32.812549276 +0100
@@ -1,0 +2,14 @@
+Tue Jan 30 17:09:25 UTC 2024 - caleb.cr...@suse.com
+
+- Fix CVE-2023-50711: vmm-sys-util: out of bounds memory accesses (bsc#1218502, bsc#1218500)
+- Update to version 1.10.1:
+  * Bump version to v1.10.1
+  * Fix mandatory user namespaces
+  * Don't drop supplemental groups in unprivileged user namespace
+  * Bump version to v1.10.0
+  * Update rust-vmm dependencies (bsc#1218500)
+  * Bump version to v1.9.0
+- Spec: switch to using the upstream virtio-fs config file for qemu
+- Spec: switch back to greedy cargo updates of vendored dependencies
+
+-------------------------------------------------------------------

Old:
----
  50-qemu-virtiofsd.json
  virtiofsd-1.7.2.tar.xz

New:
----
  virtiofsd-1.10.1.tar.xz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ virtiofsd.spec ++++++
--- /var/tmp/diff_new_pack.h6GduW/_old  2024-01-31 23:53:34.136597045 +0100
+++ /var/tmp/diff_new_pack.h6GduW/_new  2024-01-31 23:53:34.136597045 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package virtiofsd
 #
-# Copyright (c) 2023 SUSE LLC
+# Copyright (c) 2024 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -17,7 +17,7 @@
 
 Name:           virtiofsd
-Version:        1.7.2
+Version:        1.10.1
 Release:        0
 Summary:        vhost-user virtio-fs device backend written in Rust
 Group:          Development/Libraries/Rust
@@ -26,7 +26,6 @@
 Source0:        %{name}-%{version}.tar.xz
 Source1:        vendor.tar.xz
 Source2:        cargo_config
-Source3:        50-qemu-virtiofsd.json
 BuildRequires:  cargo-packaging
 BuildRequires:  libcap-ng-devel
 BuildRequires:  libseccomp-devel
@@ -47,7 +46,7 @@
 %install
 mkdir -p %{buildroot}%{_libexecdir}
 install -D -p -m 0755 %{_builddir}/%{name}-%{version}/target/release/virtiofsd %{buildroot}%{_libexecdir}/virtiofsd
-install -D -p -m 0644 %{SOURCE3} %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json
+install -D -p -m 0644 %{_builddir}/%{name}-%{version}/50-virtiofsd.json %{buildroot}%{_datadir}/qemu/vhost-user/50-virtiofsd.json
 
 %check
 %{cargo_test}
@@ -57,5 +56,5 @@
 %{_libexecdir}/virtiofsd
 %dir %{_datadir}/qemu
 %dir %{_datadir}/qemu/vhost-user
-%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json
+%{_datadir}/qemu/vhost-user/50-virtiofsd.json

++++++ _service ++++++
--- /var/tmp/diff_new_pack.h6GduW/_old  2024-01-31 23:53:34.164598055 +0100
+++ /var/tmp/diff_new_pack.h6GduW/_new  2024-01-31 23:53:34.168598199 +0100
@@ -3,10 +3,11 @@
     <param name="url">https://gitlab.com/virtio-fs/virtiofsd.git</param>
     <param name="scm">git</param>
     <param name="filename">virtiofsd</param>
-    <param name="revision">v1.7.2</param>
+    <param name="revision">v1.10.1</param>
     <param name="versionformat">@PARENT_TAG@</param>
     <param name="versionrewrite-pattern">[v]?([^\+]+)(.*)</param>
     <param name="versionrewrite-replacement">\1</param>
+    <param name="changesgenerate">enable</param>
   </service>
 
   <service name="set_version" mode="disabled"/>
@@ -25,7 +26,7 @@
   <service name="cargo_vendor" mode="disabled">
     <param name="srcdir">virtiofsd</param>
     <param name="compression">xz</param>
-    <param name="update">false</param>
+    <param name="update">true</param>
   </service>
 
 </services>

++++++ vendor.tar.xz ++++++
/work/SRC/openSUSE:Factory/virtiofsd/vendor.tar.xz /work/SRC/openSUSE:Factory/.virtiofsd.new.1815/vendor.tar.xz differ: char 26, line 1

++++++ virtiofsd-1.7.2.tar.xz -> virtiofsd-1.10.1.tar.xz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/50-qemu-virtiofsd.json new/virtiofsd-1.10.1/50-qemu-virtiofsd.json
--- old/virtiofsd-1.7.2/50-qemu-virtiofsd.json  2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/50-qemu-virtiofsd.json 1970-01-01 01:00:00.000000000 +0100
@@ -1,5 +0,0 @@
-{
-  "description": "QEMU virtiofsd vhost-user-fs",
-  "type": "fs",
-  "binary": "/usr/libexec/virtiofsd"
-}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/50-virtiofsd.json new/virtiofsd-1.10.1/50-virtiofsd.json
--- old/virtiofsd-1.7.2/50-virtiofsd.json       1970-01-01 01:00:00.000000000 +0100
+++ new/virtiofsd-1.10.1/50-virtiofsd.json      2024-01-24 10:36:29.000000000 +0100
@@ -0,0 +1,5 @@
+{
+  "description": "virtiofsd vhost-user-fs",
+  "type": "fs",
+  "binary": "/usr/libexec/virtiofsd"
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/Cargo.lock new/virtiofsd-1.10.1/Cargo.lock
--- old/virtiofsd-1.7.2/Cargo.lock      2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/Cargo.lock     2024-01-24 10:36:29.000000000 +0100
@@ -91,9 +91,9 @@
 
 [[package]]
 name = "bitflags"
-version = "2.3.3"
+version = "2.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
+checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
 
 [[package]]
 name = "capng"
@@ -469,7 +469,7 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "172891ebdceb05aa0005f533a6cbfca599ddd7d966f6f5d4d9b2e70478e70399"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.1",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -586,11 +586,11 @@
 
 [[package]]
 name = "vhost"
-version = "0.8.1"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61957aeb36daf0b00b87fff9c10dd28a161bd35ab157553d340d183b3d8756e6"
+checksum = "2b64e816d0d49769fbfaa1494eb77cc2a3ddc526ead05c7f922cb7d64106286f"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.4.1",
  "libc",
  "vm-memory",
  "vmm-sys-util",
@@ -598,9 +598,9 @@
 
 [[package]]
 name = "vhost-user-backend"
-version = "0.10.1"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab069cdedaf18a0673766eb0a07a0f4ee3ed1b8e17fbfe4aafe5b988e2de1d01"
+checksum = "72c8c447d076ac508d78cb45664d203df7989e891656dce260a7e93d72352c9a"
 dependencies = [
  "libc",
  "log",
@@ -613,15 +613,15 @@
 
 [[package]]
 name = "virtio-bindings"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c18d7b74098a946470ea265b5bacbbf877abc3373021388454de0d47735a5b98"
+checksum = "878bcb1b2812a10c30d53b0ed054999de3d98f25ece91fc173973f9c57aaae86"
 
 [[package]]
 name = "virtio-queue"
-version = "0.9.0"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35aca00da06841bd99162c381ec65893cace23ca0fb89254302cfe4bec4c300f"
+checksum = "e3f69a13d6610db9312acbb438b0390362af905d37634a2106be70c0f734986d"
 dependencies = [
  "log",
  "virtio-bindings",
@@ -631,7 +631,7 @@
 
 [[package]]
 name = "virtiofsd"
-version = "1.7.2"
+version = "1.10.1"
 dependencies = [
  "bitflags 1.3.2",
  "capng",
@@ -652,21 +652,23 @@
 
 [[package]]
 name = "vm-memory"
-version = "0.12.0"
+version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a77c7a0891cbac53618f5f6eec650ed1dc4f7e506bbe14877aff49d94b8408b0"
+checksum = "74ffc42216c32c35f858fa4bfdcd9b61017dfd691e0240268fdc85dbf59e5459"
 dependencies = [
  "arc-swap",
+ "bitflags 2.4.1",
  "libc",
  "thiserror",
+ "vmm-sys-util",
  "winapi",
 ]
 
 [[package]]
 name = "vmm-sys-util"
-version = "0.11.1"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd64fe09d8e880e600c324e7d664760a17f56e9672b7495a86381b49e4f72f46"
+checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede"
 dependencies = [
  "bitflags 1.3.2",
  "libc",
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/Cargo.toml new/virtiofsd-1.10.1/Cargo.toml
--- old/virtiofsd-1.7.2/Cargo.toml      2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/Cargo.toml     2024-01-24 10:36:29.000000000 +0100
@@ -1,7 +1,7 @@
 [package]
 name = "virtiofsd"
 description = "A virtio-fs vhost-user device daemon"
-version = "1.7.2"
+version = "1.10.1"
 authors = ["The Virtiofs Project Developers"]
 edition = "2018"
 homepage = "https://virtio-fs.gitlab.io/"
@@ -11,6 +11,10 @@
 
 exclude = [".gitlab-ci.yml"]
 
+[features]
+# Enabling Xen support will _disable_ QEMU/KVM support!
+xen = ["vhost-user-backend/xen", "vhost/xen", "vm-memory/xen"]
+
 [dependencies]
 bitflags = "1.2"
 capng = "0.2.2"
@@ -20,12 +24,12 @@
 log = "0.4"
 libseccomp-sys = "0.2"
 clap = { version = "4", features = ["derive"] }
-vhost-user-backend = "0.10.1"
-vhost = "0.8.1"
-virtio-bindings = { version = "0.2", features = ["virtio-v5_0_0"] }
-vm-memory = { version = "0.12.0", features = ["backend-mmap", "backend-atomic"] }
-virtio-queue = "0.9.0"
-vmm-sys-util = "0.11.1"
+vhost-user-backend = "0.13.1"
+vhost = "0.10.0"
+virtio-bindings = { version = "0.2.1", features = ["virtio-v5_0_0"] }
+vm-memory = { version = "0.14.0", features = ["backend-mmap", "backend-atomic"] }
+virtio-queue = "0.11.0"
+vmm-sys-util = "0.12.1"
 syslog = "6.0"
 
 [profile.release]
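A note on the new `[features]` stanza: cargo features are normally additive, so a feature that disables QEMU/KVM support is unusual and worth the loud comment. Below is a generic sketch (names are ours, not virtiofsd's) of how such a feature gates mutually exclusive code paths at compile time, assuming a crate whose Cargo.toml declares `xen = []`:

```rust
// Hypothetical illustration of cfg-gating on a cargo feature; virtiofsd's
// real gating happens inside its vhost/vm-memory dependencies.
#[cfg(feature = "xen")]
fn guest_memory_flavor() -> &'static str {
    // Compiled only with `cargo build --features xen`.
    "xen mappings"
}

#[cfg(not(feature = "xen"))]
fn guest_memory_flavor() -> &'static str {
    // Default build, as used with QEMU/KVM.
    "plain mmap"
}

fn main() {
    println!("guest memory flavor: {}", guest_memory_flavor());
}
```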
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/README.md new/virtiofsd-1.10.1/README.md
--- old/virtiofsd-1.7.2/README.md       2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/README.md      2024-01-24 10:36:29.000000000 +0100
@@ -167,6 +167,17 @@
 Shared directory path.
 
 ```shell
+--tag <tag>
+```
+The tag that the virtio device advertises.
+
+Setting this option will enable advertising of VHOST_USER_PROTOCOL_F_CONFIG.
+However, the vhost-user frontend of your hypervisor may not negotiate this
+feature and (or) ignore this value. Notably, QEMU currently (as of 8.1)
+ignores the CONFIG feature. QEMU versions from 7.1 to 8.0 will crash while
+attempting to log a warning about not supporting the feature.
+
+```shell
 --socket-group <socket-group>
 ```
 Name of group for the vhost-user socket.
@@ -245,7 +256,7 @@
 ```shell
 --cache <cache>
 ```
-The caching policy the file system should use (auto, always, never).
+The caching policy the file system should use (auto, always, metadata, never).
 Default: auto.
@@ -324,13 +335,15 @@
     -device virtio-blk,drive=hdd \
     -chardev socket,id=char0,path=/tmp/vfsd.sock \
     -device vhost-user-fs-pci,queue-size=1024,chardev=char0,tag=myfs \
-    -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \
+    -object memory-backend-memfd,id=mem,size=4G,share=on \
     -numa node,memdev=mem \
     -accel kvm -m 4G
 
 guest# mount -t virtiofs myfs /mnt
 ```
 
+See [FAQ](#faq) for adding virtiofs config to an existing qemu command-line.
+
 ### Running as non-privileged user
 
 When run without root, virtiofsd requires a user namespace (see `user_namespaces(7)`)
 to be able to switch between arbitrary user/group IDs within the guest.
@@ -406,6 +419,23 @@
 ```
 Note the use of `--announce-submounts` to prevent data loss/corruption.
 
+- How to add virtiofs devices to an existing qemu command-line:
+
+  If `-object memory-backend-memfd,id=mem` and either `-numa node,memdev=mem`
+  or a `memory-backend=mem` property in the `-machine` option
+  have not already been added to the command, add them.
+
+  If a different memory backend is already configured then it should be changed
+  to `memory-backend-memfd`.
+
+  `-object memory-backend-memfd` **must** have the option `share=on`
+  and `size=` **must** match the memory size defined by `-m`.
+
+  For each virtiofs device mount add a
+  `-chardev socket,id=${MATCHING_ID},path=${VIRTIOFSD_SOCKET_PATH}` and
+  `-device vhost-user-fs-pci,queue-size=1024,chardev=${MATCHING_ID},tag=${VIRTIOFS_TAG}`
+  substituting appropriate values for the shell-style variables.
+
 ## SELinux Support
 
 One can enable support for SELinux by running virtiofsd with option
 "--security-label". But this will try to save guest's security context
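To make the layout rule behind the new `--tag` option concrete (virtio spec 1.2, section 5.11.4: UTF-8, NUL-padded, not NUL-terminated when it fills the field), here is a minimal std-only sketch. The helper name and the standalone `main` are ours; the field width matches MAX_TAG_LEN from the src/main.rs diff further below:

```rust
// The fixed config-space field width virtiofsd uses (MAX_TAG_LEN in src/main.rs).
const MAX_TAG_LEN: usize = 36;

/// Encode a tag for the virtio-fs config space: reject empty or over-long
/// tags (as virtiofsd's `parse_tag` does) and NUL-pad the rest of the field.
fn encode_tag(tag: &str) -> Option<[u8; MAX_TAG_LEN]> {
    if tag.is_empty() || tag.len() > MAX_TAG_LEN {
        return None;
    }
    let mut field = [0u8; MAX_TAG_LEN];
    field[..tag.len()].copy_from_slice(tag.as_bytes());
    Some(field)
}

fn main() {
    let field = encode_tag("myfs").expect("valid tag");
    assert_eq!(&field[..4], b"myfs");
    assert!(field[4..].iter().all(|&b| b == 0)); // NUL padding
    assert!(encode_tag("").is_none()); // empty tags are rejected
}
```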
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/descriptor_utils.rs new/virtiofsd-1.10.1/src/descriptor_utils.rs
--- old/virtiofsd-1.7.2/src/descriptor_utils.rs 2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/descriptor_utils.rs        2024-01-24 10:36:29.000000000 +0100
@@ -291,9 +291,12 @@
     for vs in bufs {
         let copy_len = cmp::min(rem.len(), vs.len());
 
-        // Safe because we have already verified that `vs` points to valid memory.
+        // SAFETY: Safe because we verify that we do not read outside
+        // of the slice's bound. The slice guard will only get dropped
+        // after the function returns. This will keep the pointer valid
+        // while reads are happening.
         unsafe {
-            copy_nonoverlapping(vs.as_ptr() as *const u8, rem.as_mut_ptr(), copy_len);
+            copy_nonoverlapping(vs.ptr_guard().as_ptr(), rem.as_mut_ptr(), copy_len);
         }
         rem = &mut rem[copy_len..];
         total += copy_len;
@@ -405,9 +408,12 @@
     for vs in bufs {
         let copy_len = cmp::min(rem.len(), vs.len());
 
-        // Safe because we have already verified that `vs` points to valid memory.
+        // SAFETY: Safe because we ensure that we do not write over the
+        // slice's bounds. The slice guard will only get dropped after
+        // the function returns. This will keep the pointer valid while
+        // writes are happening.
         unsafe {
-            copy_nonoverlapping(rem.as_ptr(), vs.as_ptr(), copy_len);
+            copy_nonoverlapping(rem.as_ptr(), vs.ptr_guard_mut().as_ptr(), copy_len);
         }
         vs.bitmap().mark_dirty(0, copy_len);
         rem = &rem[copy_len..];
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/file_traits.rs new/virtiofsd-1.10.1/src/file_traits.rs
--- old/virtiofsd-1.7.2/src/file_traits.rs      2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/file_traits.rs     2024-01-24 10:36:29.000000000 +0100
@@ -72,7 +72,8 @@
         bufs: &[&VolatileSlice<B>],
         offset: u64,
     ) -> Result<usize> {
-        let iovecs: Vec<libc::iovec> = bufs
+        let slice_guards: Vec<_> = bufs.iter().map(|s| s.ptr_guard_mut()).collect();
+        let iovecs: Vec<libc::iovec> = slice_guards
             .iter()
             .map(|s| libc::iovec {
                 iov_base: s.as_ptr() as *mut c_void,
@@ -84,8 +85,10 @@
             return Ok(0);
         }
 
-        // Safe because only bytes inside the buffers are accessed and the kernel is
-        // expected to handle arbitrary memory for I/O.
+        // SAFETY: Safe because only bytes inside the buffers are
+        // accessed and the kernel is expected to handle arbitrary
+        // memory for I/O. The pointers into the slice are valid since
+        // the slice_guards are still in scope.
         let ret = unsafe {
             preadv64(
                 self.as_raw_fd(),
@@ -119,7 +122,8 @@
         offset: u64,
         flags: Option<oslib::WritevFlags>,
     ) -> Result<usize> {
-        let iovecs: Vec<libc::iovec> = bufs
+        let slice_guards: Vec<_> = bufs.iter().map(|s| s.ptr_guard()).collect();
+        let iovecs: Vec<libc::iovec> = slice_guards
             .iter()
             .map(|s| libc::iovec {
                 iov_base: s.as_ptr() as *mut c_void,
@@ -131,8 +135,10 @@
             return Ok(0);
         }
 
-        // SAFETY: Each `libc::iovec` element is created from a `VolatileSlice`
-        // of the guest memory.
+        // SAFETY: Each `libc::iovec` element is created from a
+        // `VolatileSlice` of the guest memory. The pointers are valid
+        // because the slice guards are still in scope. We also ensure
+        // that we do not read over the slice bounds.
         unsafe {
             oslib::writev_at(
                 self.as_fd(),
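Both files above apply the same pattern, which is the substance of this part of the rust-vmm update: raw pointers into guest memory are only taken through a pointer guard that stays alive across the access. A condensed sketch of the read side, assuming the vm-memory version this update vendors (its `ptr_guard()` API is visible in the diffs themselves); the function name is ours:

```rust
use std::cmp;
use std::ptr::copy_nonoverlapping;

use vm_memory::bitmap::BitmapSlice;
use vm_memory::VolatileSlice;

/// Copy bytes from a list of volatile guest-memory slices into `dst`,
/// returning how many bytes were copied.
fn read_volatile_into<B: BitmapSlice>(bufs: &[&VolatileSlice<B>], dst: &mut [u8]) -> usize {
    let mut total = 0;
    for vs in bufs {
        let copy_len = cmp::min(dst.len() - total, vs.len());
        // SAFETY: `copy_len` is bounded by the remaining room in `dst` and by
        // the slice's own length. The `PtrGuard` temporary returned by
        // `ptr_guard()` lives until the end of this statement, keeping the
        // backing mapping valid while the copy runs.
        unsafe {
            copy_nonoverlapping(vs.ptr_guard().as_ptr(), dst.as_mut_ptr().add(total), copy_len);
        }
        total += copy_len;
    }
    total
}
```

The file_traits.rs change extends the same idea across a syscall boundary: the guards are collected into a `Vec` so they outlive the `preadv64`/`writev_at` call that consumes the iovecs.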
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/fs_cache_req_handler.rs new/virtiofsd-1.10.1/src/fs_cache_req_handler.rs
--- old/virtiofsd-1.7.2/src/fs_cache_req_handler.rs     2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/fs_cache_req_handler.rs    2024-01-24 10:36:29.000000000 +0100
@@ -2,9 +2,9 @@
 use std::io;
 use std::os::unix::io::RawFd;
 use vhost::vhost_user::message::{
-    VhostUserFSSlaveMsg, VhostUserFSSlaveMsgFlags, VHOST_USER_FS_SLAVE_ENTRIES,
+    VhostUserFSBackendMsg, VhostUserFSBackendMsgFlags, VHOST_USER_FS_BACKEND_ENTRIES,
 };
-use vhost::vhost_user::{Slave, VhostUserMasterReqHandler};
+use vhost::vhost_user::{Backend, VhostUserFrontendReqHandler};
 
 /// Trait for virtio-fs cache requests operations. This is mainly used to hide
 /// vhost-user details from virtio-fs's fuse part.
@@ -23,7 +23,7 @@
     fn unmap(&mut self, requests: Vec<fuse::RemovemappingOne>) -> io::Result<()>;
 }
 
-impl FsCacheReqHandler for Slave {
+impl FsCacheReqHandler for Backend {
     fn map(
         &mut self,
         foffset: u64,
@@ -32,30 +32,30 @@
         flags: u64,
         fd: RawFd,
     ) -> io::Result<()> {
-        let mut msg: VhostUserFSSlaveMsg = Default::default();
+        let mut msg: VhostUserFSBackendMsg = Default::default();
         msg.fd_offset[0] = foffset;
         msg.cache_offset[0] = moffset;
         msg.len[0] = len;
         msg.flags[0] = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 {
-            VhostUserFSSlaveMsgFlags::MAP_W | VhostUserFSSlaveMsgFlags::MAP_R
+            VhostUserFSBackendMsgFlags::MAP_W | VhostUserFSBackendMsgFlags::MAP_R
         } else {
-            VhostUserFSSlaveMsgFlags::MAP_R
+            VhostUserFSBackendMsgFlags::MAP_R
         };
 
-        self.fs_slave_map(&msg, &fd)?;
+        self.fs_backend_map(&msg, &fd)?;
 
         Ok(())
     }
 
     fn unmap(&mut self, requests: Vec<fuse::RemovemappingOne>) -> io::Result<()> {
-        for chunk in requests.chunks(VHOST_USER_FS_SLAVE_ENTRIES) {
-            let mut msg: VhostUserFSSlaveMsg = Default::default();
+        for chunk in requests.chunks(VHOST_USER_FS_BACKEND_ENTRIES) {
+            let mut msg: VhostUserFSBackendMsg = Default::default();
 
             for (ind, req) in chunk.iter().enumerate() {
                 msg.len[ind] = req.len;
                 msg.cache_offset[ind] = req.moffset;
             }
 
-            self.fs_slave_unmap(&msg)?;
+            self.fs_backend_unmap(&msg)?;
         }
 
         Ok(())
     }
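Beyond the mechanical slave-to-backend renaming that the vhost update brings, the `unmap` path above shows the batching idiom: removemapping requests are packed into fixed-capacity backend messages, VHOST_USER_FS_BACKEND_ENTRIES at a time. A std-only sketch of that chunking; the entry count and message type here are stand-ins, not the vhost crate's:

```rust
// Stand-in for VHOST_USER_FS_BACKEND_ENTRIES; the real constant comes from
// the vhost crate.
const MAX_ENTRIES: usize = 8;

/// Stand-in for VhostUserFSBackendMsg: fixed-size arrays, one slot per entry.
struct BatchMsg {
    len: [u64; MAX_ENTRIES],
    cache_offset: [u64; MAX_ENTRIES],
}

/// Pack (len, offset) requests into as few fixed-size messages as possible.
fn batch_unmap(requests: &[(u64, u64)]) -> Vec<BatchMsg> {
    requests
        .chunks(MAX_ENTRIES)
        .map(|chunk| {
            let mut msg = BatchMsg {
                len: [0; MAX_ENTRIES],
                cache_offset: [0; MAX_ENTRIES],
            };
            for (ind, &(len, offset)) in chunk.iter().enumerate() {
                msg.len[ind] = len;
                msg.cache_offset[ind] = offset;
            }
            msg
        })
        .collect()
}

fn main() {
    let requests: Vec<(u64, u64)> = (0u64..20).map(|i| (4096, i * 4096)).collect();
    // 20 requests with 8 slots per message -> 3 messages.
    assert_eq!(batch_unmap(&requests).len(), 3);
}
```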
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/main.rs new/virtiofsd-1.10.1/src/main.rs
--- old/virtiofsd-1.7.2/src/main.rs     2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/main.rs    2024-01-24 10:36:29.000000000 +0100
@@ -21,7 +21,7 @@
 use vhost::vhost_user::message::*;
 use vhost::vhost_user::Error::Disconnected;
-use vhost::vhost_user::{Listener, Slave};
+use vhost::vhost_user::{Backend, Listener};
 use vhost_user_backend::Error::HandleRequest;
 use vhost_user_backend::{VhostUserBackend, VhostUserDaemon, VringMutex, VringState, VringT};
 use virtio_bindings::bindings::virtio_config::*;
@@ -37,18 +37,27 @@
 use virtiofsd::server::Server;
 use virtiofsd::util::write_pid_file;
 use virtiofsd::{limits, oslib, Error as VhostUserFsError};
-use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap};
+use vm_memory::{
+    ByteValued, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap, Le32,
+};
 use vmm_sys_util::epoll::EventSet;
 use vmm_sys_util::eventfd::EventFd;
 
 const QUEUE_SIZE: usize = 1024;
-const NUM_QUEUES: usize = 2;
+// The spec allows for multiple request queues. We currently only support one.
+const REQUEST_QUEUES: u32 = 1;
+// In addition to the request queue there is one high-prio queue.
+// Since VIRTIO_FS_F_NOTIFICATION is not advertised we do not have a
+// notification queue.
+const NUM_QUEUES: usize = REQUEST_QUEUES as usize + 1;
 
 // The guest queued an available buffer for the high priority queue.
 const HIPRIO_QUEUE_EVENT: u16 = 0;
 // The guest queued an available buffer for the request queue.
 const REQ_QUEUE_EVENT: u16 = 1;
 
+const MAX_TAG_LEN: usize = 36;
+
 type Result<T> = std::result::Result<T, Error>;
 type VhostUserBackendResult<T> = std::result::Result<T, std::io::Error>;
@@ -74,6 +83,8 @@
     QueueWriter(VufDescriptorError),
     /// The unshare(CLONE_FS) call failed.
     UnshareCloneFs(io::Error),
+    /// Invalid tag name
+    InvalidTag,
 }
 
 impl fmt::Display for Error {
@@ -88,6 +99,10 @@
                     runtime seccomp policy allows unshare."
                 )
             }
+            Self::InvalidTag => write!(
+                f,
+                "The tag may not be empty or longer than {MAX_TAG_LEN} bytes (encoded as UTF-8)."
+            ),
             _ => write!(f, "{self:?}"),
         }
     }
@@ -105,8 +120,8 @@
     mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
     kill_evt: EventFd,
     server: Arc<Server<F>>,
-    // handle request from slave to master
-    vu_req: Option<Slave>,
+    // handle request from backend to frontend
+    vu_req: Option<Backend>,
     event_idx: bool,
     pool: Option<ThreadPool>,
 }
@@ -284,7 +299,7 @@
         &self,
         device_event: u16,
         vrings: &[VringMutex],
-    ) -> VhostUserBackendResult<bool> {
+    ) -> VhostUserBackendResult<()> {
         let idx = match device_event {
             HIPRIO_QUEUE_EVENT => {
                 debug!("HIPRIO_QUEUE_EVENT");
@@ -314,14 +329,14 @@
             self.process_queue_pool(vrings[idx].clone())?;
         }
 
-        Ok(false)
+        Ok(())
     }
 
     fn handle_event_serial(
         &self,
         device_event: u16,
         vrings: &[VringMutex],
-    ) -> VhostUserBackendResult<bool> {
+    ) -> VhostUserBackendResult<()> {
         let mut vring_state = match device_event {
             HIPRIO_QUEUE_EVENT => {
                 debug!("HIPRIO_QUEUE_EVENT");
@@ -351,22 +366,46 @@
             self.process_queue_serial(&mut vring_state)?;
         }
 
-        Ok(false)
+        Ok(())
+    }
+}
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct VirtioFsConfig {
+    tag: [u8; MAX_TAG_LEN],
+    num_request_queues: Le32,
+}
+
+// vm-memory needs a Default implementation even though these values are never
+// used anywhere...
+impl Default for VirtioFsConfig {
+    fn default() -> Self {
+        Self {
+            tag: [0; MAX_TAG_LEN],
+            num_request_queues: Le32::default(),
+        }
     }
 }
 
+unsafe impl ByteValued for VirtioFsConfig {}
+
 struct VhostUserFsBackend<F: FileSystem + Send + Sync + 'static> {
     thread: RwLock<VhostUserFsThread<F>>,
+    tag: Option<String>,
 }
 
 impl<F: FileSystem + Send + Sync + 'static> VhostUserFsBackend<F> {
-    fn new(fs: F, thread_pool_size: usize) -> Result<Self> {
+    fn new(fs: F, thread_pool_size: usize, tag: Option<String>) -> Result<Self> {
         let thread = RwLock::new(VhostUserFsThread::new(fs, thread_pool_size)?);
-        Ok(VhostUserFsBackend { thread })
+        Ok(VhostUserFsBackend { thread, tag })
     }
 }
 
-impl<F: FileSystem + Send + Sync + 'static> VhostUserBackend<VringMutex> for VhostUserFsBackend<F> {
+impl<F: FileSystem + Send + Sync + 'static> VhostUserBackend for VhostUserFsBackend<F> {
+    type Bitmap = ();
+    type Vring = VringMutex;
+
     fn num_queues(&self) -> usize {
         NUM_QUEUES
     }
@@ -383,11 +422,50 @@
     }
 
     fn protocol_features(&self) -> VhostUserProtocolFeatures {
-        VhostUserProtocolFeatures::MQ
-            | VhostUserProtocolFeatures::SLAVE_REQ
-            | VhostUserProtocolFeatures::SLAVE_SEND_FD
+        let mut protocol_features = VhostUserProtocolFeatures::MQ
+            | VhostUserProtocolFeatures::BACKEND_REQ
+            | VhostUserProtocolFeatures::BACKEND_SEND_FD
             | VhostUserProtocolFeatures::REPLY_ACK
-            | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
+            | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS;
+
+        if self.tag.is_some() {
+            protocol_features |= VhostUserProtocolFeatures::CONFIG;
+        }
+
+        protocol_features
+    }
+
+    fn get_config(&self, offset: u32, size: u32) -> Vec<u8> {
+        // virtio spec 1.2, 5.11.4:
+        //   The tag is encoded in UTF-8 and padded with NUL bytes if shorter than
+        //   the available space. This field is not NUL-terminated if the encoded
+        //   bytes take up the entire field.
+        // The length was already checked when parsing the arguments. Hence, we
+        // only assert that everything looks sane and pad with NUL bytes to the
+        // fixed length.
+        let tag = self.tag.as_ref().expect(
+            "Did not expect read of config if tag is not set. We do not advertise F_CONFIG in that case!",
+        );
+        assert!(tag.len() <= MAX_TAG_LEN, "too long tag length");
+        assert!(!tag.is_empty(), "tag should not be empty");
+        let mut fixed_len_tag = [0; MAX_TAG_LEN];
+        fixed_len_tag[0..tag.len()].copy_from_slice(tag.as_bytes());
+
+        let config = VirtioFsConfig {
+            tag: fixed_len_tag,
+            num_request_queues: Le32::from(REQUEST_QUEUES),
+        };
+
+        let offset = offset as usize;
+        let size = size as usize;
+        let mut result: Vec<_> = config
+            .as_slice()
+            .iter()
+            .skip(offset)
+            .take(size)
+            .copied()
+            .collect();
+        // pad with 0s up to `size`
+        result.resize(size, 0);
+        result
     }
 
     fn set_event_idx(&self, enabled: bool) {
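The offset/size handling at the end of `get_config()` is the subtle part: a vhost-user frontend may read any (offset, size) window of the config space, so the backend slices the encoded struct and zero-fills past its end. A std-only restatement, with a function name of our own:

```rust
/// Return an arbitrary (offset, size) window of a device config space,
/// zero-padded beyond the end of the encoded struct.
fn read_config_window(config: &[u8], offset: u32, size: u32) -> Vec<u8> {
    let offset = offset as usize;
    let size = size as usize;
    let mut result: Vec<u8> = config.iter().skip(offset).take(size).copied().collect();
    // Reads past the struct are padded with zeroes up to `size`.
    result.resize(size, 0);
    result
}

fn main() {
    let config = [1u8, 2, 3, 4];
    assert_eq!(read_config_window(&config, 2, 4), vec![3, 4, 0, 0]);
    assert_eq!(read_config_window(&config, 8, 2), vec![0, 0]);
}
```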
@@ -405,7 +483,7 @@
         evset: EventSet,
         vrings: &[VringMutex],
         _thread_id: usize,
-    ) -> VhostUserBackendResult<bool> {
+    ) -> VhostUserBackendResult<()> {
         if evset != EventSet::IN {
             return Err(Error::HandleEventNotEpollIn.into());
         }
@@ -423,7 +501,7 @@
         Some(self.thread.read().unwrap().kill_evt.try_clone().unwrap())
     }
 
-    fn set_slave_req_fd(&self, vu_req: Slave) {
+    fn set_backend_req_fd(&self, vu_req: Backend) {
         self.thread.write().unwrap().vu_req = Some(vu_req);
     }
 }
@@ -479,6 +557,14 @@
     }
 }
 
+fn parse_tag(tag: &str) -> Result<String> {
+    if !tag.is_empty() && tag.len() <= MAX_TAG_LEN {
+        Ok(tag.into())
+    } else {
+        Err(Error::InvalidTag)
+    }
+}
+
 #[derive(Clone, Debug, Parser)]
 #[command(
     name = "virtiofsd",
@@ -491,6 +577,17 @@
     #[arg(long)]
     shared_dir: Option<String>,
 
+    /// The tag that the virtio device advertises
+    ///
+    /// Setting this option will enable advertising of
+    /// VHOST_USER_PROTOCOL_F_CONFIG. However, the vhost-user frontend of your
+    /// hypervisor may not negotiate this feature and (or) ignore this value.
+    /// Notably, QEMU currently (as of 8.1) ignores the CONFIG feature. QEMU
+    /// versions from 7.1 to 8.0 will crash while attempting to log a warning
+    /// about not supporting the feature.
+    #[arg(long, value_parser = parse_tag)]
+    tag: Option<String>,
+
     /// vhost-user socket path [deprecated]
     #[arg(long, required_unless_present_any = &["fd", "socket_path", "print_capabilities"])]
     socket: Option<String>,
@@ -559,7 +656,7 @@
     #[arg(long, require_equals = true, default_value = "never")]
     inode_file_handles: InodeFileHandlesCommandLineMode,
 
-    /// The caching policy the file system should use (auto, always, never)
+    /// The caching policy the file system should use (auto, always, never, metadata)
     #[arg(long, default_value = "auto")]
     cache: CachePolicy,
@@ -674,6 +771,7 @@
             "auto" => opt.cache = CachePolicy::Auto,
             "always" => opt.cache = CachePolicy::Always,
             "none" => opt.cache = CachePolicy::Never,
+            "metadata" => opt.cache = CachePolicy::Metadata,
             _ => value_error("cache", value),
         },
         ["loglevel", value] => match value {
@@ -925,6 +1023,7 @@
     let timeout = match opt.cache {
         CachePolicy::Never => Duration::from_secs(0),
+        CachePolicy::Metadata => Duration::from_secs(86400),
         CachePolicy::Auto => Duration::from_secs(1),
         CachePolicy::Always => Duration::from_secs(86400),
     };
@@ -1059,7 +1158,7 @@
     };
 
     let fs_backend = Arc::new(
-        VhostUserFsBackend::new(fs, thread_pool_size).unwrap_or_else(|error| {
+        VhostUserFsBackend::new(fs, thread_pool_size, opt.tag).unwrap_or_else(|error| {
             error!("Error creating vhost-user backend: {}", error);
             process::exit(1)
         }),
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/passthrough/mod.rs new/virtiofsd-1.10.1/src/passthrough/mod.rs
--- old/virtiofsd-1.7.2/src/passthrough/mod.rs  2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/passthrough/mod.rs 2024-01-24 10:36:29.000000000 +0100
@@ -77,6 +77,15 @@
     /// the FUSE client (i.e., the file system does not have exclusive access to the directory).
     Never,
 
+    /// This is almost the same as Never, but it allows the page cache of directories, dentries,
+    /// and attrs in the guest. In other words, it acts like cache=never for normal files and
+    /// like cache=always for directories; in addition, metadata like dentries and attrs is
+    /// kept as well. This policy can be used if:
+    /// 1. the client wants to use the Never policy but its I/O performance is not good enough;
+    /// 2. the file system has exclusive access to the directory;
+    /// 3. caching directory contents and other fs metadata can make a difference in performance.
+    Metadata,
+
    /// The client is free to choose when and how to cache file data. This is the default policy and
    /// uses close-to-open consistency as described in the enum documentation.
    #[default]
@@ -93,10 +102,11 @@
     type Err = &'static str;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "never" | "Never" | "NEVER" => Ok(CachePolicy::Never),
-            "auto" | "Auto" | "AUTO" => Ok(CachePolicy::Auto),
-            "always" | "Always" | "ALWAYS" => Ok(CachePolicy::Always),
+        match &s.to_lowercase()[..] {
+            "never" => Ok(CachePolicy::Never),
+            "metadata" => Ok(CachePolicy::Metadata),
+            "auto" => Ok(CachePolicy::Auto),
+            "always" => Ok(CachePolicy::Always),
             _ => Err("invalid cache policy"),
         }
     }
@@ -792,6 +802,13 @@
                 OpenOptions::DIRECT_IO,
                 flags & (libc::O_DIRECTORY as u32) == 0,
             ),
+            CachePolicy::Metadata => {
+                if flags & (libc::O_DIRECTORY as u32) == 0 {
+                    opts |= OpenOptions::DIRECT_IO;
+                } else {
+                    opts |= OpenOptions::CACHE_DIR | OpenOptions::KEEP_CACHE;
+                }
+            }
             CachePolicy::Always => {
                 opts |= OpenOptions::KEEP_CACHE;
                 if flags & (libc::O_DIRECTORY as u32) != 0 {
@@ -1435,6 +1452,7 @@
         let mut opts = OpenOptions::empty();
         match self.cfg.cache_policy {
             CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
+            CachePolicy::Metadata => opts |= OpenOptions::DIRECT_IO,
             CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
             _ => {}
         };
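Pulling the scattered pieces of the new `metadata` policy together, case-insensitive parsing from this file plus the attribute-timeout choice from the src/main.rs diff above, as one std-only sketch (the enum mirrors upstream; `attr_timeout` is our name for the match in main.rs):

```rust
use std::str::FromStr;
use std::time::Duration;

#[derive(Debug, PartialEq)]
enum CachePolicy {
    Never,
    Metadata,
    Auto,
    Always,
}

impl FromStr for CachePolicy {
    type Err = &'static str;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Case-insensitive, as in the new upstream implementation.
        match &s.to_lowercase()[..] {
            "never" => Ok(CachePolicy::Never),
            "metadata" => Ok(CachePolicy::Metadata),
            "auto" => Ok(CachePolicy::Auto),
            "always" => Ok(CachePolicy::Always),
            _ => Err("invalid cache policy"),
        }
    }
}

/// Attribute/entry timeout per policy: `metadata` caches metadata as
/// aggressively as `always` (one day) even though file data I/O stays direct.
fn attr_timeout(policy: &CachePolicy) -> Duration {
    match policy {
        CachePolicy::Never => Duration::from_secs(0),
        CachePolicy::Metadata | CachePolicy::Always => Duration::from_secs(86400),
        CachePolicy::Auto => Duration::from_secs(1),
    }
}

fn main() {
    let policy: CachePolicy = "Metadata".parse().expect("valid policy");
    assert_eq!(policy, CachePolicy::Metadata);
    assert_eq!(attr_timeout(&policy), Duration::from_secs(86400));
}
```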
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/sandbox.rs new/virtiofsd-1.10.1/src/sandbox.rs
--- old/virtiofsd-1.7.2/src/sandbox.rs  2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/sandbox.rs 2024-01-24 10:36:29.000000000 +0100
@@ -8,6 +8,7 @@
 use std::fs::{self, File};
 use std::io::{Read, Write};
 use std::os::unix::io::{AsRawFd, FromRawFd};
+use std::path::Path;
 use std::process::{self, Command};
 use std::str::FromStr;
 use std::{error, fmt, io};
@@ -402,10 +403,16 @@
         // Setup uid/gid mappings
         if uid != 0 {
             let ppid = unsafe { libc::getppid() };
-            if let Err(why) =
+            if let Err(error) =
                 self.setup_id_mappings(self.uid_map.as_ref(), self.gid_map.as_ref(), ppid)
             {
-                panic!("couldn't setup id mappings: {}", why)
+                // We don't really need to close the pipes here, since the OS will close the FDs
+                // after the process exits. But let's do it explicitly to signal an error to the
+                // other end of the pipe.
+                drop(x_reader);
+                drop(y_writer);
+                error!("sandbox: couldn't setup id mappings: {}", error);
+                process::exit(1);
             };
         }
@@ -432,7 +439,9 @@
             .write_all(&[IdMapSetUpPipeMessage::Request as u8])
             .unwrap();
 
-        // Receive the signal that mapping is done
+        // Receive the signal that mapping is done. If the child process exits
+        // before setting up the mapping, closing the pipe before sending the
+        // message, `read_exact()` will fail with `UnexpectedEof`.
         y_reader
             .read_exact(&mut output)
             .unwrap_or_else(|_| process::exit(1));
@@ -495,6 +504,37 @@
         Ok(())
     }
 
+    fn must_drop_supplemental_groups(&self) -> Result<bool, Error> {
+        let uid = unsafe { libc::geteuid() };
+        if uid != 0 {
+            return Ok(false);
+        }
+
+        // If we are running as root and the system does not support user namespaces,
+        // we must drop supplemental groups.
+        if !Path::new("/proc/self/ns/user").exists() {
+            return Ok(true);
+        }
+
+        let uid_mmap_data =
+            fs::read_to_string("/proc/self/uid_map").map_err(Error::DropSupplementalGroups)?;
+        let uid_map: Vec<_> = uid_mmap_data.split_whitespace().collect();
+
+        let gid_map_data =
+            fs::read_to_string("/proc/self/gid_map").map_err(Error::DropSupplementalGroups)?;
+        let gid_map: Vec<_> = gid_map_data.split_whitespace().collect();
+
+        let setgroups =
+            fs::read_to_string("/proc/self/setgroups").map_err(Error::DropSupplementalGroups)?;
+
+        // A single line mapping only has 3 fields, and the 'count' field
+        // should be 1.
+        let single_uid_mapping = uid_map.len() == 3 && uid_map[2] == "1";
+        let single_gid_mapping = gid_map.len() == 3 && gid_map[2] == "1";
+
+        Ok(setgroups.trim() != "deny" || !single_uid_mapping || !single_gid_mapping)
+    }
+
     fn drop_supplemental_groups(&self) -> Result<(), Error> {
         let ngroups = unsafe { libc::getgroups(0, std::ptr::null_mut()) };
         if ngroups < 0 {
@@ -526,19 +566,26 @@
             return Err(Error::SandboxModeInvalidGidMap);
         }
 
-        // Drop supplemental groups. This is running as root and will
-        // support arbitrary uid/gid switching and we don't want to
-        // retain membership of any supplementary groups.
-        //
-        // This is not necessarily required for non-root case, where
-        // unprivileged user has started us. We are not going to setup
-        // any sandbox or we will setup one user namespace with 1:1
-        // mapping and there is no arbitrary uid/gid switching at all.
-        // In this mode setgroups() is not allowed, so we can't drop
-        // supplementary groups even if wanted to. Only way to do this
-        // will be to use newuidmap/newgidmap to setup user namespace
-        // which will allow setgroups().
-        if uid == 0 {
+        // We must drop supplemental groups membership if we support switching
+        // between arbitrary uids/gids, unless the following conditions are met:
+        // we're not running as root or we are inside a user namespace with only
+        // one uid and gid mapping and '/proc/self/setgroups' is equal to
+        // "deny". In both of these cases, no arbitrary uid/gid switching is
+        // possible and thus there's no need to drop supplemental groups. In
+        // both of these scenarios calling setgroups() is also not allowed so we
+        // avoid calling it since we know it will return a privilege error.
+        let must_drop_supplemental_groups = match self.must_drop_supplemental_groups() {
+            Ok(must_drop) => must_drop,
+            Err(error) => {
+                warn!(
+                    "Failed to determine whether supplemental groups must be dropped: {error}; \
+                     defaulting to trying to drop supplemental groups"
+                );
+                true
+            }
+        };
+
+        if must_drop_supplemental_groups {
             self.drop_supplemental_groups()?;
         }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/virtiofsd-1.7.2/src/seccomp.rs new/virtiofsd-1.10.1/src/seccomp.rs
--- old/virtiofsd-1.7.2/src/seccomp.rs  2023-08-22 12:40:10.000000000 +0200
+++ new/virtiofsd-1.10.1/src/seccomp.rs 2024-01-24 10:36:29.000000000 +0100
@@ -106,7 +106,7 @@
     allow_syscall!(ctx, libc::SYS_fsetxattr);
     #[cfg(not(target_arch = "loongarch64"))]
     allow_syscall!(ctx, libc::SYS_fstat);
-    #[cfg(target_arch = "s390x")]
+    #[cfg(any(target_arch = "s390x", target_arch = "powerpc64"))]
     allow_syscall!(ctx, libc::SYS_fstatfs64);
     allow_syscall!(ctx, libc::SYS_fstatfs);
     allow_syscall!(ctx, libc::SYS_fsync);
@@ -122,6 +122,7 @@
     allow_syscall!(ctx, libc::SYS_getegid);
     allow_syscall!(ctx, libc::SYS_geteuid);
     allow_syscall!(ctx, libc::SYS_getpid);
+    allow_syscall!(ctx, libc::SYS_getrandom);
     allow_syscall!(ctx, libc::SYS_gettid);
     allow_syscall!(ctx, libc::SYS_gettimeofday);
    allow_syscall!(ctx, libc::SYS_getxattr);
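The sandbox.rs change above reads best as a decision rule: supplemental groups must be dropped only when arbitrary uid/gid switching is actually possible. A standalone restatement of that check, assuming the `libc` crate as upstream does; the function body mirrors the diff but is simplified to std error types:

```rust
use std::fs;
use std::io;
use std::path::Path;

/// Decide whether a process that may switch to arbitrary uids/gids still
/// holds supplemental group memberships that need dropping.
fn must_drop_supplemental_groups() -> io::Result<bool> {
    // SAFETY: geteuid() has no preconditions and never fails.
    let euid = unsafe { libc::geteuid() };
    if euid != 0 {
        // Unprivileged: no arbitrary uid/gid switching is possible.
        return Ok(false);
    }
    if !Path::new("/proc/self/ns/user").exists() {
        // Root without user-namespace support: always drop.
        return Ok(true);
    }
    let uid_map_data = fs::read_to_string("/proc/self/uid_map")?;
    let uid_map: Vec<&str> = uid_map_data.split_whitespace().collect();
    let gid_map_data = fs::read_to_string("/proc/self/gid_map")?;
    let gid_map: Vec<&str> = gid_map_data.split_whitespace().collect();
    let setgroups = fs::read_to_string("/proc/self/setgroups")?;

    // A single-line 1:1 mapping has exactly 3 fields with a count of 1; only
    // inside such a namespace, with setgroups denied, is dropping groups both
    // impossible and unnecessary.
    let single_uid_mapping = uid_map.len() == 3 && uid_map[2] == "1";
    let single_gid_mapping = gid_map.len() == 3 && gid_map[2] == "1";
    Ok(setgroups.trim() != "deny" || !single_uid_mapping || !single_gid_mapping)
}

fn main() -> io::Result<()> {
    println!("must drop supplemental groups: {}", must_drop_supplemental_groups()?);
    Ok(())
}
```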