From: Waldemar Kozaczuk <jwkozac...@gmail.com> Committer: Waldemar Kozaczuk <jwkozac...@gmail.com> Branch: master
syscalls: allocate local iovec copies on the stack instead of the heap This commit is mostly based on the one from the Spirent fork of OSv - https://github.com/SpirentOrion/osv/commit/29e4d2bbc23d6ddbf8d4b065fc3388c9931e705a - and its original description reads: "Multiple syscalls used std::vector to manage local iovec copies. For our uses cases, this is totally unnecessary overhead and results in 1000's of small object memory allocations. So, just use the stack instead." In nutshell this patch slightly optimizes 4 functions - 2 that are part of the networking stack and 2 others in VFS layer to - by tweaking them to use stack instead of malloc()/free() which are relatively constly. Please note that the objects in question - copies of iovec - are pretty tiny, typically 16 bytes in size so it does make sense to use stack. Obviously it is hard to tell without measuring how significant this change is in terms of performance benefits and what use cases it would benefit. I did however find two usecases where I could observe some significant decrease of malloc invocations. The first is actually the cpiod app used to upload ZFS files during upload file which seems to be hitting the VFS code path in question. I this case I could see ~25% drop of malloc/free invocations. The second use case involved a test misc-tcp.cc which sends and receives data over socket in multiple threads and seems to be hitting the networking stack paths. In this case I could see 15-6% drop. The program was called like so: ./scripts/run.py -e '/tests/misc-tcp.so --remote=192.168.122.1 -c 10 -n 10 -l 5' with netcat running like so: ncat -l -k -p 9999 -e /bin/cat This patch also updates the test makefile to make it build misc-tcp.so after kernel no longer includes program options. It also slightly updates the test itself to output some helpful information in progress. Co-authored-by: "Timmons C. Player" <timmons.pla...@spirent.com> Co-authored-by: Waldemar Kozaczuk <jwkozac...@gmail.com> Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com> Message-Id: <20220628223749.131041-1-jwkozac...@gmail.com> --- diff --git a/bsd/sys/kern/uipc_syscalls.cc b/bsd/sys/kern/uipc_syscalls.cc --- a/bsd/sys/kern/uipc_syscalls.cc +++ b/bsd/sys/kern/uipc_syscalls.cc @@ -494,10 +494,12 @@ kern_sendit(int s, so = (struct socket *)file_data(fp); // Create a local copy of the user's iovec - sosend() is going to change it! - std::vector<iovec> uio_iov(mp->msg_iov, mp->msg_iov + mp->msg_iovlen); + assert(mp->msg_iovlen <= UIO_MAXIOV); + struct iovec uio_iov[mp->msg_iovlen]; + memcpy(uio_iov, mp->msg_iov, sizeof(uio_iov)); - auio.uio_iov = uio_iov.data(); - auio.uio_iovcnt = uio_iov.size(); + auio.uio_iov = uio_iov; + auio.uio_iovcnt = mp->msg_iovlen;; auio.uio_rw = UIO_WRITE; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; @@ -585,10 +587,12 @@ kern_recvit(int s, struct msghdr *mp, struct mbuf **controlp, ssize_t* bytes) so = (socket*)file_data(fp); // Create a local copy of the user's iovec - sorecieve() is going to change it! - std::vector<iovec> uio_iov(mp->msg_iov, mp->msg_iov + mp->msg_iovlen); + assert(mp->msg_iovlen <= UIO_MAXIOV); + struct iovec uio_iov[mp->msg_iovlen]; + memcpy(uio_iov, mp->msg_iov, sizeof(uio_iov)); - auio.uio_iov = uio_iov.data(); - auio.uio_iovcnt = uio_iov.size(); + auio.uio_iov = uio_iov; + auio.uio_iovcnt = mp->msg_iovlen; auio.uio_rw = UIO_READ; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; @@ -653,7 +657,7 @@ kern_recvit(int s, struct msghdr *mp, struct mbuf **controlp, ssize_t* bytes) if (fromsa) free(fromsa); - if (error == 0 && controlp != NULL) + if (error == 0 && controlp != NULL) *controlp = control; else if (control) m_freem(control); @@ -1038,10 +1042,12 @@ zcopy_tx(int s, struct zmsghdr *zm) if (so->so_type != SOCK_STREAM) return (EINVAL); // Create a local copy of the user's iovec - sosend() is going to change it! - std::vector<iovec> uio_iov(mp->msg_iov, mp->msg_iov + mp->msg_iovlen); + assert(mp->msg_iovlen <= UIO_MAXIOV); + struct iovec uio_iov[mp->msg_iovlen]; + memcpy(uio_iov, mp->msg_iov, sizeof(uio_iov)); - auio.uio_iov = uio_iov.data(); - auio.uio_iovcnt = uio_iov.size(); + auio.uio_iov = uio_iov; + auio.uio_iovcnt = mp->msg_iovlen; auio.uio_rw = UIO_WRITE; auio.uio_offset = 0; auio.uio_resid = 0; diff --git a/fs/vfs/vfs_syscalls.cc b/fs/vfs/vfs_syscalls.cc --- a/fs/vfs/vfs_syscalls.cc +++ b/fs/vfs/vfs_syscalls.cc @@ -266,8 +266,11 @@ sys_read(struct file *fp, const struct iovec *iov, size_t niov, struct uio uio; // Unfortunately, the current implementation of fp->read zeros the // iov_len fields when it reads from disk, so we have to copy iov. - std::vector<iovec> copy_iov(iov, iov + niov); - uio.uio_iov = copy_iov.data(); + assert(niov <= UIO_MAXIOV); + struct iovec copy_iov[niov]; + memcpy(copy_iov, iov, sizeof(copy_iov)); + + uio.uio_iov = copy_iov; uio.uio_iovcnt = niov; uio.uio_offset = offset; uio.uio_resid = bytes; @@ -302,8 +305,11 @@ sys_write(struct file *fp, const struct iovec *iov, size_t niov, struct uio uio; // Unfortunately, the current implementation of fp->write zeros the // iov_len fields when it writes to disk, so we have to copy iov. - std::vector<iovec> copy_iov(iov, iov + niov); - uio.uio_iov = copy_iov.data(); + assert(niov <= UIO_MAXIOV); + struct iovec copy_iov[niov]; + memcpy(copy_iov, iov, sizeof(copy_iov)); + + uio.uio_iov = copy_iov; uio.uio_iovcnt = niov; uio.uio_offset = offset; uio.uio_resid = bytes; diff --git a/modules/tests/Makefile b/modules/tests/Makefile --- a/modules/tests/Makefile +++ b/modules/tests/Makefile @@ -213,6 +213,10 @@ $(boost-tests:%=$(out)/tests/%): LIBS += \ -lboost_unit_test_framework \ -lboost_filesystem +boost-program-options-tests := misc-tcp.so misc-zfs-arc.so +$(boost-program-options-tests:%=$(out)/tests/%): LIBS += \ + -lboost_program_options + common-tests := $(tests) $(common-boost-tests) tests += $(boost-tests) @@ -253,7 +257,7 @@ usr.manifest: build_all_tests $(lastword $(MAKEFILE_LIST)) usr.manifest.skel FOR @cat $@.skel > $@ @case "$(CROSS_PREFIX)" in \ "aarch64"*) ./add_aarch64_boost_libraries.sh $(OSV_BASE) >> $@ ;; \ - *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix $(out)/tests/,$(boost-tests)) | grep libboost | sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib\/\1: \2/' | sort | uniq >> $@ ;; \ + *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix $(out)/tests/,$(boost-tests)) $(addprefix $(out)/tests/,$(boost-program-options-tests)) | grep libboost | sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib\/\1: \2/' | sort | uniq >> $@ ;; \ esac @echo $(all_tests) | tr ' ' '\n' | grep -v "tests/rofs/tst-.*.so" | awk '{print "/" $$0 ": ./" $$0}' >> $@ @echo $(all_tests) | tr ' ' '\n' | grep "tests/rofs/tst-.*.so" | sed 's/\.so//' | awk 'BEGIN { FS = "/" } ; { print "/tests/" $$3 "-rofs.so: ./tests/" $$2 "/" $$3 ".so"}' >> $@ @@ -266,7 +270,7 @@ common.manifest: build_all_tests $(lastword $(MAKEFILE_LIST)) usr.manifest.skel @cat usr.manifest.skel > $@ @case "$(CROSS_PREFIX)" in \ "aarch64"*) ./add_aarch64_boost_libraries.sh $(OSV_BASE) >> $@ ;; \ - *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix $(out)/tests/,$(boost-tests)) | grep libboost | sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib\/\1: \2/' | sort | uniq >> $@ ;; \ + *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix $(out)/tests/,$(boost-tests)) $(addprefix $(out)/tests/,$(boost-program-options-tests)) | grep libboost | sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib\/\1: \2/' | sort | uniq >> $@ ;; \ esac @echo $(common-tests) | tr ' ' '\n' | awk '{print "/tests/" $$0 ": ./tests/" $$0}' >> $@ diff --git a/tests/misc-tcp.cc b/tests/misc-tcp.cc --- a/tests/misc-tcp.cc +++ b/tests/misc-tcp.cc @@ -19,6 +19,10 @@ #include <random> #include <iostream> +// To run this test you need netcat running in another terminal +// to echo the data back to this program like so: +// ncat -l -k -p 9999 -e /bin/cat + using namespace std; using std::mutex; @@ -50,7 +54,7 @@ class tcp_test_client { ~test_thread(); void run(); private: - void do_connection(); + void do_connection(int c); private: tcp_test_client& _client; unique_lock<mutex> _lock; @@ -87,7 +91,7 @@ tcp_test_client::test_thread::~test_thread() _thread.join(); } -void tcp_test_client::test_thread::do_connection() +void tcp_test_client::test_thread::do_connection(int c) { uint64_t transfer = _client._dist_transfer(_client._rand); reverse_lock<decltype(_lock)> rev_lock(_lock); @@ -105,6 +109,7 @@ void tcp_test_client::test_thread::do_connection() socket.connect(endpoint); std::array<uint32_t, 1024> send_buffer, receive_buffer; uint64_t done = 0, rdone = 0; + std::cout << "Thread: " << _thread.get_id() << ", connection: " << c << " transfering " << transfer << " bytes" << std::endl; while (done < transfer) { iota(send_buffer.begin(), send_buffer.end(), done / sizeof(uint32_t)); uint64_t offset = 0; @@ -140,6 +145,7 @@ void tcp_test_client::test_thread::do_connection() if (_client.done()) { _client._condvar.notify_all(); } + std::cout << "Thread: " << _thread.get_id() << ", connection: " << c << " transfered and received all data back!" << std::endl; } void tcp_test_client::test_thread::run() @@ -149,7 +155,7 @@ void tcp_test_client::test_thread::run() auto lifetime = _client._dist_lifetime(_client._rand); for (auto i = 0; i < lifetime; ++i) { - do_connection(); + do_connection(i); } } catch (...) { // capture the first exception -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/0000000000002c8e4205e3407ddf%40google.com.