Re: add sendmmsg and recvmmsg systemcalls
On Thu, Sep 08, 2022 at 03:01:14PM +0200, Moritz Buhl wrote: > On Wed, Aug 31, 2022 at 05:44:31PM -0900, Philip Guenther wrote: > > kdump.c will need at least a SYS_recvmmsg line in the big table, and if you > > do a ktrmmsghdr() bit in the kernel a matching decoder will be needed in > > kdump. > > Here is a new diff for kdump. > OK? OK bluhm@ > Index: usr.bin/kdump/kdump.c > === > RCS file: /cvs/src/usr.bin/kdump/kdump.c,v > retrieving revision 1.149 > diff -u -p -r1.149 kdump.c > --- usr.bin/kdump/kdump.c 20 Jul 2022 05:56:36 - 1.149 > +++ usr.bin/kdump/kdump.c 7 Sep 2022 11:14:19 - > @@ -720,6 +720,8 @@ static const formatter scargs[][8] = { > [SYS_ptrace] = { Ptracedecode, Ppid_t, Pptr, Pdecint }, > [SYS_recvmsg]= { Pfd, Pptr, Sendrecvflagsname }, > [SYS_sendmsg]= { Pfd, Pptr, Sendrecvflagsname }, > +[SYS_recvmmsg] = { Pfd, Pptr, Pucount, Sendrecvflagsname, Pptr }, > +[SYS_sendmmsg] = { Pfd, Pptr, Pucount, Sendrecvflagsname }, > [SYS_recvfrom] = { Pfd, Pptr, Pbigsize, Sendrecvflagsname }, > [SYS_accept] = { Pfd, Pptr, Pptr }, > [SYS_getpeername]= { Pfd, Pptr, Pptr }, > Index: usr.bin/kdump/ktrstruct.c > === > RCS file: /cvs/src/usr.bin/kdump/ktrstruct.c,v > retrieving revision 1.29 > diff -u -p -r1.29 ktrstruct.c > --- usr.bin/kdump/ktrstruct.c 21 Dec 2020 07:47:37 - 1.29 > +++ usr.bin/kdump/ktrstruct.c 7 Sep 2022 10:00:23 - > @@ -398,6 +398,18 @@ ktrquota(const struct dqblk *quota) > } > > static void > +ktrmmsghdr(const struct mmsghdr *mmsg) > +{ > + printf("struct mmsghdr { msg_hdr = { name=%p, namelen=%u, " > + "iov=%p, iovlen=%u, control=%p, controllen=%u, flags=", > + mmsg->msg_hdr.msg_name, mmsg->msg_hdr.msg_namelen, > + mmsg->msg_hdr.msg_iov, mmsg->msg_hdr.msg_iovlen, > + mmsg->msg_hdr.msg_control, mmsg->msg_hdr.msg_controllen); > + sendrecvflagsname(mmsg->msg_hdr.msg_flags); > + printf(" }, msg_len = %u }\n", mmsg->msg_len); > +} > + > +static void > ktrmsghdr(const struct msghdr *msg) > { > printf("struct msghdr { name=%p, namelen=%u, iov=%p, 
iovlen=%u," > @@ -649,6 +661,13 @@ ktrstruct(char *buf, size_t buflen) > goto invalid; > memcpy(&msg, data, datalen); > ktrmsghdr(&msg); > + } else if (strcmp(name, "mmsghdr") == 0) { > + struct mmsghdr mmsg; > + > + if (datalen != sizeof(mmsg)) > + goto invalid; > + memcpy(&mmsg, data, datalen); > + ktrmmsghdr(&mmsg); > } else if (strcmp(name, "iovec") == 0) { > if (datalen % sizeof(struct iovec)) > goto invalid;
Re: add sendmmsg and recvmmsg systemcalls
On Wed, Aug 31, 2022 at 05:44:31PM -0900, Philip Guenther wrote: > kdump.c will need at least a SYS_recvmmsg line in the big table, and if you > do a ktrmmsghdr() bit in the kernel a matching decoder will be needed in > kdump. Here is a new diff for kdump. OK? mbuhl Index: usr.bin/kdump/kdump.c === RCS file: /cvs/src/usr.bin/kdump/kdump.c,v retrieving revision 1.149 diff -u -p -r1.149 kdump.c --- usr.bin/kdump/kdump.c 20 Jul 2022 05:56:36 - 1.149 +++ usr.bin/kdump/kdump.c 7 Sep 2022 11:14:19 - @@ -720,6 +720,8 @@ static const formatter scargs[][8] = { [SYS_ptrace] = { Ptracedecode, Ppid_t, Pptr, Pdecint }, [SYS_recvmsg] = { Pfd, Pptr, Sendrecvflagsname }, [SYS_sendmsg] = { Pfd, Pptr, Sendrecvflagsname }, +[SYS_recvmmsg] = { Pfd, Pptr, Pucount, Sendrecvflagsname, Pptr }, +[SYS_sendmmsg] = { Pfd, Pptr, Pucount, Sendrecvflagsname }, [SYS_recvfrom] = { Pfd, Pptr, Pbigsize, Sendrecvflagsname }, [SYS_accept] = { Pfd, Pptr, Pptr }, [SYS_getpeername] = { Pfd, Pptr, Pptr }, Index: usr.bin/kdump/ktrstruct.c === RCS file: /cvs/src/usr.bin/kdump/ktrstruct.c,v retrieving revision 1.29 diff -u -p -r1.29 ktrstruct.c --- usr.bin/kdump/ktrstruct.c 21 Dec 2020 07:47:37 - 1.29 +++ usr.bin/kdump/ktrstruct.c 7 Sep 2022 10:00:23 - @@ -398,6 +398,18 @@ ktrquota(const struct dqblk *quota) } static void +ktrmmsghdr(const struct mmsghdr *mmsg) +{ + printf("struct mmsghdr { msg_hdr = { name=%p, namelen=%u, " + "iov=%p, iovlen=%u, control=%p, controllen=%u, flags=", + mmsg->msg_hdr.msg_name, mmsg->msg_hdr.msg_namelen, + mmsg->msg_hdr.msg_iov, mmsg->msg_hdr.msg_iovlen, + mmsg->msg_hdr.msg_control, mmsg->msg_hdr.msg_controllen); + sendrecvflagsname(mmsg->msg_hdr.msg_flags); + printf(" }, msg_len = %u }\n", mmsg->msg_len); +} + +static void ktrmsghdr(const struct msghdr *msg) { printf("struct msghdr { name=%p, namelen=%u, iov=%p, iovlen=%u," @@ -649,6 +661,13 @@ ktrstruct(char *buf, size_t buflen) goto invalid; memcpy(&msg, data, datalen); ktrmsghdr(&msg); + } else if (strcmp(name, "mmsghdr") == 0) { +
struct mmsghdr mmsg; + + if (datalen != sizeof(mmsg)) + goto invalid; + memcpy(&mmsg, data, datalen); + ktrmmsghdr(&mmsg); } else if (strcmp(name, "iovec") == 0) { if (datalen % sizeof(struct iovec)) goto invalid;
Re: add sendmmsg and recvmmsg systemcalls
On Tue, Sep 06, 2022 at 04:00:39PM +0200, Moritz Buhl wrote: > Hi, > here is the most recent diff for the libc part of send and recvmmsg. > This requires a libc minor bump and therefore should be coordinated > after snapshots are building normally again. > > To my understanding the minor bump itself should not cause problems > in ports anymore. miod reminded me to also bump librthread as stated in libc/shlib_version. Index: lib/libc/Symbols.list === RCS file: /cvs/src/lib/libc/Symbols.list,v retrieving revision 1.75 diff -u -p -r1.75 Symbols.list --- lib/libc/Symbols.list 2 Aug 2022 16:45:00 - 1.75 +++ lib/libc/Symbols.list 6 Sep 2022 09:36:40 - @@ -175,6 +175,7 @@ _thread_sys_readlinkat _thread_sys_readv _thread_sys_reboot _thread_sys_recvfrom +_thread_sys_recvmmsg _thread_sys_recvmsg _thread_sys_rename _thread_sys_renameat @@ -184,6 +185,7 @@ _thread_sys_sched_yield _thread_sys_select _thread_sys_semget _thread_sys_semop +_thread_sys_sendmmsg _thread_sys_sendmsg _thread_sys_sendsyslog _thread_sys_sendto @@ -372,6 +374,7 @@ readlinkat readv reboot recvfrom +recvmmsg recvmsg rename renameat @@ -383,6 +386,7 @@ select semctl semget semop +sendmmsg sendmsg sendsyslog sendto Index: lib/libc/shlib_version === RCS file: /cvs/src/lib/libc/shlib_version,v retrieving revision 1.210 diff -u -p -r1.210 shlib_version --- lib/libc/shlib_version 2 Jun 2021 07:29:03 - 1.210 +++ lib/libc/shlib_version 6 Sep 2022 13:42:09 - @@ -1,4 +1,4 @@ major=96 -minor=1 +minor=2 # note: If changes were made to include/thread_private.h or if system calls # were added/changed then librthread/shlib_version must also be updated. 
Index: lib/libc/hidden/sys/socket.h === RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v retrieving revision 1.4 diff -u -p -r1.4 socket.h --- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 - 1.4 +++ lib/libc/hidden/sys/socket.h6 Sep 2022 13:41:53 - @@ -33,9 +33,11 @@ PROTO_NORMAL(listen); PROTO_NORMAL(recv); PROTO_CANCEL(recvfrom); PROTO_CANCEL(recvmsg); +PROTO_CANCEL(recvmmsg); PROTO_NORMAL(send); -PROTO_CANCEL(sendmsg); PROTO_CANCEL(sendto); +PROTO_CANCEL(sendmsg); +PROTO_CANCEL(sendmmsg); PROTO_NORMAL(setrtable); PROTO_NORMAL(setsockopt); PROTO_NORMAL(shutdown); Index: lib/libc/sys/Makefile.inc === RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v retrieving revision 1.163 diff -u -p -r1.163 Makefile.inc --- lib/libc/sys/Makefile.inc 17 Jul 2022 03:04:27 - 1.163 +++ lib/libc/sys/Makefile.inc 6 Sep 2022 13:41:53 - @@ -34,8 +34,8 @@ CANCEL= accept accept4 \ nanosleep \ open openat \ poll ppoll pread preadv pselect pwrite pwritev \ - read readv recvfrom recvmsg \ - select sendmsg sendto \ + read readv recvfrom recvmsg recvmmsg \ + select sendto sendmsg sendmmsg \ wait4 write writev SRCS+= ${CANCEL:%=w_%.c} Index: lib/libc/sys/recv.2 === RCS file: /cvs/src/lib/libc/sys/recv.2,v retrieving revision 1.48 diff -u -p -r1.48 recv.2 --- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 - 1.48 +++ lib/libc/sys/recv.2 6 Sep 2022 13:42:12 - @@ -46,15 +46,35 @@ .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen" .Ft ssize_t .Fn recvmsg "int s" "struct msghdr *msg" "int flags" +.Ft int +.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "int flags" "struct timespec *timeout" .Sh DESCRIPTION -.Fn recvfrom +.Fn recv , +.Fn recvfrom , +.Fn recvmsg , and -.Fn recvmsg +.Fn recvmmsg are used to receive messages from a socket, -.Fa s , -and may be used to receive +.Fa s . +.Fn recv +is normally used only on a +.Em connected +socket (see +.Xr connect 2 ). 
+.Fn recvfrom , +.Fn recvmsg , +and +.Fn recvmmsg +may be used to receive data on a socket whether or not it is connection-oriented. .Pp +.Fn recv +is identical to +.Fn recvfrom +with a null +.Fa from +parameter. +.Pp If .Fa from is non-null and the socket is not connection-oriented, @@ -66,25 +86,6 @@ the buffer associated with and modified on return to indicate the actual size of the address stored there. .Pp -The -.Fn recv -call is normally used only on a -.Em connected -socket (see -.Xr connect 2 ) -and is identical to -.Fn recvfrom -with a null -.Fa from -parameter. -.Pp -On successful completion, all three routines return the number of -message bytes read. -If a message is too long to fit in the supplied -buffer, excess bytes may be discarded depending on the type of socket -the message is received from (see -.Xr socket 2 ) . -.Pp If no messages are available at the socket, the
Re: add sendmmsg and recvmmsg systemcalls
Hi, here is the most recent diff for the libc part of send and recvmmsg. This requires a libc minor bump and therefore should be coordinated after snapshots are building normally again. To my understanding the minor bump itself should not cause problems in ports anymore. mbuhl Index: lib/libc/Symbols.list === RCS file: /cvs/src/lib/libc/Symbols.list,v retrieving revision 1.75 diff -u -p -r1.75 Symbols.list --- lib/libc/Symbols.list 2 Aug 2022 16:45:00 - 1.75 +++ lib/libc/Symbols.list 6 Sep 2022 09:36:40 - @@ -175,6 +175,7 @@ _thread_sys_readlinkat _thread_sys_readv _thread_sys_reboot _thread_sys_recvfrom +_thread_sys_recvmmsg _thread_sys_recvmsg _thread_sys_rename _thread_sys_renameat @@ -184,6 +185,7 @@ _thread_sys_sched_yield _thread_sys_select _thread_sys_semget _thread_sys_semop +_thread_sys_sendmmsg _thread_sys_sendmsg _thread_sys_sendsyslog _thread_sys_sendto @@ -372,6 +374,7 @@ readlinkat readv reboot recvfrom +recvmmsg recvmsg rename renameat @@ -383,6 +386,7 @@ select semctl semget semop +sendmmsg sendmsg sendsyslog sendto Index: lib/libc/shlib_version === RCS file: /cvs/src/lib/libc/shlib_version,v retrieving revision 1.210 diff -u -p -r1.210 shlib_version --- lib/libc/shlib_version 2 Jun 2021 07:29:03 - 1.210 +++ lib/libc/shlib_version 5 Sep 2022 11:57:10 - @@ -1,4 +1,4 @@ major=96 -minor=1 +minor=2 # note: If changes were made to include/thread_private.h or if system calls # were added/changed then librthread/shlib_version must also be updated. 
Index: lib/libc/hidden/sys/socket.h === RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v retrieving revision 1.4 diff -u -p -r1.4 socket.h --- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 - 1.4 +++ lib/libc/hidden/sys/socket.h6 Sep 2022 09:36:49 - @@ -33,9 +33,11 @@ PROTO_NORMAL(listen); PROTO_NORMAL(recv); PROTO_CANCEL(recvfrom); PROTO_CANCEL(recvmsg); +PROTO_CANCEL(recvmmsg); PROTO_NORMAL(send); -PROTO_CANCEL(sendmsg); PROTO_CANCEL(sendto); +PROTO_CANCEL(sendmsg); +PROTO_CANCEL(sendmmsg); PROTO_NORMAL(setrtable); PROTO_NORMAL(setsockopt); PROTO_NORMAL(shutdown); Index: lib/libc/sys/Makefile.inc === RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v retrieving revision 1.163 diff -u -p -r1.163 Makefile.inc --- lib/libc/sys/Makefile.inc 17 Jul 2022 03:04:27 - 1.163 +++ lib/libc/sys/Makefile.inc 6 Sep 2022 09:37:18 - @@ -34,8 +34,8 @@ CANCEL= accept accept4 \ nanosleep \ open openat \ poll ppoll pread preadv pselect pwrite pwritev \ - read readv recvfrom recvmsg \ - select sendmsg sendto \ + read readv recvfrom recvmsg recvmmsg \ + select sendto sendmsg sendmmsg \ wait4 write writev SRCS+= ${CANCEL:%=w_%.c} Index: lib/libc/sys/recv.2 === RCS file: /cvs/src/lib/libc/sys/recv.2,v retrieving revision 1.48 diff -u -p -r1.48 recv.2 --- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 - 1.48 +++ lib/libc/sys/recv.2 5 Sep 2022 14:59:00 - @@ -46,15 +46,35 @@ .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen" .Ft ssize_t .Fn recvmsg "int s" "struct msghdr *msg" "int flags" +.Ft int +.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "int flags" "struct timespec *timeout" .Sh DESCRIPTION -.Fn recvfrom +.Fn recv , +.Fn recvfrom , +.Fn recvmsg , and -.Fn recvmsg +.Fn recvmmsg are used to receive messages from a socket, -.Fa s , -and may be used to receive +.Fa s . +.Fn recv +is normally used only on a +.Em connected +socket (see +.Xr connect 2 ). 
+.Fn recvfrom , +.Fn recvmsg , +and +.Fn recvmmsg +may be used to receive data on a socket whether or not it is connection-oriented. .Pp +.Fn recv +is identical to +.Fn recvfrom +with a null +.Fa from +parameter. +.Pp If .Fa from is non-null and the socket is not connection-oriented, @@ -66,25 +86,6 @@ the buffer associated with and modified on return to indicate the actual size of the address stored there. .Pp -The -.Fn recv -call is normally used only on a -.Em connected -socket (see -.Xr connect 2 ) -and is identical to -.Fn recvfrom -with a null -.Fa from -parameter. -.Pp -On successful completion, all three routines return the number of -message bytes read. -If a message is too long to fit in the supplied -buffer, excess bytes may be discarded depending on the type of socket -the message is received from (see -.Xr socket 2 ) . -.Pp If no messages are available at the socket, the receive call waits for a message to arrive, unless the socket is nonblocking (see @@ -158,6 +159,8 @@ The .Dv MSG_CMSG_CLOEXEC requests that
Re: add sendmmsg and recvmmsg systemcalls
On Sat, Sep 03, 2022 at 01:49:27AM +0200, Moritz Buhl wrote: > Here is an updated version of the kernel part for sendmmsg. OK bluhm@ > Index: sys/kern/syscalls.master > === > RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v > retrieving revision 1.230 > diff -u -p -r1.230 syscalls.master > --- sys/kern/syscalls.master 2 Sep 2022 13:18:06 - 1.230 > +++ sys/kern/syscalls.master 2 Sep 2022 20:34:15 - > @@ -247,7 +247,9 @@ > 116 STD NOLOCK { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \ > unsigned int vlen, unsigned int flags, \ > struct timespec *timeout); } > -117 UNIMPL sendmmsg > +117 STD NOLOCK { int sys_sendmmsg(int s, \ > + struct mmsghdr *mmsg, unsigned int vlen, \ > + unsigned int flags); } > 118 STD { int sys_getsockopt(int s, int level, int name, \ > void *val, socklen_t *avalsize); } > 119 STD { int sys_thrkill(pid_t tid, int signum, void *tcb); } > Index: sys/kern/uipc_syscalls.c > === > RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.202 > diff -u -p -r1.202 uipc_syscalls.c > --- sys/kern/uipc_syscalls.c 2 Sep 2022 13:18:06 - 1.202 > +++ sys/kern/uipc_syscalls.c 2 Sep 2022 23:26:08 - > @@ -606,6 +606,92 @@ done: > } > > int > +sys_sendmmsg(struct proc *p, void *v, register_t *retval) > +{ > + struct sys_sendmmsg_args /* { > + syscallarg(int) s; > + syscallarg(struct mmsghdr *)mmsg; > + syscallarg(unsigned int)vlen; > + syscallarg(unsigned int)flags; > + } */ *uap = v; > + struct mmsghdr mmsg, *mmsgp; > + struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *uiov; > + size_t iovlen = UIO_SMALLIOV; > + register_t retsnd; > + unsigned int vlen, dgrams; > + int error = 0; > + > + /* Arbitrarily capped at 1024 datagrams. 
*/ > + vlen = SCARG(uap, vlen); > + if (vlen > 1024) > + vlen = 1024; > + > + mmsgp = SCARG(uap, mmsg); > + for (dgrams = 0; dgrams < vlen; dgrams++) { > + error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg)); > + if (error) > + break; > + > +#ifdef KTRACE > + if (KTRPOINT(p, KTR_STRUCT)) > + ktrmmsghdr(p, &mmsg); > +#endif > + > + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) { > + error = EMSGSIZE; > + break; > + } > + > + if (mmsg.msg_hdr.msg_iovlen > iovlen) { > + if (iov != aiov) > + free(iov, M_IOV, iovlen * > + sizeof(struct iovec)); > + > + iovlen = mmsg.msg_hdr.msg_iovlen; > + iov = mallocarray(iovlen, sizeof(struct iovec), > + M_IOV, M_WAITOK); > + } > + > + if (mmsg.msg_hdr.msg_iovlen > 0) { > + error = copyin(mmsg.msg_hdr.msg_iov, iov, > + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec)); > + if (error) > + break; > + } > + > +#ifdef KTRACE > + if (mmsg.msg_hdr.msg_iovlen && KTRPOINT(p, KTR_STRUCT)) > + ktriovec(p, iov, mmsg.msg_hdr.msg_iovlen); > +#endif > + > + uiov = mmsg.msg_hdr.msg_iov; > + mmsg.msg_hdr.msg_iov = iov; > + mmsg.msg_hdr.msg_flags = 0; > + > + error = sendit(p, SCARG(uap, s), &mmsg.msg_hdr, > + SCARG(uap, flags), &retsnd); > + if (error) > + break; > + > + mmsg.msg_hdr.msg_iov = uiov; > + mmsg.msg_len = retsnd; > + > + error = copyout(&mmsg, &mmsgp[dgrams], sizeof(mmsg)); > + if (error) > + break; > + } > + > + if (iov != aiov) > + free(iov, M_IOV, sizeof(struct iovec) * iovlen); > + > + *retval = dgrams; > + > + if (dgrams) > + return 0; > + return error; > +} > + > +int > sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t > *retsize) > { > struct file *fp; > Index: sys/sys/socket.h > === > RCS file: /mount/openbsd/cvs/src/sys/sys/socket.h,v > retrieving revision 1.103 > diff -u -p -r1.103 socket.h > --- sys/sys/socket.h 2 Sep 2022 13:18:07 - 1.103 > +++ sys/sys/socket.h 2 Sep 2022 22:31:09 - > @@ -579,6 +579,7 @@ ssize_t send(int, const void *, size_t, > ssize_t sendto(int, const void *, > size_t, int, const struct sockaddr *, socklen_t); > ssize_t sendmsg(int, 
const struct msghdr *, int); > +int
Re: add sendmmsg and recvmmsg systemcalls
Here is an updated version of the kernel part for sendmmsg. mbuhl Index: sys/kern/syscalls.master === RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v retrieving revision 1.230 diff -u -p -r1.230 syscalls.master --- sys/kern/syscalls.master 2 Sep 2022 13:18:06 - 1.230 +++ sys/kern/syscalls.master 2 Sep 2022 20:34:15 - @@ -247,7 +247,9 @@ 116 STD NOLOCK { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \ unsigned int vlen, unsigned int flags, \ struct timespec *timeout); } -117 UNIMPL sendmmsg +117 STD NOLOCK { int sys_sendmmsg(int s, \ + struct mmsghdr *mmsg, unsigned int vlen, \ + unsigned int flags); } 118 STD { int sys_getsockopt(int s, int level, int name, \ void *val, socklen_t *avalsize); } 119 STD { int sys_thrkill(pid_t tid, int signum, void *tcb); } Index: sys/kern/uipc_syscalls.c === RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.202 diff -u -p -r1.202 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 2 Sep 2022 13:18:06 - 1.202 +++ sys/kern/uipc_syscalls.c 2 Sep 2022 23:26:08 - @@ -606,6 +606,92 @@ done: } int +sys_sendmmsg(struct proc *p, void *v, register_t *retval) +{ + struct sys_sendmmsg_args /* { + syscallarg(int) s; + syscallarg(struct mmsghdr *)mmsg; + syscallarg(unsigned int)vlen; + syscallarg(unsigned int)flags; + } */ *uap = v; + struct mmsghdr mmsg, *mmsgp; + struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *uiov; + size_t iovlen = UIO_SMALLIOV; + register_t retsnd; + unsigned int vlen, dgrams; + int error = 0; + + /* Arbitrarily capped at 1024 datagrams. 
*/ + vlen = SCARG(uap, vlen); + if (vlen > 1024) + vlen = 1024; + + mmsgp = SCARG(uap, mmsg); + for (dgrams = 0; dgrams < vlen; dgrams++) { + error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg)); + if (error) + break; + +#ifdef KTRACE + if (KTRPOINT(p, KTR_STRUCT)) + ktrmmsghdr(p, &mmsg); +#endif + + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) { + error = EMSGSIZE; + break; + } + + if (mmsg.msg_hdr.msg_iovlen > iovlen) { + if (iov != aiov) + free(iov, M_IOV, iovlen * + sizeof(struct iovec)); + + iovlen = mmsg.msg_hdr.msg_iovlen; + iov = mallocarray(iovlen, sizeof(struct iovec), + M_IOV, M_WAITOK); + } + + if (mmsg.msg_hdr.msg_iovlen > 0) { + error = copyin(mmsg.msg_hdr.msg_iov, iov, + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec)); + if (error) + break; + } + +#ifdef KTRACE + if (mmsg.msg_hdr.msg_iovlen && KTRPOINT(p, KTR_STRUCT)) + ktriovec(p, iov, mmsg.msg_hdr.msg_iovlen); +#endif + + uiov = mmsg.msg_hdr.msg_iov; + mmsg.msg_hdr.msg_iov = iov; + mmsg.msg_hdr.msg_flags = 0; + + error = sendit(p, SCARG(uap, s), &mmsg.msg_hdr, + SCARG(uap, flags), &retsnd); + if (error) + break; + + mmsg.msg_hdr.msg_iov = uiov; + mmsg.msg_len = retsnd; + + error = copyout(&mmsg, &mmsgp[dgrams], sizeof(mmsg)); + if (error) + break; + } + + if (iov != aiov) + free(iov, M_IOV, sizeof(struct iovec) * iovlen); + + *retval = dgrams; + + if (dgrams) + return 0; + return error; +} + +int sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize) { struct file *fp; Index: sys/sys/socket.h === RCS file: /mount/openbsd/cvs/src/sys/sys/socket.h,v retrieving revision 1.103 diff -u -p -r1.103 socket.h --- sys/sys/socket.h 2 Sep 2022 13:18:07 - 1.103 +++ sys/sys/socket.h 2 Sep 2022 22:31:09 - @@ -579,6 +579,7 @@ ssize_t send(int, const void *, size_t, ssize_t sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t); ssize_t sendmsg(int, const struct msghdr *, int); +int sendmmsg(int, struct mmsghdr *, unsigned int, unsigned int); int setsockopt(int, int, int, const void *, socklen_t); int shutdown(int, int); int
Re: add sendmmsg and recvmmsg systemcalls
On Thu, Sep 01, 2022 at 06:06:10PM +0200, Moritz Buhl wrote: > I addressed your concerns as well as these of jca, just the kernel > part (and the new ktrace stuff) below. > > One minor thing: I didn't see any kdump output where one struct was > contained in another one but I am printing it like ddb would so I > guess it should be fine. OK bluhm@ > Index: kern/syscalls.master > === > RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v > retrieving revision 1.229 > diff -u -p -r1.229 syscalls.master > --- kern/syscalls.master 1 Aug 2022 14:56:59 - 1.229 > +++ kern/syscalls.master 1 Sep 2022 14:52:47 - > @@ -244,8 +244,10 @@ > const char *permissions); } > 115 STD { int sys___realpath(const char *pathname, \ > char *resolved); } > -116 OBSOL t32_gettimeofday > -117 OBSOL t32_getrusage > +116 STD NOLOCK { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \ > + unsigned int vlen, unsigned int flags, \ > + struct timespec *timeout); } > +117 UNIMPL sendmmsg > 118 STD { int sys_getsockopt(int s, int level, int name, \ > void *val, socklen_t *avalsize); } > 119 STD { int sys_thrkill(pid_t tid, int signum, void *tcb); } > Index: kern/uipc_syscalls.c > === > RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.201 > diff -u -p -r1.201 uipc_syscalls.c > --- kern/uipc_syscalls.c 14 Aug 2022 01:58:28 - 1.201 > +++ kern/uipc_syscalls.c 1 Sep 2022 14:37:26 - > @@ -805,6 +805,140 @@ done: > } > > int > +sys_recvmmsg(struct proc *p, void *v, register_t *retval) > +{ > + struct sys_recvmmsg_args /* { > + syscallarg(int) s; > + syscallarg(struct mmsghdr *)mmsg; > + syscallarg(unsigned int)vlen; > + syscallarg(unsigned int)flags; > + syscallarg(struct timespec *) timeout; > + } */ *uap = v; > + struct mmsghdr mmsg, *mmsgp; > + struct timespec ts, now; > + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov; > + struct file *fp; > + struct socket *so; > + struct timespec *timeout; > + size_t iovlen = UIO_SMALLIOV; > + register_t retrec; > + unsigned 
int vlen, dgrams; > + int error = 0, flags, s; > + > + s = SCARG(uap, s); > + if ((error = getsock(p, s, &fp))) > + return (error); > + so = (struct socket *)fp->f_data; > + > + timeout = SCARG(uap, timeout); > + if (timeout != NULL) { > + error = copyin(timeout, &ts, sizeof(ts)); > + if (error) > + return error; > +#ifdef KTRACE > + if (KTRPOINT(p, KTR_STRUCT)) > + ktrreltimespec(p, &ts); > +#endif > + getnanotime(&now); > + timespecadd(&now, &ts, &ts); > + } > + > + flags = SCARG(uap, flags); > + > + /* Arbitrarily capped at 1024 datagrams. */ > + vlen = SCARG(uap, vlen); > + if (vlen > 1024) > + vlen = 1024; > + > + mmsgp = SCARG(uap, mmsg); > + for (dgrams = 0; dgrams < vlen;) { > + error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg)); > + if (error) > + break; > + > + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) { > + error = EMSGSIZE; > + break; > + } > + > + if (mmsg.msg_hdr.msg_iovlen > iovlen) { > + if (iov != aiov) > + free(iov, M_IOV, iovlen * > + sizeof(struct iovec)); > + > + iovlen = mmsg.msg_hdr.msg_iovlen; > + iov = mallocarray(iovlen, sizeof(struct iovec), > + M_IOV, M_WAITOK); > + } > + > + if (mmsg.msg_hdr.msg_iovlen > 0) { > + error = copyin(mmsg.msg_hdr.msg_iov, iov, > + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec)); > + if (error) > + break; > + } > + > + uiov = mmsg.msg_hdr.msg_iov; > + mmsg.msg_hdr.msg_iov = iov; > + mmsg.msg_hdr.msg_flags = flags; > + > + error = recvit(p, s, &mmsg.msg_hdr, NULL, &retrec); > + if (error) { > + if (error == EAGAIN && dgrams > 0) > + error = 0; > + break; > + } > + > + if (dgrams == 0 && flags & MSG_WAITFORONE) { > + flags &= ~MSG_WAITFORONE; > + flags |= MSG_DONTWAIT; > + } > + > + mmsg.msg_hdr.msg_iov = uiov; > + mmsg.msg_len = retrec; >
Re: add sendmmsg and recvmmsg systemcalls
I addressed your concerns as well as these of jca, just the kernel part (and the new ktrace stuff) below. One minor thing: I didn't see any kdump output where one struct was contained in another one but I am printing it like ddb would so I guess it should be fine. Index: kern/syscalls.master === RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v retrieving revision 1.229 diff -u -p -r1.229 syscalls.master --- kern/syscalls.master 1 Aug 2022 14:56:59 - 1.229 +++ kern/syscalls.master 1 Sep 2022 14:52:47 - @@ -244,8 +244,10 @@ const char *permissions); } 115 STD { int sys___realpath(const char *pathname, \ char *resolved); } -116 OBSOL t32_gettimeofday -117 OBSOL t32_getrusage +116 STD NOLOCK { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \ + unsigned int vlen, unsigned int flags, \ + struct timespec *timeout); } +117 UNIMPL sendmmsg 118 STD { int sys_getsockopt(int s, int level, int name, \ void *val, socklen_t *avalsize); } 119 STD { int sys_thrkill(pid_t tid, int signum, void *tcb); } Index: kern/uipc_syscalls.c === RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.201 diff -u -p -r1.201 uipc_syscalls.c --- kern/uipc_syscalls.c 14 Aug 2022 01:58:28 - 1.201 +++ kern/uipc_syscalls.c 1 Sep 2022 14:37:26 - @@ -805,6 +805,140 @@ done: } int +sys_recvmmsg(struct proc *p, void *v, register_t *retval) +{ + struct sys_recvmmsg_args /* { + syscallarg(int) s; + syscallarg(struct mmsghdr *)mmsg; + syscallarg(unsigned int)vlen; + syscallarg(unsigned int)flags; + syscallarg(struct timespec *) timeout; + } */ *uap = v; + struct mmsghdr mmsg, *mmsgp; + struct timespec ts, now; + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov; + struct file *fp; + struct socket *so; + struct timespec *timeout; + size_t iovlen = UIO_SMALLIOV; + register_t retrec; + unsigned int vlen, dgrams; + int error = 0, flags, s; + + s = SCARG(uap, s); + if ((error = getsock(p, s, &fp))) + return (error); + so = (struct socket *)fp->f_data; + + timeout = SCARG(uap, timeout); + 
if (timeout != NULL) { + error = copyin(timeout, &ts, sizeof(ts)); + if (error) + return error; +#ifdef KTRACE + if (KTRPOINT(p, KTR_STRUCT)) + ktrreltimespec(p, &ts); +#endif + getnanotime(&now); + timespecadd(&now, &ts, &ts); + } + + flags = SCARG(uap, flags); + + /* Arbitrarily capped at 1024 datagrams. */ + vlen = SCARG(uap, vlen); + if (vlen > 1024) + vlen = 1024; + + mmsgp = SCARG(uap, mmsg); + for (dgrams = 0; dgrams < vlen;) { + error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg)); + if (error) + break; + + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) { + error = EMSGSIZE; + break; + } + + if (mmsg.msg_hdr.msg_iovlen > iovlen) { + if (iov != aiov) + free(iov, M_IOV, iovlen * + sizeof(struct iovec)); + + iovlen = mmsg.msg_hdr.msg_iovlen; + iov = mallocarray(iovlen, sizeof(struct iovec), + M_IOV, M_WAITOK); + } + + if (mmsg.msg_hdr.msg_iovlen > 0) { + error = copyin(mmsg.msg_hdr.msg_iov, iov, + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec)); + if (error) + break; + } + + uiov = mmsg.msg_hdr.msg_iov; + mmsg.msg_hdr.msg_iov = iov; + mmsg.msg_hdr.msg_flags = flags; + + error = recvit(p, s, &mmsg.msg_hdr, NULL, &retrec); + if (error) { + if (error == EAGAIN && dgrams > 0) + error = 0; + break; + } + + if (dgrams == 0 && flags & MSG_WAITFORONE) { + flags &= ~MSG_WAITFORONE; + flags |= MSG_DONTWAIT; + } + + mmsg.msg_hdr.msg_iov = uiov; + mmsg.msg_len = retrec; +#ifdef KTRACE + if (KTRPOINT(p, KTR_STRUCT)) { + ktrmmsghdr(p, &mmsg); + if
Re: add sendmmsg and recvmmsg systemcalls
On Tue, Aug 30, 2022 at 11:18 AM Moritz Buhl wrote: > the following diff only contains recvmmsg which should be the more useful > syscall of the two. > Comments inline. > --- sys/kern/syscalls.master 1 Aug 2022 14:56:59 - 1.229 > +++ sys/kern/syscalls.master 30 Aug 2022 15:44:29 - > @@ -575,3 +575,6 @@ > 328 OBSOL __tfork51 > 329 STD NOLOCK { void sys___set_tcb(void *tcb); } > 330 STD NOLOCK { void *sys___get_tcb(void); } > +331 STD NOLOCK { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \ > + unsigned int vlen, unsigned int flags, \ > + struct timespec *timeout); } > We believe in packing syscall numbers down, modulo having them group nicely. So, instead of putting recvmmsg() as a new high-water mark, I would put it at 116 ("t32_gettimeofday") and mark 117 ("t32_getrusage") as sendmmsg(), UNIMPL if not part of this round. (Typically, when a diff changes syscalls.master then you leave out the diff chunks for the generated files when sending for review, because they are 100% implied and are just noise. Not a big deal) --- sys/kern/uipc_syscalls.c 14 Aug 2022 01:58:28 - 1.201 > +++ sys/kern/uipc_syscalls.c 30 Aug 2022 17:03:09 - > @@ -805,6 +805,135 @@ done: > } > > int > +sys_recvmmsg(struct proc *p, void *v, register_t *retval) > +{ > + struct sys_recvmmsg_args /* { > + syscallarg(int) s; > + syscallarg(struct mmsghdr *)mmsg; > + syscallarg(unsigned int)vlen; > + syscallarg(unsigned int)flags; > + syscallarg(struct timespec *) timeout; > + } */ *uap = v; > + struct mmsghdr mmsg; > + struct timespec ts, now; > + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov; > + struct file *fp; > + struct socket *so; > + struct timespec *timeout; > + unsigned int vlen, dg; > + int error = 0, flags, s; > + > + timeout = SCARG(uap, timeout); > + if (timeout != NULL) { > + error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); > + if (error != 0) > + return error; > Should have a KTRACE ktrreltimespec() block here. Should validate the timespec. 
(Follow what sys_kevent() does) + getnanotime(&now); > + timespecadd(&now, &ts, &ts); > + } > + > + s = SCARG(uap, s); > + if ((error = getsock(p, s, &fp)) != 0) > + return (error); > + so = (struct socket *)fp->f_data; > + > + flags = SCARG(uap, flags); > + > + vlen = SCARG(uap, vlen); > + if (vlen > 1024) > + vlen = 1024; > + > + for (dg = 0; dg < vlen;) { > + error = copyin(SCARG(uap, mmsg) + dg, &mmsg, sizeof(mmsg)); > + if (error != 0) > + break; > Hmm. This copies in each mmsghdr structure when it gets to it. Is that how the Linux version behaves, lazily accessing them such that an early exit (from timeout, signal, whatever) means later values aren't read? Or do they copy them all in, update any that are changed, then copy them out at the end? (Not sure it matters, but it's an interesting corner case to think carefully about.) ... > + mmsg.msg_hdr.msg_iov = uiov; > + mmsg.msg_len = *retval; > +#ifdef KTRACE > + if (KTRPOINT(p, KTR_STRUCT)) { > + ktrmsghdr(p, &mmsg.msg_hdr); > I think you should go ahead and define ktrmmsghdr() taking that full struct, so kdump can report the msg_len value that is being returned. > + if (iov != aiov) { > + free(iov, M_IOV, sizeof(struct iovec) * > + mmsg.msg_hdr.msg_iovlen); > + iov = aiov; > + } > The iov freeing, IMO, should be done once, at the end of the loop. Just keep growing as necessary (tracking the currently allocated size) and free once. kdump.c will need at least a SYS_recvmmsg line in the big table, and if you do a ktrmmsghdr() bit in the kernel a matching decoder will be needed in kdump. Philip Guenther
Re: add sendmmsg and recvmmsg systemcalls
On Tue, Aug 30 2022, Moritz Buhl wrote: > Hi tech@, > > the following diff only contains recvmmsg which should be the more useful > syscall of the two. > > I implemented some minor feedback regarding the man page and attaching > an error from recvit to the socket in case some messages were > received before. > > I am also looking into passing the timeout through recvit and > soreceive in order to not block indefinetly on a blocking socket > as the other implementations do: > > BUGS > >The timeout argument does not work as intended. The timeout is >checked only after the receipt of each datagram, so that if up to >vlen-1 datagrams are received before the timeout expires, but >then no further datagrams are received, the call will block >forever. > https://www.man7.org/linux/man-pages/man2/recvmmsg.2.html > > But I would prefer doing this in another change. As discussed with Moritz, ports/net/knot (which I co-maintain) expects to be able to use both recvmmsg and sendmmsg. I didn't check other ports but from the code we read, using both recvmmsg and sendmmsg looks legit (useful). And since Moritz insists, please find my bikeshedding below. :) tl;dr nothing looks wrong to me but I haven't tested it yet. [...] 
> Index: sys/kern/uipc_syscalls.c > === > RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.201 > diff -u -p -r1.201 uipc_syscalls.c > --- sys/kern/uipc_syscalls.c 14 Aug 2022 01:58:28 - 1.201 > +++ sys/kern/uipc_syscalls.c 30 Aug 2022 17:03:09 - > @@ -805,6 +805,135 @@ done: > } > > int > +sys_recvmmsg(struct proc *p, void *v, register_t *retval) > +{ > + struct sys_recvmmsg_args /* { > + syscallarg(int) s; > + syscallarg(struct mmsghdr *)mmsg; > + syscallarg(unsigned int)vlen; > + syscallarg(unsigned int)flags; > + syscallarg(struct timespec *) timeout; > + } */ *uap = v; > + struct mmsghdr mmsg; > + struct timespec ts, now; > + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov; > + struct file *fp; > + struct socket *so; > + struct timespec *timeout; > + unsigned int vlen, dg; "dg" is a weird name. At first it looks like an index; I would name it idx, but if you want to keep the original name, maybe put it on its own line and add a comment like /* number of datagrams received */? > + int error = 0, flags, s; > + > + timeout = SCARG(uap, timeout); > + if (timeout != NULL) { > + error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); ^^^ could reuse the timeout local variable > + if (error != 0) The local idiom is if (error) > + return error; There are other occurrences below which you may or may not address if you prefer to limit the differences with NetBSD. > + getnanotime(&now); > + timespecadd(&now, &ts, &ts); > + } > + > + s = SCARG(uap, s); > + if ((error = getsock(p, s, &fp)) != 0) > + return (error); > + so = (struct socket *)fp->f_data; > + > + flags = SCARG(uap, flags); > + > + vlen = SCARG(uap, vlen); Maybe add a comment? /* Arbitrarily capped at 1024 items. */ > + if (vlen > 1024) > + vlen = 1024; > + > + for (dg = 0; dg < vlen;) { > + error = copyin(SCARG(uap, mmsg) + dg, &mmsg, sizeof(mmsg)); We could have a struct mmsghdr *mmsgp local pointer and use mmsgp[idx] instead of a mix of a macro and pointer arithmetic. 
> + if (error != 0) > + break; > + > + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) { > + error = EMSGSIZE; > + break; > + } > + > + if (mmsg.msg_hdr.msg_iovlen > UIO_SMALLIOV) > + iov = mallocarray(mmsg.msg_hdr.msg_iovlen, > + sizeof(struct iovec), M_IOV, M_WAITOK); > + else > + iov = aiov; > + > + if (mmsg.msg_hdr.msg_iovlen > 0) { > + error = copyin(mmsg.msg_hdr.msg_iov, iov, > + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec)); > + if (error) > + break; > + } > + > + uiov = mmsg.msg_hdr.msg_iov; > + mmsg.msg_hdr.msg_iov = iov; > + mmsg.msg_hdr.msg_flags = flags; > + > + error = recvit(p, s, &mmsg.msg_hdr, NULL, retval); > + if (error != 0) { > + if (error == EAGAIN && dg > 0) > + error = 0; > + break; > + } > + > + if (dg == 0 && flags & MSG_WAITFORONE) { > + flags &= ~MSG_WAITFORONE; > + flags |=
Re: add sendmmsg and recvmmsg systemcalls
I created a pull request on their GitHub: https://github.com/NLnetLabs/nsd/pull/231 On Tue, Aug 30, 2022 at 04:33:10PM -0600, Todd C. Miller wrote: > On Wed, 31 Aug 2022 00:19:20 +0200, Moritz Buhl wrote: > > > On Tue, Aug 30, 2022 at 10:59:43PM +0200, Claudio Jeker wrote: > > > And nsd in base. It seems unbound does not use recvmmsg. > > > > After 'make -f Makefile.bsd-wrapper config' recvmmsg is picked up. > > The config compile test currently defines NONBLOCKING_IS_BROKEN > > because of a missing include. Then vlen for recvmmsg would always > > be 1. > > > > checking for struct mmsghdr... yes > > checking for recvmmsg... yes > > checking for sendmmsg... no > > ... > > checking if nonblocking sockets work... yes > > > > If anybody knows how to make nsd call nsd_recvmmsg this would > > make for some nice testing. > > Does this also fix the configure test? If so, we can submit it > upstream. > > - todd > > Index: acx_nlnetlabs.m4 > === > RCS file: /cvs/src/usr.sbin/nsd/acx_nlnetlabs.m4,v > retrieving revision 1.7 > diff -u -p -u -r1.7 acx_nlnetlabs.m4 > --- acx_nlnetlabs.m4 24 Oct 2021 12:14:18 - 1.7 > +++ acx_nlnetlabs.m4 30 Aug 2022 22:31:48 - > @@ -963,6 +963,9 @@ AC_LANG_SOURCE([[ > #ifdef HAVE_SYS_TYPES_H > #include <sys/types.h> > #endif > +#ifdef HAVE_SYS_SELECT_H > +#include <sys/select.h> > +#endif > #ifdef HAVE_SYS_SOCKET_H > #include <sys/socket.h> > #endif > Index: configure > === > RCS file: /cvs/src/usr.sbin/nsd/configure,v > retrieving revision 1.56 > diff -u -p -u -r1.56 configure > --- configure 30 Jun 2022 10:49:39 - 1.56 > +++ configure 30 Aug 2022 22:32:08 - > @@ -6593,6 +6593,9 @@ else > #ifdef HAVE_SYS_TYPES_H > #include <sys/types.h> > #endif > +#ifdef HAVE_SYS_SELECT_H > +#include <sys/select.h> > +#endif > #ifdef HAVE_SYS_SOCKET_H > #include <sys/socket.h> > #endif >
Re: add sendmmsg and recvmmsg systemcalls
On Wed, 31 Aug 2022 00:19:20 +0200, Moritz Buhl wrote: > On Tue, Aug 30, 2022 at 10:59:43PM +0200, Claudio Jeker wrote: > > And nsd in base. It seems unbound does not use recvmmsg. > > After 'make -f Makefile.bsd-wrapper config' recvmmsg is picked up. > The config compile test currently defines NONBLOCKING_IS_BROKEN > because of a missing include. Then vlen for recvmmsg would always > be 1. > > checking for struct mmsghdr... yes > checking for recvmmsg... yes > checking for sendmmsg... no > ... > checking if nonblocking sockets work... yes > > If anybody knows how to make nsd call nsd_recvmmsg this would > make for some nice testing. Does this also fix the configure test? If so, we can submit it upstream. - todd Index: acx_nlnetlabs.m4 === RCS file: /cvs/src/usr.sbin/nsd/acx_nlnetlabs.m4,v retrieving revision 1.7 diff -u -p -u -r1.7 acx_nlnetlabs.m4 --- acx_nlnetlabs.m4 24 Oct 2021 12:14:18 - 1.7 +++ acx_nlnetlabs.m4 30 Aug 2022 22:31:48 - @@ -963,6 +963,9 @@ AC_LANG_SOURCE([[ #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> #endif +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif #ifdef HAVE_SYS_SOCKET_H #include <sys/socket.h> #endif Index: configure === RCS file: /cvs/src/usr.sbin/nsd/configure,v retrieving revision 1.56 diff -u -p -u -r1.56 configure --- configure 30 Jun 2022 10:49:39 - 1.56 +++ configure 30 Aug 2022 22:32:08 - @@ -6593,6 +6593,9 @@ else #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> #endif +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif #ifdef HAVE_SYS_SOCKET_H #include <sys/socket.h> #endif
Re: add sendmmsg and recvmmsg systemcalls
On Tue, Aug 30, 2022 at 10:59:43PM +0200, Claudio Jeker wrote: > And nsd in base. It seems unbound does not use recvmmsg. After 'make -f Makefile.bsd-wrapper config' recvmmsg is picked up. The config compile test currently defines NONBLOCKING_IS_BROKEN because of a missing include. Then vlen for recvmmsg would always be 1. checking for struct mmsghdr... yes checking for recvmmsg... yes checking for sendmmsg... no ... checking if nonblocking sockets work... yes If anybody knows how to make nsd call nsd_recvmmsg this would make for some nice testing. mbuhl Index: Makefile.bsd-wrapper === RCS file: /mount/openbsd/cvs/src/usr.sbin/nsd/Makefile.bsd-wrapper,v retrieving revision 1.19 diff -u -p -r1.19 Makefile.bsd-wrapper --- Makefile.bsd-wrapper30 Jun 2021 11:50:22 - 1.19 +++ Makefile.bsd-wrapper30 Aug 2022 22:09:46 - @@ -22,6 +22,7 @@ CONFIGURE_OPTS= --prefix=/usr \ --with-xfrdfile=${CHROOTDIR}/run/xfrd.state \ --with-libevent=/usr \ --enable-ratelimit \ + --enable-recvmmsg \ --enable-root-server PROG= nsd nsd-checkconf nsd-checkzone nsd-control Index: configure === RCS file: /mount/openbsd/cvs/src/usr.sbin/nsd/configure,v retrieving revision 1.56 diff -u -p -r1.56 configure --- configure 30 Jun 2022 10:49:39 - 1.56 +++ configure 30 Aug 2022 22:04:41 - @@ -6609,6 +6609,8 @@ else #include #endif +#include + int main(void) { int port;
Re: add sendmmsg and recvmmsg systemcalls
On Tue, Aug 30, 2022 at 09:51:46PM +0100, Stuart Henderson wrote: > btw a few ports will likely pick this up: > > paths/devel/glib2.log:Checking for function "recvmmsg" : NO > paths/net/tinc.log:checking for recvmmsg... no > paths/net/knot.log:checking for recvmmsg... no > paths/net/knot.log:Use recvmmsg: no > paths/net/gdnsd.log:checking whether recvmmsg is declared... no > paths/net/gdnsd.log:checking for recvmmsg... no > paths/net/powerdns.log:checking for recvmmsg... no > paths/net/powerdns.log:checking for recvmmsg... (cached) no > paths/net/dnsdist.log:checking for recvmmsg... no > paths/net/powerdns_recursor.log:checking for recvmmsg... no > paths/sysutils/rsyslog,-elasticsearch.log:checking for recvmmsg... no > And nsd in base. It seems unbound does not use recvmmsg. -- :wq Claudio
Re: add sendmmsg and recvmmsg systemcalls
btw a few ports will likely pick this up: paths/devel/glib2.log:Checking for function "recvmmsg" : NO paths/net/tinc.log:checking for recvmmsg... no paths/net/knot.log:checking for recvmmsg... no paths/net/knot.log:Use recvmmsg: no paths/net/gdnsd.log:checking whether recvmmsg is declared... no paths/net/gdnsd.log:checking for recvmmsg... no paths/net/powerdns.log:checking for recvmmsg... no paths/net/powerdns.log:checking for recvmmsg... (cached) no paths/net/dnsdist.log:checking for recvmmsg... no paths/net/powerdns_recursor.log:checking for recvmmsg... no paths/sysutils/rsyslog,-elasticsearch.log:checking for recvmmsg... no
Re: add sendmmsg and recvmmsg systemcalls
Hi tech@, the following diff only contains recvmmsg, which should be the more useful syscall of the two. I implemented some minor feedback regarding the man page and attaching an error from recvit to the socket in case some messages were received before. I am also looking into passing the timeout through recvit and soreceive in order to not block indefinitely on a blocking socket as the other implementations do: BUGS The timeout argument does not work as intended. The timeout is checked only after the receipt of each datagram, so that if up to vlen-1 datagrams are received before the timeout expires, but then no further datagrams are received, the call will block forever. https://www.man7.org/linux/man-pages/man2/recvmmsg.2.html But I would prefer doing this in another change. mbuhl Index: lib/libc/Symbols.list === RCS file: /cvs/src/lib/libc/Symbols.list,v retrieving revision 1.75 diff -u -p -r1.75 Symbols.list --- lib/libc/Symbols.list 2 Aug 2022 16:45:00 - 1.75 +++ lib/libc/Symbols.list 30 Aug 2022 15:44:29 - @@ -175,6 +175,7 @@ _thread_sys_readlinkat _thread_sys_readv _thread_sys_reboot _thread_sys_recvfrom +_thread_sys_recvmmsg _thread_sys_recvmsg _thread_sys_rename _thread_sys_renameat @@ -372,6 +373,7 @@ readlinkat readv reboot recvfrom +recvmmsg recvmsg rename renameat Index: lib/libc/shlib_version === RCS file: /cvs/src/lib/libc/shlib_version,v retrieving revision 1.210 diff -u -p -r1.210 shlib_version --- lib/libc/shlib_version 2 Jun 2021 07:29:03 - 1.210 +++ lib/libc/shlib_version 30 Aug 2022 15:44:29 - @@ -1,4 +1,4 @@ major=96 -minor=1 +minor=2 # note: If changes were made to include/thread_private.h or if system calls # were added/changed then librthread/shlib_version must also be updated. 
Index: lib/libc/hidden/sys/socket.h === RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v retrieving revision 1.4 diff -u -p -r1.4 socket.h --- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 - 1.4 +++ lib/libc/hidden/sys/socket.h30 Aug 2022 15:44:29 - @@ -32,6 +32,7 @@ PROTO_NORMAL(getsockopt); PROTO_NORMAL(listen); PROTO_NORMAL(recv); PROTO_CANCEL(recvfrom); +PROTO_CANCEL(recvmmsg); PROTO_CANCEL(recvmsg); PROTO_NORMAL(send); PROTO_CANCEL(sendmsg); Index: lib/libc/sys/Makefile.inc === RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v retrieving revision 1.163 diff -u -p -r1.163 Makefile.inc --- lib/libc/sys/Makefile.inc 17 Jul 2022 03:04:27 - 1.163 +++ lib/libc/sys/Makefile.inc 30 Aug 2022 15:44:29 - @@ -34,7 +34,7 @@ CANCEL= accept accept4 \ nanosleep \ open openat \ poll ppoll pread preadv pselect pwrite pwritev \ - read readv recvfrom recvmsg \ + read readv recvfrom recvmmsg recvmsg \ select sendmsg sendto \ wait4 write writev SRCS+= ${CANCEL:%=w_%.c} Index: lib/libc/sys/recv.2 === RCS file: /cvs/src/lib/libc/sys/recv.2,v retrieving revision 1.48 diff -u -p -r1.48 recv.2 --- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 - 1.48 +++ lib/libc/sys/recv.2 30 Aug 2022 15:44:29 - @@ -46,15 +46,35 @@ .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen" .Ft ssize_t .Fn recvmsg "int s" "struct msghdr *msg" "int flags" +.Ft int +.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "unsigned int flags" "struct timespec *timeout" .Sh DESCRIPTION -.Fn recvfrom +.Fn recv , +.Fn recvfrom , +.Fn recvmsg , and -.Fn recvmsg +.Fn recvmmsg are used to receive messages from a socket, -.Fa s , -and may be used to receive +.Fa s . +.Fn recv +is normally used only on a +.Em connected +socket (see +.Xr connect 2 ). +.Fn recvfrom , +.Fn recvmsg , +and +.Fn recvmmsg +may be used to receive data on a socket whether or not it is connection-oriented. .Pp +.Fn recv +is identical to +.Fn recvfrom +with a null +.Fa from +parameter. 
+.Pp If .Fa from is non-null and the socket is not connection-oriented, @@ -66,25 +86,6 @@ the buffer associated with and modified on return to indicate the actual size of the address stored there. .Pp -The -.Fn recv -call is normally used only on a -.Em connected -socket (see -.Xr connect 2 ) -and is identical to -.Fn recvfrom -with a null -.Fa from -parameter. -.Pp -On successful completion, all three routines return the number of -message bytes read. -If a message is too long to fit in the supplied -buffer, excess bytes may be discarded depending on the type of socket -the message is received from (see -.Xr socket 2 ) . -.Pp If no messages are available at the socket, the receive call waits for
add sendmmsg and recvmmsg systemcalls
Hi tech@, I implemented the sendmmsg and recvmmsg system calls by copying the NetBSD implementation and adjusting it. The idea behind the mmsg system calls is to make fewer system calls per msghdr and thus improve throughput. This should allow faster processing of packets (UDP, raw IP) in userland. int sendmmsg(int s, const struct mmsghdr *mmsg, unsigned int vlen, unsigned int flags); int recvmmsg(int s, struct mmsghdr *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); The interface is incompatible with the other send and recv system calls, and the timeout feels unnecessary, but this way it is compatible with Linux and NetBSD. The diff is below. mbuhl Index: lib/libc/Symbols.list === RCS file: /cvs/src/lib/libc/Symbols.list,v retrieving revision 1.74 diff -u -p -r1.74 Symbols.list --- lib/libc/Symbols.list 3 Jun 2021 13:19:45 - 1.74 +++ lib/libc/Symbols.list 22 Apr 2022 16:03:30 - @@ -176,6 +176,7 @@ _thread_sys_readv _thread_sys_reboot _thread_sys_recvfrom _thread_sys_recvmsg +_thread_sys_recvmmsg _thread_sys_rename _thread_sys_renameat _thread_sys_revoke @@ -185,6 +186,7 @@ _thread_sys_select _thread_sys_semget _thread_sys_semop _thread_sys_sendmsg +_thread_sys_sendmmsg _thread_sys_sendsyslog _thread_sys_sendto _thread_sys_setegid @@ -373,6 +375,7 @@ readv reboot recvfrom recvmsg +recvmmsg rename renameat revoke @@ -384,6 +387,7 @@ semctl semget semop sendmsg +sendmmsg sendsyslog sendto setegid Index: lib/libc/shlib_version === RCS file: /cvs/src/lib/libc/shlib_version,v retrieving revision 1.210 diff -u -p -r1.210 shlib_version --- lib/libc/shlib_version 2 Jun 2021 07:29:03 - 1.210 +++ lib/libc/shlib_version 22 Apr 2022 16:03:30 - @@ -1,4 +1,4 @@ major=96 -minor=1 +minor=2 # note: If changes were made to include/thread_private.h or if system calls # were added/changed then librthread/shlib_version must also be updated. 
Index: lib/libc/hidden/sys/socket.h === RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v retrieving revision 1.4 diff -u -p -r1.4 socket.h --- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 - 1.4 +++ lib/libc/hidden/sys/socket.h22 Apr 2022 16:03:30 - @@ -33,8 +33,10 @@ PROTO_NORMAL(listen); PROTO_NORMAL(recv); PROTO_CANCEL(recvfrom); PROTO_CANCEL(recvmsg); +PROTO_CANCEL(recvmmsg); PROTO_NORMAL(send); PROTO_CANCEL(sendmsg); +PROTO_CANCEL(sendmmsg); PROTO_CANCEL(sendto); PROTO_NORMAL(setrtable); PROTO_NORMAL(setsockopt); Index: lib/libc/sys/Makefile.inc === RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v retrieving revision 1.161 diff -u -p -r1.161 Makefile.inc --- lib/libc/sys/Makefile.inc 23 Dec 2021 18:50:32 - 1.161 +++ lib/libc/sys/Makefile.inc 22 Apr 2022 16:03:30 - @@ -34,8 +34,8 @@ CANCEL= accept accept4 \ nanosleep \ open openat \ poll ppoll pread preadv pselect pwrite pwritev \ - read readv recvfrom recvmsg \ - select sendmsg sendto \ + read readv recvfrom recvmsg recvmmsg \ + select sendmsg sendmmsg sendto \ wait4 write writev SRCS+= ${CANCEL:%=w_%.c} Index: lib/libc/sys/recv.2 === RCS file: /cvs/src/lib/libc/sys/recv.2,v retrieving revision 1.48 diff -u -p -r1.48 recv.2 --- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 - 1.48 +++ lib/libc/sys/recv.2 22 Apr 2022 16:03:30 - @@ -46,15 +46,35 @@ .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen" .Ft ssize_t .Fn recvmsg "int s" "struct msghdr *msg" "int flags" +.Ft int +.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "unsigned int flags" "struct timespec *timeout" .Sh DESCRIPTION -.Fn recvfrom +.Fn recv , +.Fn recvfrom , +.Fn recvmsg , and -.Fn recvmsg +.Fn recvmmsg are used to receive messages from a socket, -.Fa s , -and may be used to receive +.Fa s . +.Fn recv +is normally used only on a +.Em connected +socket (see +.Xr connect 2 ). 
+.Fn recvfrom , +.Fn recvmsg , +and +.Fn recvmmsg +may be used to receive data on a socket whether or not it is connection-oriented. .Pp +.Fn recv +is identical to +.Fn recvfrom +with a null +.Fa from +parameter. +.Pp If .Fa from is non-null and the socket is not connection-oriented, @@ -66,25 +86,6 @@ the buffer associated with and modified on return to indicate the actual size of the address stored there. .Pp -The -.Fn recv -call is normally used only on a -.Em connected -socket (see -.Xr connect 2 ) -and is identical to -.Fn recvfrom -with a null -.Fa from -parameter. -.Pp -On successful completion, all three routines return the number of -message bytes