Re: add sendmmsg and recvmmsg systemcalls

2022-09-08 Thread Alexander Bluhm
On Thu, Sep 08, 2022 at 03:01:14PM +0200, Moritz Buhl wrote:
> On Wed, Aug 31, 2022 at 05:44:31PM -0900, Philip Guenther wrote:
> > kdump.c will need at least a SYS_recvmmsg line in the big table, and if you
> > do a ktrmmsghdr() bit in the kernel a matching decoder will be needed in
> > kdump.
> 
> Here is a new diff for kdump.
> OK?

OK bluhm@

> Index: usr.bin/kdump/kdump.c
> ===
> RCS file: /cvs/src/usr.bin/kdump/kdump.c,v
> retrieving revision 1.149
> diff -u -p -r1.149 kdump.c
> --- usr.bin/kdump/kdump.c 20 Jul 2022 05:56:36 -  1.149
> +++ usr.bin/kdump/kdump.c 7 Sep 2022 11:14:19 -
> @@ -720,6 +720,8 @@ static const formatter scargs[][8] = {
>  [SYS_ptrace] = { Ptracedecode, Ppid_t, Pptr, Pdecint },
>  [SYS_recvmsg]= { Pfd, Pptr, Sendrecvflagsname },
>  [SYS_sendmsg]= { Pfd, Pptr, Sendrecvflagsname },
> +[SYS_recvmmsg]   = { Pfd, Pptr, Pucount, Sendrecvflagsname, Pptr },
> +[SYS_sendmmsg]   = { Pfd, Pptr, Pucount, Sendrecvflagsname },
>  [SYS_recvfrom]   = { Pfd, Pptr, Pbigsize, Sendrecvflagsname },
>  [SYS_accept] = { Pfd, Pptr, Pptr },
>  [SYS_getpeername]= { Pfd, Pptr, Pptr },
> Index: usr.bin/kdump/ktrstruct.c
> ===
> RCS file: /cvs/src/usr.bin/kdump/ktrstruct.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 ktrstruct.c
> --- usr.bin/kdump/ktrstruct.c 21 Dec 2020 07:47:37 -  1.29
> +++ usr.bin/kdump/ktrstruct.c 7 Sep 2022 10:00:23 -
> @@ -398,6 +398,18 @@ ktrquota(const struct dqblk *quota)
>  }
>  
>  static void
> +ktrmmsghdr(const struct mmsghdr *mmsg)
> +{
> + printf("struct mmsghdr { msg_hdr = { name=%p, namelen=%u, "
> + "iov=%p, iovlen=%u, control=%p, controllen=%u, flags=",
> + mmsg->msg_hdr.msg_name, mmsg->msg_hdr.msg_namelen,
> + mmsg->msg_hdr.msg_iov, mmsg->msg_hdr.msg_iovlen,
> + mmsg->msg_hdr.msg_control, mmsg->msg_hdr.msg_controllen);
> + sendrecvflagsname(mmsg->msg_hdr.msg_flags);
> + printf(" }, msg_len = %u }\n", mmsg->msg_len);
> +}
> +
> +static void
>  ktrmsghdr(const struct msghdr *msg)
>  {
>   printf("struct msghdr { name=%p, namelen=%u, iov=%p, iovlen=%u,"
> @@ -649,6 +661,13 @@ ktrstruct(char *buf, size_t buflen)
>   goto invalid;
>   memcpy(&msg, data, datalen);
>   ktrmsghdr(&msg);
> + } else if (strcmp(name, "mmsghdr") == 0) {
> + struct mmsghdr mmsg;
> +
> + if (datalen != sizeof(mmsg))
> + goto invalid;
> + memcpy(&mmsg, data, datalen);
> + ktrmmsghdr(&mmsg);
>   } else if (strcmp(name, "iovec") == 0) {
>   if (datalen % sizeof(struct iovec))
>   goto invalid;



Re: add sendmmsg and recvmmsg systemcalls

2022-09-08 Thread Moritz Buhl
On Wed, Aug 31, 2022 at 05:44:31PM -0900, Philip Guenther wrote:
> kdump.c will need at least a SYS_recvmmsg line in the big table, and if you
> do a ktrmmsghdr() bit in the kernel a matching decoder will be needed in
> kdump.

Here is a new diff for kdump.
OK?

mbuhl

Index: usr.bin/kdump/kdump.c
===
RCS file: /cvs/src/usr.bin/kdump/kdump.c,v
retrieving revision 1.149
diff -u -p -r1.149 kdump.c
--- usr.bin/kdump/kdump.c   20 Jul 2022 05:56:36 -  1.149
+++ usr.bin/kdump/kdump.c   7 Sep 2022 11:14:19 -
@@ -720,6 +720,8 @@ static const formatter scargs[][8] = {
 [SYS_ptrace]   = { Ptracedecode, Ppid_t, Pptr, Pdecint },
 [SYS_recvmsg]  = { Pfd, Pptr, Sendrecvflagsname },
 [SYS_sendmsg]  = { Pfd, Pptr, Sendrecvflagsname },
+[SYS_recvmmsg] = { Pfd, Pptr, Pucount, Sendrecvflagsname, Pptr },
+[SYS_sendmmsg] = { Pfd, Pptr, Pucount, Sendrecvflagsname },
 [SYS_recvfrom] = { Pfd, Pptr, Pbigsize, Sendrecvflagsname },
 [SYS_accept]   = { Pfd, Pptr, Pptr },
 [SYS_getpeername]  = { Pfd, Pptr, Pptr },
Index: usr.bin/kdump/ktrstruct.c
===
RCS file: /cvs/src/usr.bin/kdump/ktrstruct.c,v
retrieving revision 1.29
diff -u -p -r1.29 ktrstruct.c
--- usr.bin/kdump/ktrstruct.c   21 Dec 2020 07:47:37 -  1.29
+++ usr.bin/kdump/ktrstruct.c   7 Sep 2022 10:00:23 -
@@ -398,6 +398,18 @@ ktrquota(const struct dqblk *quota)
 }
 
 static void
+ktrmmsghdr(const struct mmsghdr *mmsg)
+{
+   printf("struct mmsghdr { msg_hdr = { name=%p, namelen=%u, "
+   "iov=%p, iovlen=%u, control=%p, controllen=%u, flags=",
+   mmsg->msg_hdr.msg_name, mmsg->msg_hdr.msg_namelen,
+   mmsg->msg_hdr.msg_iov, mmsg->msg_hdr.msg_iovlen,
+   mmsg->msg_hdr.msg_control, mmsg->msg_hdr.msg_controllen);
+   sendrecvflagsname(mmsg->msg_hdr.msg_flags);
+   printf(" }, msg_len = %u }\n", mmsg->msg_len);
+}
+
+static void
 ktrmsghdr(const struct msghdr *msg)
 {
printf("struct msghdr { name=%p, namelen=%u, iov=%p, iovlen=%u,"
@@ -649,6 +661,13 @@ ktrstruct(char *buf, size_t buflen)
goto invalid;
memcpy(&msg, data, datalen);
ktrmsghdr(&msg);
+   } else if (strcmp(name, "mmsghdr") == 0) {
+   struct mmsghdr mmsg;
+
+   if (datalen != sizeof(mmsg))
+   goto invalid;
+   memcpy(&mmsg, data, datalen);
+   ktrmmsghdr(&mmsg);
} else if (strcmp(name, "iovec") == 0) {
if (datalen % sizeof(struct iovec))
goto invalid;



Re: add sendmmsg and recvmmsg systemcalls

2022-09-06 Thread Moritz Buhl
On Tue, Sep 06, 2022 at 04:00:39PM +0200, Moritz Buhl wrote:
> Hi,
> here is the most recent diff for the libc part of send and recvmmsg.
> This requires a libc minor bump and therefore should be coordinated
> after snapshots are building normally again.
> 
> To my understanding the minor bump itself should not cause problems
> in ports anymore.

miod reminded me to also bump librthread as stated in libc/shlib_version.

Index: lib/libc/Symbols.list
===
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.75
diff -u -p -r1.75 Symbols.list
--- lib/libc/Symbols.list   2 Aug 2022 16:45:00 -   1.75
+++ lib/libc/Symbols.list   6 Sep 2022 09:36:40 -
@@ -175,6 +175,7 @@ _thread_sys_readlinkat
 _thread_sys_readv
 _thread_sys_reboot
 _thread_sys_recvfrom
+_thread_sys_recvmmsg
 _thread_sys_recvmsg
 _thread_sys_rename
 _thread_sys_renameat
@@ -184,6 +185,7 @@ _thread_sys_sched_yield
 _thread_sys_select
 _thread_sys_semget
 _thread_sys_semop
+_thread_sys_sendmmsg
 _thread_sys_sendmsg
 _thread_sys_sendsyslog
 _thread_sys_sendto
@@ -372,6 +374,7 @@ readlinkat
 readv
 reboot
 recvfrom
+recvmmsg
 recvmsg
 rename
 renameat
@@ -383,6 +386,7 @@ select
 semctl
 semget
 semop
+sendmmsg
 sendmsg
 sendsyslog
 sendto
Index: lib/libc/shlib_version
===
RCS file: /cvs/src/lib/libc/shlib_version,v
retrieving revision 1.210
diff -u -p -r1.210 shlib_version
--- lib/libc/shlib_version  2 Jun 2021 07:29:03 -   1.210
+++ lib/libc/shlib_version  6 Sep 2022 13:42:09 -
@@ -1,4 +1,4 @@
 major=96
-minor=1
+minor=2
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
Index: lib/libc/hidden/sys/socket.h
===
RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v
retrieving revision 1.4
diff -u -p -r1.4 socket.h
--- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 -   1.4
+++ lib/libc/hidden/sys/socket.h6 Sep 2022 13:41:53 -
@@ -33,9 +33,11 @@ PROTO_NORMAL(listen);
 PROTO_NORMAL(recv);
 PROTO_CANCEL(recvfrom);
 PROTO_CANCEL(recvmsg);
+PROTO_CANCEL(recvmmsg);
 PROTO_NORMAL(send);
-PROTO_CANCEL(sendmsg);
 PROTO_CANCEL(sendto);
+PROTO_CANCEL(sendmsg);
+PROTO_CANCEL(sendmmsg);
 PROTO_NORMAL(setrtable);
 PROTO_NORMAL(setsockopt);
 PROTO_NORMAL(shutdown);
Index: lib/libc/sys/Makefile.inc
===
RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
retrieving revision 1.163
diff -u -p -r1.163 Makefile.inc
--- lib/libc/sys/Makefile.inc   17 Jul 2022 03:04:27 -  1.163
+++ lib/libc/sys/Makefile.inc   6 Sep 2022 13:41:53 -
@@ -34,8 +34,8 @@ CANCEL=   accept accept4 \
nanosleep \
open openat \
poll ppoll pread preadv pselect pwrite pwritev \
-   read readv recvfrom recvmsg \
-   select sendmsg sendto \
+   read readv recvfrom recvmsg recvmmsg \
+   select sendto sendmsg sendmmsg \
wait4 write writev
 SRCS+= ${CANCEL:%=w_%.c}
 
Index: lib/libc/sys/recv.2
===
RCS file: /cvs/src/lib/libc/sys/recv.2,v
retrieving revision 1.48
diff -u -p -r1.48 recv.2
--- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 -  1.48
+++ lib/libc/sys/recv.2 6 Sep 2022 13:42:12 -
@@ -46,15 +46,35 @@
 .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen"
 .Ft ssize_t
 .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
+.Ft int
+.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "int flags" "struct timespec *timeout"
 .Sh DESCRIPTION
-.Fn recvfrom
+.Fn recv ,
+.Fn recvfrom ,
+.Fn recvmsg ,
 and
-.Fn recvmsg
+.Fn recvmmsg
 are used to receive messages from a socket,
-.Fa s ,
-and may be used to receive
+.Fa s .
+.Fn recv
+is normally used only on a
+.Em connected
+socket (see
+.Xr connect 2 ).
+.Fn recvfrom ,
+.Fn recvmsg ,
+and
+.Fn recvmmsg
+may be used to receive
 data on a socket whether or not it is connection-oriented.
 .Pp
+.Fn recv
+is identical to
+.Fn recvfrom
+with a null
+.Fa from
+parameter.
+.Pp
 If
 .Fa from
 is non-null and the socket is not connection-oriented,
@@ -66,25 +86,6 @@ the buffer associated with
 and modified on return to indicate the actual size of the
 address stored there.
 .Pp
-The
-.Fn recv
-call is normally used only on a
-.Em connected
-socket (see
-.Xr connect 2 )
-and is identical to
-.Fn recvfrom
-with a null
-.Fa from
-parameter.
-.Pp
-On successful completion, all three routines return the number of
-message bytes read.
-If a message is too long to fit in the supplied
-buffer, excess bytes may be discarded depending on the type of socket
-the message is received from (see
-.Xr socket 2 ) .
-.Pp
 If no messages are available at the socket, the
 

Re: add sendmmsg and recvmmsg systemcalls

2022-09-06 Thread Moritz Buhl
Hi,
here is the most recent diff for the libc part of send and recvmmsg.
This requires a libc minor bump and therefore should be coordinated
after snapshots are building normally again.

To my understanding the minor bump itself should not cause problems
in ports anymore.

mbuhl

Index: lib/libc/Symbols.list
===
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.75
diff -u -p -r1.75 Symbols.list
--- lib/libc/Symbols.list   2 Aug 2022 16:45:00 -   1.75
+++ lib/libc/Symbols.list   6 Sep 2022 09:36:40 -
@@ -175,6 +175,7 @@ _thread_sys_readlinkat
 _thread_sys_readv
 _thread_sys_reboot
 _thread_sys_recvfrom
+_thread_sys_recvmmsg
 _thread_sys_recvmsg
 _thread_sys_rename
 _thread_sys_renameat
@@ -184,6 +185,7 @@ _thread_sys_sched_yield
 _thread_sys_select
 _thread_sys_semget
 _thread_sys_semop
+_thread_sys_sendmmsg
 _thread_sys_sendmsg
 _thread_sys_sendsyslog
 _thread_sys_sendto
@@ -372,6 +374,7 @@ readlinkat
 readv
 reboot
 recvfrom
+recvmmsg
 recvmsg
 rename
 renameat
@@ -383,6 +386,7 @@ select
 semctl
 semget
 semop
+sendmmsg
 sendmsg
 sendsyslog
 sendto
Index: lib/libc/shlib_version
===
RCS file: /cvs/src/lib/libc/shlib_version,v
retrieving revision 1.210
diff -u -p -r1.210 shlib_version
--- lib/libc/shlib_version  2 Jun 2021 07:29:03 -   1.210
+++ lib/libc/shlib_version  5 Sep 2022 11:57:10 -
@@ -1,4 +1,4 @@
 major=96
-minor=1
+minor=2
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
Index: lib/libc/hidden/sys/socket.h
===
RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v
retrieving revision 1.4
diff -u -p -r1.4 socket.h
--- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 -   1.4
+++ lib/libc/hidden/sys/socket.h6 Sep 2022 09:36:49 -
@@ -33,9 +33,11 @@ PROTO_NORMAL(listen);
 PROTO_NORMAL(recv);
 PROTO_CANCEL(recvfrom);
 PROTO_CANCEL(recvmsg);
+PROTO_CANCEL(recvmmsg);
 PROTO_NORMAL(send);
-PROTO_CANCEL(sendmsg);
 PROTO_CANCEL(sendto);
+PROTO_CANCEL(sendmsg);
+PROTO_CANCEL(sendmmsg);
 PROTO_NORMAL(setrtable);
 PROTO_NORMAL(setsockopt);
 PROTO_NORMAL(shutdown);
Index: lib/libc/sys/Makefile.inc
===
RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
retrieving revision 1.163
diff -u -p -r1.163 Makefile.inc
--- lib/libc/sys/Makefile.inc   17 Jul 2022 03:04:27 -  1.163
+++ lib/libc/sys/Makefile.inc   6 Sep 2022 09:37:18 -
@@ -34,8 +34,8 @@ CANCEL=   accept accept4 \
nanosleep \
open openat \
poll ppoll pread preadv pselect pwrite pwritev \
-   read readv recvfrom recvmsg \
-   select sendmsg sendto \
+   read readv recvfrom recvmsg recvmmsg \
+   select sendto sendmsg sendmmsg \
wait4 write writev
 SRCS+= ${CANCEL:%=w_%.c}
 
Index: lib/libc/sys/recv.2
===
RCS file: /cvs/src/lib/libc/sys/recv.2,v
retrieving revision 1.48
diff -u -p -r1.48 recv.2
--- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 -  1.48
+++ lib/libc/sys/recv.2 5 Sep 2022 14:59:00 -
@@ -46,15 +46,35 @@
 .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen"
 .Ft ssize_t
 .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
+.Ft int
+.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "int flags" "struct timespec *timeout"
 .Sh DESCRIPTION
-.Fn recvfrom
+.Fn recv ,
+.Fn recvfrom ,
+.Fn recvmsg ,
 and
-.Fn recvmsg
+.Fn recvmmsg
 are used to receive messages from a socket,
-.Fa s ,
-and may be used to receive
+.Fa s .
+.Fn recv
+is normally used only on a
+.Em connected
+socket (see
+.Xr connect 2 ).
+.Fn recvfrom ,
+.Fn recvmsg ,
+and
+.Fn recvmmsg
+may be used to receive
 data on a socket whether or not it is connection-oriented.
 .Pp
+.Fn recv
+is identical to
+.Fn recvfrom
+with a null
+.Fa from
+parameter.
+.Pp
 If
 .Fa from
 is non-null and the socket is not connection-oriented,
@@ -66,25 +86,6 @@ the buffer associated with
 and modified on return to indicate the actual size of the
 address stored there.
 .Pp
-The
-.Fn recv
-call is normally used only on a
-.Em connected
-socket (see
-.Xr connect 2 )
-and is identical to
-.Fn recvfrom
-with a null
-.Fa from
-parameter.
-.Pp
-On successful completion, all three routines return the number of
-message bytes read.
-If a message is too long to fit in the supplied
-buffer, excess bytes may be discarded depending on the type of socket
-the message is received from (see
-.Xr socket 2 ) .
-.Pp
 If no messages are available at the socket, the
 receive call waits for a message to arrive, unless
 the socket is nonblocking (see
@@ -158,6 +159,8 @@ The
 .Dv MSG_CMSG_CLOEXEC
 requests that 

Re: add sendmmsg and recvmmsg systemcalls

2022-09-03 Thread Alexander Bluhm
On Sat, Sep 03, 2022 at 01:49:27AM +0200, Moritz Buhl wrote:
> Here is an updated version of the kernel part for sendmmsg.

OK bluhm@

> Index: sys/kern/syscalls.master
> ===
> RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.230
> diff -u -p -r1.230 syscalls.master
> --- sys/kern/syscalls.master  2 Sep 2022 13:18:06 -   1.230
> +++ sys/kern/syscalls.master  2 Sep 2022 20:34:15 -
> @@ -247,7 +247,9 @@
>  116  STD NOLOCK  { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \
>   unsigned int vlen, unsigned int flags, \
>   struct timespec *timeout); }
> -117  UNIMPL  sendmmsg
> +117  STD NOLOCK  { int sys_sendmmsg(int s,  \
> + struct mmsghdr *mmsg, unsigned int vlen, \
> + unsigned int flags); }
>  118  STD { int sys_getsockopt(int s, int level, int name, \
>   void *val, socklen_t *avalsize); }
>  119  STD { int sys_thrkill(pid_t tid, int signum, void *tcb); }
> Index: sys/kern/uipc_syscalls.c
> ===
> RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.202
> diff -u -p -r1.202 uipc_syscalls.c
> --- sys/kern/uipc_syscalls.c  2 Sep 2022 13:18:06 -   1.202
> +++ sys/kern/uipc_syscalls.c  2 Sep 2022 23:26:08 -
> @@ -606,6 +606,92 @@ done:
>  }
>  
>  int
> +sys_sendmmsg(struct proc *p, void *v, register_t *retval)
> +{
> + struct sys_sendmmsg_args /* {
> + syscallarg(int) s;
> + syscallarg(struct mmsghdr *)mmsg;
> + syscallarg(unsigned int)vlen;
> + syscallarg(unsigned int)flags;
> + } */ *uap = v;
> + struct mmsghdr mmsg, *mmsgp;
> + struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *uiov;
> + size_t iovlen = UIO_SMALLIOV;
> + register_t retsnd;
> + unsigned int vlen, dgrams;
> + int error = 0;
> +
> + /* Arbitrarily capped at 1024 datagrams. */
> + vlen = SCARG(uap, vlen);
> + if (vlen > 1024)
> + vlen = 1024;
> +
> + mmsgp = SCARG(uap, mmsg);
> + for (dgrams = 0; dgrams < vlen; dgrams++) {
> + error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg));
> + if (error)
> + break;
> +
> +#ifdef KTRACE
> + if (KTRPOINT(p, KTR_STRUCT))
> + ktrmmsghdr(p, &mmsg);
> +#endif
> +
> + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) {
> + error = EMSGSIZE;
> + break;
> + }
> +
> + if (mmsg.msg_hdr.msg_iovlen > iovlen) {
> + if (iov != aiov)
> + free(iov, M_IOV, iovlen *
> + sizeof(struct iovec));
> +
> + iovlen = mmsg.msg_hdr.msg_iovlen;
> + iov = mallocarray(iovlen, sizeof(struct iovec),
> + M_IOV, M_WAITOK);
> + }
> +
> + if (mmsg.msg_hdr.msg_iovlen > 0) {
> + error = copyin(mmsg.msg_hdr.msg_iov, iov,
> + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec));
> + if (error)
> + break;
> + }
> +
> +#ifdef KTRACE
> + if (mmsg.msg_hdr.msg_iovlen && KTRPOINT(p, KTR_STRUCT))
> + ktriovec(p, iov, mmsg.msg_hdr.msg_iovlen);
> +#endif
> +
> + uiov = mmsg.msg_hdr.msg_iov;
> + mmsg.msg_hdr.msg_iov = iov;
> + mmsg.msg_hdr.msg_flags = 0;
> +
> + error = sendit(p, SCARG(uap, s), &mmsg.msg_hdr,
> + SCARG(uap, flags), &retsnd);
> + if (error)
> + break;
> +
> + mmsg.msg_hdr.msg_iov = uiov;
> + mmsg.msg_len = retsnd;
> +
> + error = copyout(&mmsg, &mmsgp[dgrams], sizeof(mmsg));
> + if (error)
> + break;
> + }
> +
> + if (iov != aiov)
> + free(iov, M_IOV, sizeof(struct iovec) * iovlen);
> +
> + *retval = dgrams;
> +
> + if (dgrams)
> + return 0;
> + return error;
> +}
> +
> +int
>  sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
>  {
>   struct file *fp;
> Index: sys/sys/socket.h
> ===
> RCS file: /mount/openbsd/cvs/src/sys/sys/socket.h,v
> retrieving revision 1.103
> diff -u -p -r1.103 socket.h
> --- sys/sys/socket.h  2 Sep 2022 13:18:07 -   1.103
> +++ sys/sys/socket.h  2 Sep 2022 22:31:09 -
> @@ -579,6 +579,7 @@ ssize_t   send(int, const void *, size_t, 
>  ssize_t  sendto(int, const void *,
>   size_t, int, const struct sockaddr *, socklen_t);
>  ssize_t  sendmsg(int, const struct msghdr *, int);
> +int 

Re: add sendmmsg and recvmmsg systemcalls

2022-09-02 Thread Moritz Buhl
Here is an updated version of the kernel part for sendmmsg.

mbuhl

Index: sys/kern/syscalls.master
===
RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.230
diff -u -p -r1.230 syscalls.master
--- sys/kern/syscalls.master2 Sep 2022 13:18:06 -   1.230
+++ sys/kern/syscalls.master2 Sep 2022 20:34:15 -
@@ -247,7 +247,9 @@
 116STD NOLOCK  { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \
unsigned int vlen, unsigned int flags, \
struct timespec *timeout); }
-117UNIMPL  sendmmsg
+117STD NOLOCK  { int sys_sendmmsg(int s,  \
+   struct mmsghdr *mmsg, unsigned int vlen, \
+   unsigned int flags); }
 118STD { int sys_getsockopt(int s, int level, int name, \
void *val, socklen_t *avalsize); }
 119STD { int sys_thrkill(pid_t tid, int signum, void *tcb); }
Index: sys/kern/uipc_syscalls.c
===
RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.202
diff -u -p -r1.202 uipc_syscalls.c
--- sys/kern/uipc_syscalls.c2 Sep 2022 13:18:06 -   1.202
+++ sys/kern/uipc_syscalls.c2 Sep 2022 23:26:08 -
@@ -606,6 +606,92 @@ done:
 }
 
 int
+sys_sendmmsg(struct proc *p, void *v, register_t *retval)
+{
+   struct sys_sendmmsg_args /* {
+   syscallarg(int) s;
+   syscallarg(struct mmsghdr *)mmsg;
+   syscallarg(unsigned int)vlen;
+   syscallarg(unsigned int)flags;
+   } */ *uap = v;
+   struct mmsghdr mmsg, *mmsgp;
+   struct iovec aiov[UIO_SMALLIOV], *iov = aiov, *uiov;
+   size_t iovlen = UIO_SMALLIOV;
+   register_t retsnd;
+   unsigned int vlen, dgrams;
+   int error = 0;
+
+   /* Arbitrarily capped at 1024 datagrams. */
+   vlen = SCARG(uap, vlen);
+   if (vlen > 1024)
+   vlen = 1024;
+
+   mmsgp = SCARG(uap, mmsg);
+   for (dgrams = 0; dgrams < vlen; dgrams++) {
+   error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg));
+   if (error)
+   break;
+
+#ifdef KTRACE
+   if (KTRPOINT(p, KTR_STRUCT))
+   ktrmmsghdr(p, &mmsg);
+#endif
+
+   if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) {
+   error = EMSGSIZE;
+   break;
+   }
+
+   if (mmsg.msg_hdr.msg_iovlen > iovlen) {
+   if (iov != aiov)
+   free(iov, M_IOV, iovlen *
+   sizeof(struct iovec));
+
+   iovlen = mmsg.msg_hdr.msg_iovlen;
+   iov = mallocarray(iovlen, sizeof(struct iovec),
+   M_IOV, M_WAITOK);
+   }
+
+   if (mmsg.msg_hdr.msg_iovlen > 0) {
+   error = copyin(mmsg.msg_hdr.msg_iov, iov,
+   mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec));
+   if (error)
+   break;
+   }
+
+#ifdef KTRACE
+   if (mmsg.msg_hdr.msg_iovlen && KTRPOINT(p, KTR_STRUCT))
+   ktriovec(p, iov, mmsg.msg_hdr.msg_iovlen);
+#endif
+
+   uiov = mmsg.msg_hdr.msg_iov;
+   mmsg.msg_hdr.msg_iov = iov;
+   mmsg.msg_hdr.msg_flags = 0;
+
+   error = sendit(p, SCARG(uap, s), &mmsg.msg_hdr,
+   SCARG(uap, flags), &retsnd);
+   if (error)
+   break;
+
+   mmsg.msg_hdr.msg_iov = uiov;
+   mmsg.msg_len = retsnd;
+
+   error = copyout(&mmsg, &mmsgp[dgrams], sizeof(mmsg));
+   if (error)
+   break;
+   }
+
+   if (iov != aiov)
+   free(iov, M_IOV, sizeof(struct iovec) * iovlen);
+
+   *retval = dgrams;
+
+   if (dgrams)
+   return 0;
+   return error;
+}
+
+int
 sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
 {
struct file *fp;
Index: sys/sys/socket.h
===
RCS file: /mount/openbsd/cvs/src/sys/sys/socket.h,v
retrieving revision 1.103
diff -u -p -r1.103 socket.h
--- sys/sys/socket.h2 Sep 2022 13:18:07 -   1.103
+++ sys/sys/socket.h2 Sep 2022 22:31:09 -
@@ -579,6 +579,7 @@ ssize_t send(int, const void *, size_t, 
 ssize_tsendto(int, const void *,
size_t, int, const struct sockaddr *, socklen_t);
 ssize_tsendmsg(int, const struct msghdr *, int);
+intsendmmsg(int, struct mmsghdr *, unsigned int, unsigned int);
 intsetsockopt(int, int, int, const void *, socklen_t);
 intshutdown(int, int);
 int

Re: add sendmmsg and recvmmsg systemcalls

2022-09-02 Thread Alexander Bluhm
On Thu, Sep 01, 2022 at 06:06:10PM +0200, Moritz Buhl wrote:
> I addressed your concerns as well as these of jca, just the kernel
> part (and the new ktrace stuff) below.
> 
> One minor thing: I didn't see any kdump output where one struct was
> contained in another one but I am printing it like ddb would so I
> guess it should be fine.

OK bluhm@

> Index: kern/syscalls.master
> ===
> RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.229
> diff -u -p -r1.229 syscalls.master
> --- kern/syscalls.master  1 Aug 2022 14:56:59 -   1.229
> +++ kern/syscalls.master  1 Sep 2022 14:52:47 -
> @@ -244,8 +244,10 @@
>   const char *permissions); }
>  115  STD { int sys___realpath(const char *pathname, \
>   char *resolved); }
> -116  OBSOL   t32_gettimeofday
> -117  OBSOL   t32_getrusage
> +116  STD NOLOCK  { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \
> + unsigned int vlen, unsigned int flags, \
> + struct timespec *timeout); }
> +117  UNIMPL  sendmmsg
>  118  STD { int sys_getsockopt(int s, int level, int name, \
>   void *val, socklen_t *avalsize); }
>  119  STD { int sys_thrkill(pid_t tid, int signum, void *tcb); }
> Index: kern/uipc_syscalls.c
> ===
> RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.201
> diff -u -p -r1.201 uipc_syscalls.c
> --- kern/uipc_syscalls.c  14 Aug 2022 01:58:28 -  1.201
> +++ kern/uipc_syscalls.c  1 Sep 2022 14:37:26 -
> @@ -805,6 +805,140 @@ done:
>  }
>  
>  int
> +sys_recvmmsg(struct proc *p, void *v, register_t *retval)
> +{
> + struct sys_recvmmsg_args /* {
> + syscallarg(int) s;
> + syscallarg(struct mmsghdr *)mmsg;
> + syscallarg(unsigned int)vlen;
> + syscallarg(unsigned int)flags;
> + syscallarg(struct timespec *)   timeout;
> + } */ *uap = v;
> + struct mmsghdr mmsg, *mmsgp;
> + struct timespec ts, now;
> + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov;
> + struct file *fp;
> + struct socket *so;
> + struct timespec *timeout;
> + size_t iovlen = UIO_SMALLIOV;
> + register_t retrec;
> + unsigned int vlen, dgrams;
> + int error = 0, flags, s;
> +
> + s = SCARG(uap, s);
> + if ((error = getsock(p, s, &fp)))
> + return (error);
> + so = (struct socket *)fp->f_data;
> +
> + timeout = SCARG(uap, timeout);
> + if (timeout != NULL) {
> + error = copyin(timeout, &ts, sizeof(ts));
> + if (error)
> + return error;
> +#ifdef KTRACE
> + if (KTRPOINT(p, KTR_STRUCT))
> + ktrreltimespec(p, &ts);
> +#endif
> + getnanotime(&now);
> + timespecadd(&now, &ts, &ts);
> + }
> +
> + flags = SCARG(uap, flags);
> +
> + /* Arbitrarily capped at 1024 datagrams. */
> + vlen = SCARG(uap, vlen);
> + if (vlen > 1024)
> + vlen = 1024;
> +
> + mmsgp = SCARG(uap, mmsg);
> + for (dgrams = 0; dgrams < vlen;) {
> + error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg));
> + if (error)
> + break;
> +
> + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) {
> + error = EMSGSIZE;
> + break;
> + }
> +
> + if (mmsg.msg_hdr.msg_iovlen > iovlen) {
> + if (iov != aiov)
> + free(iov, M_IOV, iovlen *
> + sizeof(struct iovec));
> +
> + iovlen = mmsg.msg_hdr.msg_iovlen;
> + iov = mallocarray(iovlen, sizeof(struct iovec),
> + M_IOV, M_WAITOK);
> + }
> +
> + if (mmsg.msg_hdr.msg_iovlen > 0) {
> + error = copyin(mmsg.msg_hdr.msg_iov, iov,
> + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec));
> + if (error)
> + break;
> + }
> +
> + uiov = mmsg.msg_hdr.msg_iov;
> + mmsg.msg_hdr.msg_iov = iov;
> + mmsg.msg_hdr.msg_flags = flags;
> +
> + error = recvit(p, s, &mmsg.msg_hdr, NULL, &retrec);
> + if (error) {
> + if (error == EAGAIN && dgrams > 0)
> + error = 0;
> + break;
> + }
> +
> + if (dgrams == 0 && flags & MSG_WAITFORONE) {
> + flags &= ~MSG_WAITFORONE;
> + flags |= MSG_DONTWAIT;
> + }
> +
> + mmsg.msg_hdr.msg_iov = uiov;
> + mmsg.msg_len = retrec;
> 

Re: add sendmmsg and recvmmsg systemcalls

2022-09-01 Thread Moritz Buhl
I addressed your concerns as well as these of jca, just the kernel
part (and the new ktrace stuff) below.

One minor thing: I didn't see any kdump output where one struct was
contained in another one but I am printing it like ddb would so I
guess it should be fine.


Index: kern/syscalls.master
===
RCS file: /mount/openbsd/cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.229
diff -u -p -r1.229 syscalls.master
--- kern/syscalls.master1 Aug 2022 14:56:59 -   1.229
+++ kern/syscalls.master1 Sep 2022 14:52:47 -
@@ -244,8 +244,10 @@
const char *permissions); }
 115STD { int sys___realpath(const char *pathname, \
char *resolved); }
-116OBSOL   t32_gettimeofday
-117OBSOL   t32_getrusage
+116STD NOLOCK  { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \
+   unsigned int vlen, unsigned int flags, \
+   struct timespec *timeout); }
+117UNIMPL  sendmmsg
 118STD { int sys_getsockopt(int s, int level, int name, \
void *val, socklen_t *avalsize); }
 119STD { int sys_thrkill(pid_t tid, int signum, void *tcb); }
Index: kern/uipc_syscalls.c
===
RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.201
diff -u -p -r1.201 uipc_syscalls.c
--- kern/uipc_syscalls.c14 Aug 2022 01:58:28 -  1.201
+++ kern/uipc_syscalls.c1 Sep 2022 14:37:26 -
@@ -805,6 +805,140 @@ done:
 }
 
 int
+sys_recvmmsg(struct proc *p, void *v, register_t *retval)
+{
+   struct sys_recvmmsg_args /* {
+   syscallarg(int) s;
+   syscallarg(struct mmsghdr *)mmsg;
+   syscallarg(unsigned int)vlen;
+   syscallarg(unsigned int)flags;
+   syscallarg(struct timespec *)   timeout;
+   } */ *uap = v;
+   struct mmsghdr mmsg, *mmsgp;
+   struct timespec ts, now;
+   struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov;
+   struct file *fp;
+   struct socket *so;
+   struct timespec *timeout;
+   size_t iovlen = UIO_SMALLIOV;
+   register_t retrec;
+   unsigned int vlen, dgrams;
+   int error = 0, flags, s;
+
+   s = SCARG(uap, s);
+   if ((error = getsock(p, s, &fp)))
+   return (error);
+   so = (struct socket *)fp->f_data;
+
+   timeout = SCARG(uap, timeout);
+   if (timeout != NULL) {
+   error = copyin(timeout, &ts, sizeof(ts));
+   if (error)
+   return error;
+#ifdef KTRACE
+   if (KTRPOINT(p, KTR_STRUCT))
+   ktrreltimespec(p, &ts);
+#endif
+   getnanotime(&now);
+   timespecadd(&now, &ts, &ts);
+   }
+
+   flags = SCARG(uap, flags);
+
+   /* Arbitrarily capped at 1024 datagrams. */
+   vlen = SCARG(uap, vlen);
+   if (vlen > 1024)
+   vlen = 1024;
+
+   mmsgp = SCARG(uap, mmsg);
+   for (dgrams = 0; dgrams < vlen;) {
+   error = copyin(&mmsgp[dgrams], &mmsg, sizeof(mmsg));
+   if (error)
+   break;
+
+   if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) {
+   error = EMSGSIZE;
+   break;
+   }
+
+   if (mmsg.msg_hdr.msg_iovlen > iovlen) {
+   if (iov != aiov)
+   free(iov, M_IOV, iovlen *
+   sizeof(struct iovec));
+
+   iovlen = mmsg.msg_hdr.msg_iovlen;
+   iov = mallocarray(iovlen, sizeof(struct iovec),
+   M_IOV, M_WAITOK);
+   }
+
+   if (mmsg.msg_hdr.msg_iovlen > 0) {
+   error = copyin(mmsg.msg_hdr.msg_iov, iov,
+   mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec));
+   if (error)
+   break;
+   }
+
+   uiov = mmsg.msg_hdr.msg_iov;
+   mmsg.msg_hdr.msg_iov = iov;
+   mmsg.msg_hdr.msg_flags = flags;
+
+   error = recvit(p, s, &mmsg.msg_hdr, NULL, &retrec);
+   if (error) {
+   if (error == EAGAIN && dgrams > 0)
+   error = 0;
+   break;
+   }
+
+   if (dgrams == 0 && flags & MSG_WAITFORONE) {
+   flags &= ~MSG_WAITFORONE;
+   flags |= MSG_DONTWAIT;
+   }
+
+   mmsg.msg_hdr.msg_iov = uiov;
+   mmsg.msg_len = retrec;
+#ifdef KTRACE
+   if (KTRPOINT(p, KTR_STRUCT)) {
+   ktrmmsghdr(p, &mmsg);
+   if 

Re: add sendmmsg and recvmmsg systemcalls

2022-08-31 Thread Philip Guenther
On Tue, Aug 30, 2022 at 11:18 AM Moritz Buhl  wrote:

> the following diff only contains recvmmsg which should be the more useful
> syscall of the two.
>

Comments inline.



> --- sys/kern/syscalls.master1 Aug 2022 14:56:59 -   1.229
> +++ sys/kern/syscalls.master30 Aug 2022 15:44:29 -
> @@ -575,3 +575,6 @@
>  328OBSOL   __tfork51
>  329STD NOLOCK  { void sys___set_tcb(void *tcb); }
>  330STD NOLOCK  { void *sys___get_tcb(void); }
> +331STD NOLOCK  { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \
> +   unsigned int vlen, unsigned int flags, \
> +   struct timespec *timeout); }
>

We believe in packing syscall numbers down, modulo having them group
nicely.  So, instead of putting recvmmsg() as a new high-water mark, I
would put it at 116 ("t32_gettimeofday") and mark 117 ("t32_getrusage") as
sendmmsg(), UNIMPL if not part of this round.

(Typically, when a diff changes syscalls.master then you leave out the diff
chunks for the generated files when sending for review, because they are
100% implied and are just noise.  Not a big deal)



--- sys/kern/uipc_syscalls.c14 Aug 2022 01:58:28 -  1.201
> +++ sys/kern/uipc_syscalls.c30 Aug 2022 17:03:09 -
> @@ -805,6 +805,135 @@ done:
>  }
>
>  int
> +sys_recvmmsg(struct proc *p, void *v, register_t *retval)
> +{
> +   struct sys_recvmmsg_args /* {
> +   syscallarg(int) s;
> +   syscallarg(struct mmsghdr *)mmsg;
> +   syscallarg(unsigned int)vlen;
> +   syscallarg(unsigned int)flags;
> +   syscallarg(struct timespec *)   timeout;
> +   } */ *uap = v;
> +   struct mmsghdr mmsg;
> +   struct timespec ts, now;
> +   struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov;
> +   struct file *fp;
> +   struct socket *so;
> +   struct timespec *timeout;
> +   unsigned int vlen, dg;
> +   int error = 0, flags, s;
> +
> +   timeout = SCARG(uap, timeout);
> +   if (timeout != NULL) {
> +   error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
> +   if (error != 0)
> +   return error;
>

Should have a KTRACE ktrreltimespec() block here.
Should validate the timespec.
(Follow what sys_kevent() does)


+   getnanotime(&now);
> +   timespecadd(&now, &ts, &ts);
> +   }
> +
> +   s = SCARG(uap, s);
> +   if ((error = getsock(p, s, &fp)) != 0)
> +   return (error);
> +   so = (struct socket *)fp->f_data;
> +
> +   flags = SCARG(uap, flags);
> +
> +   vlen = SCARG(uap, vlen);
> +   if (vlen > 1024)
> +   vlen = 1024;
> +
> +   for (dg = 0; dg < vlen;) {
> +   error = copyin(SCARG(uap, mmsg) + dg, &mmsg, sizeof(mmsg));
> +   if (error != 0)
> +   break;
>

Hmm.  This copies in each mmsghdr structure when it gets to it.  Is that
how the Linux version behaves, lazily accessing them such that an early
exit (from timeout, signal, whatever) means later values aren't read?  Or
do they copy them all in, update any that are changed, then copy them out
at the end?

(Not sure it matters, but it's an interesting corner case to think
carefully about.)

 ...

> +   mmsg.msg_hdr.msg_iov = uiov;
> +   mmsg.msg_len = *retval;
> +#ifdef KTRACE
> +   if (KTRPOINT(p, KTR_STRUCT)) {
> +   ktrmsghdr(p, &mmsg.msg_hdr);
>

I think you should go ahead and define ktrmmsghdr() taking that full
struct, so kdump can report the msg_len value that is being returned.



> +   if (iov != aiov) {
> +   free(iov, M_IOV, sizeof(struct iovec) *
> +   mmsg.msg_hdr.msg_iovlen);
> +   iov = aiov;
> +   }
>

The iov freeing, IMO, should be done once, at the end of the loop.  Just
keep growing as necessary (tracking the currently allocated size) and free
once.


kdump.c will need at least a SYS_recvmmsg line in the big table, and if you
do a ktrmmsghdr() bit in the kernel a matching decoder will be needed in
kdump.


Philip Guenther


Re: add sendmmsg and recvmmsg systemcalls

2022-08-31 Thread Jeremie Courreges-Anglas
On Tue, Aug 30 2022, Moritz Buhl  wrote:
> Hi tech@,
>
> the following diff only contains recvmmsg which should be the more useful
> syscall of the two.
>
> I implemented some minor feedback regarding the man page and attaching
> an error from recvit to the socket in case some messages were
> received before.
>
> I am also looking into passing the timeout through recvit and
> soreceive in order to not block indefinitely on a blocking socket
> as the other implementations do:
>
> BUGS
>
>The timeout argument does not work as intended.  The timeout is
>checked only after the receipt of each datagram, so that if up to
>vlen-1 datagrams are received before the timeout expires, but
>then no further datagrams are received, the call will block
>forever.
> https://www.man7.org/linux/man-pages/man2/recvmmsg.2.html
>
> But I would prefer doing this in another change.

As discussed with Moritz, ports/net/knot (which I co-maintain) expects
to be able to use both recvmmsg and sendmmsg.  I didn't check other
ports but from the code we read, using both recvmmsg and sendmmsg looks
legit (useful).

And since Moritz insists, please find my bikeshedding below. :)
tl;dr nothing looks wrong to me but I haven't tested it yet.

[...]

> Index: sys/kern/uipc_syscalls.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.201
> diff -u -p -r1.201 uipc_syscalls.c
> --- sys/kern/uipc_syscalls.c  14 Aug 2022 01:58:28 -  1.201
> +++ sys/kern/uipc_syscalls.c  30 Aug 2022 17:03:09 -
> @@ -805,6 +805,135 @@ done:
>  }
>  
>  int
> +sys_recvmmsg(struct proc *p, void *v, register_t *retval)
> +{
> + struct sys_recvmmsg_args /* {
> + syscallarg(int) s;
> + syscallarg(struct mmsghdr *)mmsg;
> + syscallarg(unsigned int)vlen;
> + syscallarg(unsigned int)flags;
> + syscallarg(struct timespec *)   timeout;
> + } */ *uap = v;
> + struct mmsghdr mmsg;
> + struct timespec ts, now;
> + struct iovec aiov[UIO_SMALLIOV], *uiov, *iov = aiov;
> + struct file *fp;
> + struct socket *so;
> + struct timespec *timeout;
> + unsigned int vlen, dg;

"dg" is a weird name.  At first it looks like an index, I would name it
idx but if you want to keep the original name maybe put it on its own
line and add a comment like /* number of datagrams received */?

> + int error = 0, flags, s;
> +
> + timeout = SCARG(uap, timeout);
> + if (timeout != NULL) {
> + error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
   ^^^ could reuse the
   timeout local variable

> + if (error != 0)
The local idiom is
if (error)
> + return error;

There are other occurrences below which you may or may not address if
you prefer to limit the differences with NetBSD.

> + getnanotime(&now);
> + timespecadd(&now, &ts, &ts);
> + }
> +
> + s = SCARG(uap, s);
> + if ((error = getsock(p, s, &fp)) != 0)
> + return (error);
> + so = (struct socket *)fp->f_data;
> +
> + flags = SCARG(uap, flags);
> +
> + vlen = SCARG(uap, vlen);

Maybe add a comment?
/* Arbitrarily capped at 1024 items. */
> + if (vlen > 1024)
> + vlen = 1024;
> +
> + for (dg = 0; dg < vlen;) {
> + error = copyin(SCARG(uap, mmsg) + dg, &mmsg, sizeof(mmsg));

We could have a struct mmsghdr *mmsgp local pointer and use mmsgp[idx]
instead of a mix of a macro and pointer arithmetics.

> + if (error != 0)
> + break;
> +
> + if (mmsg.msg_hdr.msg_iovlen > IOV_MAX) {
> + error = EMSGSIZE;
> + break;
> + }
> +
> + if (mmsg.msg_hdr.msg_iovlen > UIO_SMALLIOV)
> + iov = mallocarray(mmsg.msg_hdr.msg_iovlen,
> + sizeof(struct iovec), M_IOV, M_WAITOK);
> + else
> + iov = aiov;
> +
> + if (mmsg.msg_hdr.msg_iovlen > 0) {
> + error = copyin(mmsg.msg_hdr.msg_iov, iov,
> + mmsg.msg_hdr.msg_iovlen * sizeof(struct iovec));
> + if (error)
> + break;
> + }
> +
> + uiov = mmsg.msg_hdr.msg_iov;
> + mmsg.msg_hdr.msg_iov = iov;
> + mmsg.msg_hdr.msg_flags = flags;
> +
> + error = recvit(p, s, &mmsg.msg_hdr, NULL, retval);
> + if (error != 0) {
> + if (error == EAGAIN && dg > 0)
> + error = 0;
> + break;
> + }
> +
> + if (dg == 0 && flags & MSG_WAITFORONE) {
> + flags &= ~MSG_WAITFORONE;
> + flags |= 

Re: add sendmmsg and recvmmsg systemcalls

2022-08-31 Thread Moritz Buhl
I created a pull request on their github:
https://github.com/NLnetLabs/nsd/pull/231

On Tue, Aug 30, 2022 at 04:33:10PM -0600, Todd C. Miller wrote:
> On Wed, 31 Aug 2022 00:19:20 +0200, Moritz Buhl wrote:
> 
> > On Tue, Aug 30, 2022 at 10:59:43PM +0200, Claudio Jeker wrote:
> > > And nsd in base. It seems unbound does not use recvmmsg. 
> >
> > After 'make -f Makefile.bsd-wrapper config' recvmmsg is picked up.
> > The config compile test currently defines NONBLOCKING_IS_BROKEN
> > because of a missing include.  Then vlen for recvmmsg would always
> > be 1.
> >
> > checking for struct mmsghdr... yes
> > checking for recvmmsg... yes
> > checking for sendmmsg... no
> > ...
> > checking if nonblocking sockets work... yes
> >
> > If anybody knows how to make nsd call nsd_recvmmsg this would
> > make for some nice testing.
> 
> Does this also fix the configure test?  If so, we can submit it
> upstream.
> 
>  - todd
> 
> Index: acx_nlnetlabs.m4
> ===
> RCS file: /cvs/src/usr.sbin/nsd/acx_nlnetlabs.m4,v
> retrieving revision 1.7
> diff -u -p -u -r1.7 acx_nlnetlabs.m4
> --- acx_nlnetlabs.m4  24 Oct 2021 12:14:18 -  1.7
> +++ acx_nlnetlabs.m4  30 Aug 2022 22:31:48 -
> @@ -963,6 +963,9 @@ AC_LANG_SOURCE([[
>  #ifdef HAVE_SYS_TYPES_H
>  #include <sys/types.h>
>  #endif
> +#ifdef HAVE_SYS_SELECT_H
> +#include <sys/select.h>
> +#endif
>  #ifdef HAVE_SYS_SOCKET_H
>  #include <sys/socket.h>
>  #endif
> Index: configure
> ===
> RCS file: /cvs/src/usr.sbin/nsd/configure,v
> retrieving revision 1.56
> diff -u -p -u -r1.56 configure
> --- configure 30 Jun 2022 10:49:39 -  1.56
> +++ configure 30 Aug 2022 22:32:08 -
> @@ -6593,6 +6593,9 @@ else
>  #ifdef HAVE_SYS_TYPES_H
>  #include <sys/types.h>
>  #endif
> +#ifdef HAVE_SYS_SELECT_H
> +#include <sys/select.h>
> +#endif
>  #ifdef HAVE_SYS_SOCKET_H
>  #include <sys/socket.h>
>  #endif
> 



Re: add sendmmsg and recvmmsg systemcalls

2022-08-30 Thread Todd C . Miller
On Wed, 31 Aug 2022 00:19:20 +0200, Moritz Buhl wrote:

> On Tue, Aug 30, 2022 at 10:59:43PM +0200, Claudio Jeker wrote:
> > And nsd in base. It seems unbound does not use recvmmsg. 
>
> After 'make -f Makefile.bsd-wrapper config' recvmmsg is picked up.
> The config compile test currently defines NONBLOCKING_IS_BROKEN
> because of a missing include.  Then vlen for recvmmsg would always
> be 1.
>
> checking for struct mmsghdr... yes
> checking for recvmmsg... yes
> checking for sendmmsg... no
> ...
> checking if nonblocking sockets work... yes
>
> If anybody knows how to make nsd call nsd_recvmmsg this would
> make for some nice testing.

Does this also fix the configure test?  If so, we can submit it
upstream.

 - todd

Index: acx_nlnetlabs.m4
===
RCS file: /cvs/src/usr.sbin/nsd/acx_nlnetlabs.m4,v
retrieving revision 1.7
diff -u -p -u -r1.7 acx_nlnetlabs.m4
--- acx_nlnetlabs.m424 Oct 2021 12:14:18 -  1.7
+++ acx_nlnetlabs.m430 Aug 2022 22:31:48 -
@@ -963,6 +963,9 @@ AC_LANG_SOURCE([[
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
 #ifdef HAVE_SYS_SOCKET_H
 #include <sys/socket.h>
 #endif
Index: configure
===
RCS file: /cvs/src/usr.sbin/nsd/configure,v
retrieving revision 1.56
diff -u -p -u -r1.56 configure
--- configure   30 Jun 2022 10:49:39 -  1.56
+++ configure   30 Aug 2022 22:32:08 -
@@ -6593,6 +6593,9 @@ else
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
 #ifdef HAVE_SYS_SOCKET_H
 #include <sys/socket.h>
 #endif



Re: add sendmmsg and recvmmsg systemcalls

2022-08-30 Thread Moritz Buhl
On Tue, Aug 30, 2022 at 10:59:43PM +0200, Claudio Jeker wrote:
> And nsd in base. It seems unbound does not use recvmmsg. 

After 'make -f Makefile.bsd-wrapper config' recvmmsg is picked up.
The config compile test currently defines NONBLOCKING_IS_BROKEN
because of a missing include.  Then vlen for recvmmsg would always
be 1.

checking for struct mmsghdr... yes
checking for recvmmsg... yes
checking for sendmmsg... no
...
checking if nonblocking sockets work... yes

If anybody knows how to make nsd call nsd_recvmmsg this would
make for some nice testing.

mbuhl

Index: Makefile.bsd-wrapper
===
RCS file: /mount/openbsd/cvs/src/usr.sbin/nsd/Makefile.bsd-wrapper,v
retrieving revision 1.19
diff -u -p -r1.19 Makefile.bsd-wrapper
--- Makefile.bsd-wrapper30 Jun 2021 11:50:22 -  1.19
+++ Makefile.bsd-wrapper30 Aug 2022 22:09:46 -
@@ -22,6 +22,7 @@ CONFIGURE_OPTS=   --prefix=/usr \
--with-xfrdfile=${CHROOTDIR}/run/xfrd.state \
--with-libevent=/usr \
--enable-ratelimit \
+   --enable-recvmmsg \
--enable-root-server
 
 PROG=  nsd nsd-checkconf nsd-checkzone nsd-control
Index: configure
===
RCS file: /mount/openbsd/cvs/src/usr.sbin/nsd/configure,v
retrieving revision 1.56
diff -u -p -r1.56 configure
--- configure   30 Jun 2022 10:49:39 -  1.56
+++ configure   30 Aug 2022 22:04:41 -
@@ -6609,6 +6609,8 @@ else
 #include 
 #endif
 
+#include 
+
 int main(void)
 {
int port;



Re: add sendmmsg and recvmmsg systemcalls

2022-08-30 Thread Claudio Jeker
On Tue, Aug 30, 2022 at 09:51:46PM +0100, Stuart Henderson wrote:
> btw a few ports will likely pick this up:
> 
> paths/devel/glib2.log:Checking for function "recvmmsg" : NO
> paths/net/tinc.log:checking for recvmmsg... no
> paths/net/knot.log:checking for recvmmsg... no
> paths/net/knot.log:Use recvmmsg:   no
> paths/net/gdnsd.log:checking whether recvmmsg is declared... no
> paths/net/gdnsd.log:checking for recvmmsg... no
> paths/net/powerdns.log:checking for recvmmsg... no
> paths/net/powerdns.log:checking for recvmmsg... (cached) no
> paths/net/dnsdist.log:checking for recvmmsg... no
> paths/net/powerdns_recursor.log:checking for recvmmsg... no
> paths/sysutils/rsyslog,-elasticsearch.log:checking for recvmmsg... no
> 

And nsd in base. It seems unbound does not use recvmmsg. 

-- 
:wq Claudio



Re: add sendmmsg and recvmmsg systemcalls

2022-08-30 Thread Stuart Henderson
btw a few ports will likely pick this up:

paths/devel/glib2.log:Checking for function "recvmmsg" : NO
paths/net/tinc.log:checking for recvmmsg... no
paths/net/knot.log:checking for recvmmsg... no
paths/net/knot.log:Use recvmmsg:   no
paths/net/gdnsd.log:checking whether recvmmsg is declared... no
paths/net/gdnsd.log:checking for recvmmsg... no
paths/net/powerdns.log:checking for recvmmsg... no
paths/net/powerdns.log:checking for recvmmsg... (cached) no
paths/net/dnsdist.log:checking for recvmmsg... no
paths/net/powerdns_recursor.log:checking for recvmmsg... no
paths/sysutils/rsyslog,-elasticsearch.log:checking for recvmmsg... no




Re: add sendmmsg and recvmmsg systemcalls

2022-08-30 Thread Moritz Buhl
Hi tech@,

the following diff only contains recvmmsg which should be the more useful
syscall of the two.

I implemented some minor feedback regarding the man page and attaching
an error from recvit to the socket in case some messages were
received before.

I am also looking into passing the timeout through recvit and
soreceive in order to not block indefinitely on a blocking socket
as the other implementations do:

BUGS

   The timeout argument does not work as intended.  The timeout is
   checked only after the receipt of each datagram, so that if up to
   vlen-1 datagrams are received before the timeout expires, but
   then no further datagrams are received, the call will block
   forever.
https://www.man7.org/linux/man-pages/man2/recvmmsg.2.html

But I would prefer doing this in another change.

mbuhl

Index: lib/libc/Symbols.list
===
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.75
diff -u -p -r1.75 Symbols.list
--- lib/libc/Symbols.list   2 Aug 2022 16:45:00 -   1.75
+++ lib/libc/Symbols.list   30 Aug 2022 15:44:29 -
@@ -175,6 +175,7 @@ _thread_sys_readlinkat
 _thread_sys_readv
 _thread_sys_reboot
 _thread_sys_recvfrom
+_thread_sys_recvmmsg
 _thread_sys_recvmsg
 _thread_sys_rename
 _thread_sys_renameat
@@ -372,6 +373,7 @@ readlinkat
 readv
 reboot
 recvfrom
+recvmmsg
 recvmsg
 rename
 renameat
Index: lib/libc/shlib_version
===
RCS file: /cvs/src/lib/libc/shlib_version,v
retrieving revision 1.210
diff -u -p -r1.210 shlib_version
--- lib/libc/shlib_version  2 Jun 2021 07:29:03 -   1.210
+++ lib/libc/shlib_version  30 Aug 2022 15:44:29 -
@@ -1,4 +1,4 @@
 major=96
-minor=1
+minor=2
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
Index: lib/libc/hidden/sys/socket.h
===
RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v
retrieving revision 1.4
diff -u -p -r1.4 socket.h
--- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 -   1.4
+++ lib/libc/hidden/sys/socket.h30 Aug 2022 15:44:29 -
@@ -32,6 +32,7 @@ PROTO_NORMAL(getsockopt);
 PROTO_NORMAL(listen);
 PROTO_NORMAL(recv);
 PROTO_CANCEL(recvfrom);
+PROTO_CANCEL(recvmmsg);
 PROTO_CANCEL(recvmsg);
 PROTO_NORMAL(send);
 PROTO_CANCEL(sendmsg);
Index: lib/libc/sys/Makefile.inc
===
RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
retrieving revision 1.163
diff -u -p -r1.163 Makefile.inc
--- lib/libc/sys/Makefile.inc   17 Jul 2022 03:04:27 -  1.163
+++ lib/libc/sys/Makefile.inc   30 Aug 2022 15:44:29 -
@@ -34,7 +34,7 @@ CANCEL=   accept accept4 \
nanosleep \
open openat \
poll ppoll pread preadv pselect pwrite pwritev \
-   read readv recvfrom recvmsg \
+   read readv recvfrom recvmmsg recvmsg \
select sendmsg sendto \
wait4 write writev
 SRCS+= ${CANCEL:%=w_%.c}
Index: lib/libc/sys/recv.2
===
RCS file: /cvs/src/lib/libc/sys/recv.2,v
retrieving revision 1.48
diff -u -p -r1.48 recv.2
--- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 -  1.48
+++ lib/libc/sys/recv.2 30 Aug 2022 15:44:29 -
@@ -46,15 +46,35 @@
 .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr 
*from" "socklen_t *fromlen"
 .Ft ssize_t
 .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
+.Ft int
+.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "unsigned int 
flags" "struct timespec *timeout"
 .Sh DESCRIPTION
-.Fn recvfrom
+.Fn recv ,
+.Fn recvfrom ,
+.Fn recvmsg ,
 and
-.Fn recvmsg
+.Fn recvmmsg
 are used to receive messages from a socket,
-.Fa s ,
-and may be used to receive
+.Fa s .
+.Fn recv
+is normally used only on a
+.Em connected
+socket (see
+.Xr connect 2 ).
+.Fn recvfrom ,
+.Fn recvmsg ,
+and
+.Fn recvmmsg
+may be used to receive
 data on a socket whether or not it is connection-oriented.
 .Pp
+.Fn recv
+is identical to
+.Fn recvfrom
+with a null
+.Fa from
+parameter.
+.Pp
 If
 .Fa from
 is non-null and the socket is not connection-oriented,
@@ -66,25 +86,6 @@ the buffer associated with
 and modified on return to indicate the actual size of the
 address stored there.
 .Pp
-The
-.Fn recv
-call is normally used only on a
-.Em connected
-socket (see
-.Xr connect 2 )
-and is identical to
-.Fn recvfrom
-with a null
-.Fa from
-parameter.
-.Pp
-On successful completion, all three routines return the number of
-message bytes read.
-If a message is too long to fit in the supplied
-buffer, excess bytes may be discarded depending on the type of socket
-the message is received from (see
-.Xr socket 2 ) .
-.Pp
 If no messages are available at the socket, the
 receive call waits for 

add sendmmsg and recvmmsg systemcalls

2022-04-22 Thread Moritz Buhl
Hi tech@,

I implemented the sendmmsg and recvmmsg system calls by copying the
NetBSD implementation and adjusting it.

The idea behind the mmsg system calls is to make fewer system calls per
msghdr and thus improve throughput.
This should allow faster processing of packets (UDP, raw IP) in
userland.

int
sendmmsg(int s, const struct mmsghdr *mmsg, unsigned int vlen,
unsigned int flags);

int
recvmmsg(int s, struct mmsghdr *mmsg, unsigned int vlen,
unsigned int flags, struct timespec *timeout);

The interface is incompatible with the other send and recv system calls
and the timeout feels unnecessary, but this way it is compatible
with Linux and NetBSD.

The diff is below.
mbuhl

Index: lib/libc/Symbols.list
===
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.74
diff -u -p -r1.74 Symbols.list
--- lib/libc/Symbols.list   3 Jun 2021 13:19:45 -   1.74
+++ lib/libc/Symbols.list   22 Apr 2022 16:03:30 -
@@ -176,6 +176,7 @@ _thread_sys_readv
 _thread_sys_reboot
 _thread_sys_recvfrom
 _thread_sys_recvmsg
+_thread_sys_recvmmsg
 _thread_sys_rename
 _thread_sys_renameat
 _thread_sys_revoke
@@ -185,6 +186,7 @@ _thread_sys_select
 _thread_sys_semget
 _thread_sys_semop
 _thread_sys_sendmsg
+_thread_sys_sendmmsg
 _thread_sys_sendsyslog
 _thread_sys_sendto
 _thread_sys_setegid
@@ -373,6 +375,7 @@ readv
 reboot
 recvfrom
 recvmsg
+recvmmsg
 rename
 renameat
 revoke
@@ -384,6 +387,7 @@ semctl
 semget
 semop
 sendmsg
+sendmmsg
 sendsyslog
 sendto
 setegid
Index: lib/libc/shlib_version
===
RCS file: /cvs/src/lib/libc/shlib_version,v
retrieving revision 1.210
diff -u -p -r1.210 shlib_version
--- lib/libc/shlib_version  2 Jun 2021 07:29:03 -   1.210
+++ lib/libc/shlib_version  22 Apr 2022 16:03:30 -
@@ -1,4 +1,4 @@
 major=96
-minor=1
+minor=2
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
Index: lib/libc/hidden/sys/socket.h
===
RCS file: /cvs/src/lib/libc/hidden/sys/socket.h,v
retrieving revision 1.4
diff -u -p -r1.4 socket.h
--- lib/libc/hidden/sys/socket.h7 May 2016 19:05:22 -   1.4
+++ lib/libc/hidden/sys/socket.h22 Apr 2022 16:03:30 -
@@ -33,8 +33,10 @@ PROTO_NORMAL(listen);
 PROTO_NORMAL(recv);
 PROTO_CANCEL(recvfrom);
 PROTO_CANCEL(recvmsg);
+PROTO_CANCEL(recvmmsg);
 PROTO_NORMAL(send);
 PROTO_CANCEL(sendmsg);
+PROTO_CANCEL(sendmmsg);
 PROTO_CANCEL(sendto);
 PROTO_NORMAL(setrtable);
 PROTO_NORMAL(setsockopt);
Index: lib/libc/sys/Makefile.inc
===
RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
retrieving revision 1.161
diff -u -p -r1.161 Makefile.inc
--- lib/libc/sys/Makefile.inc   23 Dec 2021 18:50:32 -  1.161
+++ lib/libc/sys/Makefile.inc   22 Apr 2022 16:03:30 -
@@ -34,8 +34,8 @@ CANCEL=   accept accept4 \
nanosleep \
open openat \
poll ppoll pread preadv pselect pwrite pwritev \
-   read readv recvfrom recvmsg \
-   select sendmsg sendto \
+   read readv recvfrom recvmsg recvmmsg \
+   select sendmsg sendmmsg sendto \
wait4 write writev
 SRCS+= ${CANCEL:%=w_%.c}
 
Index: lib/libc/sys/recv.2
===
RCS file: /cvs/src/lib/libc/sys/recv.2,v
retrieving revision 1.48
diff -u -p -r1.48 recv.2
--- lib/libc/sys/recv.2 21 Nov 2021 23:44:55 -  1.48
+++ lib/libc/sys/recv.2 22 Apr 2022 16:03:30 -
@@ -46,15 +46,35 @@
 .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr 
*from" "socklen_t *fromlen"
 .Ft ssize_t
 .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
+.Ft int
+.Fn recvmmsg "int s" "struct mmsghdr *mmsg" "unsigned int vlen" "unsigned int 
flags" "struct timespec *timeout"
 .Sh DESCRIPTION
-.Fn recvfrom
+.Fn recv ,
+.Fn recvfrom ,
+.Fn recvmsg ,
 and
-.Fn recvmsg
+.Fn recvmmsg
 are used to receive messages from a socket,
-.Fa s ,
-and may be used to receive
+.Fa s .
+.Fn recv
+is normally used only on a
+.Em connected
+socket (see
+.Xr connect 2 ).
+.Fn recvfrom ,
+.Fn recvmsg ,
+and
+.Fn recvmmsg
+may be used to receive
 data on a socket whether or not it is connection-oriented.
 .Pp
+.Fn recv
+is identical to
+.Fn recvfrom
+with a null
+.Fa from
+parameter.
+.Pp
 If
 .Fa from
 is non-null and the socket is not connection-oriented,
@@ -66,25 +86,6 @@ the buffer associated with
 and modified on return to indicate the actual size of the
 address stored there.
 .Pp
-The
-.Fn recv
-call is normally used only on a
-.Em connected
-socket (see
-.Xr connect 2 )
-and is identical to
-.Fn recvfrom
-with a null
-.Fa from
-parameter.
-.Pp
-On successful completion, all three routines return the number of
-message bytes