On Mon, Nov 04, 2024 at 03:58:38PM +0100, Alexander Bluhm wrote:
> On Sun, Oct 27, 2024 at 02:11:22AM +0300, Vitaliy Makkoveev wrote:
> > I just checked this diff with 7.6-stable. It perfectly applies to
> > sources, compiles and runs. I'm not surprised, because -current is very
> > close to release. It seems you did something wrong.
> >
> > The attached diff was made against 7.6-stable. As you can see it is the
> > same. Please be sure your mail client doesn't drop spaces in the
> > beginning of line or something else.
>
> I finally got vxlan over UDP multicast working and did send UDP
> multicast packets over it. For now it is only one packet, no
> multicore performance test yet. Multicast receive, route, send
> over vxlan works, after I have tweaked mvs@ inpcb iterator diff.
>
> Changes to the previous version:
> - PCB Iterator is only checked for UDP sockets. If we implement
> it for others, we can adopt.
> - tmp = TAILQ_NEXT(inp, inp_queue) was wrong, it must be
> tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue)
> - in_pcbunref(inp) automatically does a NULL check.
> - When exiting the iterater loop early, in_pcb_iterator_abort()
> removes the iterator from the queue.
> - Rename tinp to last. This was the name in 4.4BSD before I
> splitted the loop into two.
>
> mvs@, what do you think about this?
>
Your diff is ok by me. I forgot to remove `iter' in the early exit case,
that's why my diff crashed. in_pcb_iterator_abort() fixes this.
We also could save a little stack space by moving `inp_queue' and
`inp_table' to the beginning of 'inpcb' structure, but this reordering
could be done separately.
> I am throwing this on regress right now. And as mentioned before,
> I should implement automatic UDP multicast tests over vxlan. With
> that, unlocking multicast in general will be easier.
>
> bluhm
>
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.451 kern_sysctl.c
> --- kern/kern_sysctl.c 31 Oct 2024 10:06:51 -0000 1.451
> +++ kern/kern_sysctl.c 4 Nov 2024 10:10:34 -0000
> @@ -1689,13 +1689,19 @@ sysctl_file(int *name, u_int namelen, ch
> mtx_leave(&tcb6table.inpt_mtx);
> #endif
> mtx_enter(&udbtable.inpt_mtx);
> - TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
> + TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
> + if (in_pcb_is_iterator(inp))
> + continue;
> FILLSO(inp->inp_socket);
> + }
> mtx_leave(&udbtable.inpt_mtx);
> #ifdef INET6
> mtx_enter(&udb6table.inpt_mtx);
> - TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue)
> + TAILQ_FOREACH(inp, &udb6table.inpt_queue, inp_queue) {
> + if (in_pcb_is_iterator(inp))
> + continue;
> FILLSO(inp->inp_socket);
> + }
> mtx_leave(&udb6table.inpt_mtx);
> #endif
> mtx_enter(&rawcbtable.inpt_mtx);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.303 in_pcb.c
> --- netinet/in_pcb.c 12 Jul 2024 19:50:35 -0000 1.303
> +++ netinet/in_pcb.c 4 Nov 2024 13:49:46 -0000
> @@ -644,6 +644,55 @@ in_pcbunref(struct inpcb *inp)
> pool_put(&inpcb_pool, inp);
> }
>
> +struct inpcb *
> +in_pcb_iterator(struct inpcbtable *table, struct inpcb *inp,
> + struct inpcb_iterator *iter)
> +{
> + struct inpcb *tmp;
> +
> + mtx_enter(&table->inpt_mtx);
> +
> + if (inp)
> + tmp = TAILQ_NEXT((struct inpcb *)iter, inp_queue);
> + else
> + tmp = TAILQ_FIRST(&table->inpt_queue);
> +
> + while (tmp && tmp->inp_table == NULL)
> + tmp = TAILQ_NEXT(tmp, inp_queue);
> +
> + if (inp) {
> + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter,
> + inp_queue);
> + }
> + if (tmp) {
> + TAILQ_INSERT_AFTER(&table->inpt_queue, tmp,
> + (struct inpcb *)iter, inp_queue);
> + in_pcbref(tmp);
> + }
> +
> + mtx_leave(&table->inpt_mtx);
> +
> + in_pcbunref(inp);
> +
> + return tmp;
> +}
> +
> +void
> +in_pcb_iterator_abort(struct inpcbtable *table, struct inpcb *inp,
> + struct inpcb_iterator *iter)
> +{
> + mtx_enter(&table->inpt_mtx);
> +
> + if (inp) {
> + TAILQ_REMOVE(&table->inpt_queue, (struct inpcb *)iter,
> + inp_queue);
> + }
> +
> + mtx_leave(&table->inpt_mtx);
> +
> + in_pcbunref(inp);
> +}
> +
> void
> in_setsockaddr(struct inpcb *inp, struct mbuf *nam)
> {
> @@ -743,6 +792,8 @@ in_pcbnotifyall(struct inpcbtable *table
> rw_enter_write(&table->inpt_notify);
> mtx_enter(&table->inpt_mtx);
> TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> + if (in_pcb_is_iterator(inp))
> + continue;
> KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
>
> if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
> @@ -1098,6 +1149,8 @@ in_pcbresize(struct inpcbtable *table, i
> table->inpt_size = hashsize;
>
> TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> + if (in_pcb_is_iterator(inp))
> + continue;
> LIST_REMOVE(inp, inp_lhash);
> LIST_REMOVE(inp, inp_hash);
> in_pcbhash_insert(inp);
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> diff -u -p -r1.158 in_pcb.h
> --- netinet/in_pcb.h 12 Jul 2024 19:50:35 -0000 1.158
> +++ netinet/in_pcb.h 4 Nov 2024 13:49:40 -0000
> @@ -178,6 +178,20 @@ struct inpcb {
>
> LIST_HEAD(inpcbhead, inpcb);
>
> +struct inpcb_iterator {
> + LIST_ENTRY(inpcb) inp_hash; /* unused */
> + LIST_ENTRY(inpcb) inp_lhash; /* unused */
> + TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */
> + SIMPLEQ_ENTRY(inpcb) inp_notify; /* unused */
> + struct inpcbtable *inp_table; /* [I] always NULL */
> +};
> +
> +static inline int
> +in_pcb_is_iterator(struct inpcb *inp)
> +{
> + return (inp->inp_table == NULL ? 1 : 0);
> +}
> +
> struct inpcbtable {
> struct mutex inpt_mtx; /* protect queue and hash */
> struct rwlock inpt_notify; /* protect inp_notify list */
> @@ -302,6 +316,11 @@ struct inpcb *
> in_pcbref(struct inpcb *);
> void in_pcbunref(struct inpcb *);
> void in_pcbdisconnect(struct inpcb *);
> +struct inpcb *
> + in_pcb_iterator(struct inpcbtable *, struct inpcb *,
> + struct inpcb_iterator *);
> +void in_pcb_iterator_abort(struct inpcbtable *, struct inpcb *,
> + struct inpcb_iterator *);
> struct inpcb *
> in_pcblookup(struct inpcbtable *, struct in_addr,
> u_int, struct in_addr, u_int, u_int);
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> diff -u -p -r1.325 udp_usrreq.c
> --- netinet/udp_usrreq.c 3 Nov 2024 14:28:06 -0000 1.325
> +++ netinet/udp_usrreq.c 4 Nov 2024 13:48:51 -0000
> @@ -382,7 +382,8 @@ udp_input(struct mbuf **mp, int *offp, i
> }
>
> if (m->m_flags & (M_BCAST|M_MCAST)) {
> - SIMPLEQ_HEAD(, inpcb) inpcblist;
> + struct inpcb_iterator iter = {.inp_table = NULL};
> + struct inpcb *last;
> struct inpcbtable *table;
>
> /*
> @@ -401,11 +402,6 @@ udp_input(struct mbuf **mp, int *offp, i
> * fixing the interface. Maybe 4.5BSD will remedy this?)
> */
>
> - /*
> - * Locate pcb(s) for datagram.
> - * (Algorithm copied from raw_intr().)
> - */
> - SIMPLEQ_INIT(&inpcblist);
> #ifdef INET6
> if (ip6)
> table = &udb6table;
> @@ -413,9 +409,8 @@ udp_input(struct mbuf **mp, int *offp, i
> #endif
> table = &udbtable;
>
> - rw_enter_write(&table->inpt_notify);
> - mtx_enter(&table->inpt_mtx);
> - TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> + last = inp = NULL;
> + while ((inp = in_pcb_iterator(table, inp, &iter)) != NULL) {
> if (ip6)
> KASSERT(ISSET(inp->inp_flags, INP_IPV6));
> else
> @@ -466,8 +461,18 @@ udp_input(struct mbuf **mp, int *offp, i
> continue;
> }
>
> - in_pcbref(inp);
> - SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
> + if (last != NULL) {
> + struct mbuf *n;
> +
> + n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> + if (n != NULL) {
> + udp_sbappend(last, n, ip, ip6, iphlen,
> + uh, &srcsa.sa, 0);
> + }
> + in_pcbunref(last);
> + }
> +
> + last = in_pcbref(inp);
>
> /*
> * Don't look for additional matches if this one does
> @@ -478,14 +483,13 @@ udp_input(struct mbuf **mp, int *offp, i
> * clear these options after setting them.
> */
> if ((inp->inp_socket->so_options & (SO_REUSEPORT |
> - SO_REUSEADDR)) == 0)
> + SO_REUSEADDR)) == 0) {
> + in_pcb_iterator_abort(table, inp, &iter);
> break;
> + }
> }
> - mtx_leave(&table->inpt_mtx);
> -
> - if (SIMPLEQ_EMPTY(&inpcblist)) {
> - rw_exit_write(&table->inpt_notify);
>
> + if (last == NULL) {
> /*
> * No matching pcb found; discard datagram.
> * (No need to send an ICMP Port Unreachable
> @@ -495,21 +499,8 @@ udp_input(struct mbuf **mp, int *offp, i
> goto bad;
> }
>
> - while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> - struct mbuf *n;
> -
> - SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> - if (SIMPLEQ_EMPTY(&inpcblist))
> - n = m;
> - else
> - n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> - if (n != NULL) {
> - udp_sbappend(inp, n, ip, ip6, iphlen, uh,
> - &srcsa.sa, 0);
> - }
> - in_pcbunref(inp);
> - }
> - rw_exit_write(&table->inpt_notify);
> + udp_sbappend(last, m, ip, ip6, iphlen, uh, &srcsa.sa, 0);
> + in_pcbunref(last);
>
> return IPPROTO_DONE;
> }
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.144 in6_pcb.c
> --- netinet6/in6_pcb.c 12 Apr 2024 16:07:09 -0000 1.144
> +++ netinet6/in6_pcb.c 3 Nov 2024 17:58:52 -0000
> @@ -479,6 +479,8 @@ in6_pcbnotify(struct inpcbtable *table,
> rw_enter_write(&table->inpt_notify);
> mtx_enter(&table->inpt_mtx);
> TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
> + if (in_pcb_is_iterator(inp))
> + continue;
> KASSERT(ISSET(inp->inp_flags, INP_IPV6));
>
> /*
>