Re: npppd(8): remove PIPEXCSESSION ioctl(2) command

2022-06-30 Thread YASUOKA Masahiko
ok yasuoka

On Thu, 30 Jun 2022 12:26:55 +0300
Vitaliy Makkoveev  wrote:
> yasuoka@ reminded me that, long ago, pipex(4) sessions couldn't be deleted
> until both input and output queues became empty:
> 
> pipex_timer(void *ignored_arg)
> {
>   /* ... */
>   switch (session->state) {
>   /* ... */
>   case PIPEX_STATE_CLOSED:
>   /*
>* mbuf queued in pipexinq or pipexoutq may have a
>* refererce to this session.
>*/
>   if (!mq_empty(&pipexinq) || !mq_empty(&pipexoutq))
>   continue;
> 
>   pipex_destroy_session(session);
>   break;
>   /* ... */
> }
> 
> Such dead sessions were linked to the stack and the `ip_forward' flag
> was used to prevent packets forwarding.
> 
> But since we started to unlink closed sessions from the stack, this logic
> became unnecessary. Also, a pipex(4) session could be closed just after
> the close request.
> 
> I want to remove it. This makes the pipex(4) session flags immutable and
> reduces locking games.
> 
> This diff removes PIPEXCSESSION call only from npppd(8). It deletes
> session just after PIPEXCSESSION ioctl(2) call so nothing changed in
> session life within kernel space. I will modify the kernel and pipex(4) man
> page in a separate diff, after I finish fixing the pipex(4) locking.
> 
> Index: usr.sbin/npppd/npppd/npppd.c
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.c,v
> retrieving revision 1.52
> diff -u -p -r1.52 npppd.c
> --- usr.sbin/npppd/npppd/npppd.c  15 Nov 2021 15:14:24 -  1.52
> +++ usr.sbin/npppd/npppd/npppd.c  30 Jun 2022 08:49:29 -
> @@ -114,7 +114,6 @@ static struct in_addr loop;   /* initializ
>  static uint32_tstr_hash(const void *, int);
>  
>  #ifdef USE_NPPPD_PIPEX
> -static int npppd_ppp_pipex_ip_disable(npppd *, npppd_ppp *);
>  static void pipex_periodic(npppd *);
>  #endif /* USE_NPPPD_PIPEX */
>  
> @@ -1246,62 +1245,6 @@ npppd_ppp_pipex_disable(npppd *_this, np
>   return error;
>  }
>  
> -/* XXX: s/npppd_ppp_pipex_ip_disable/npppd_ppp_pipex_stop/ ?? */
> -
> -/** Stop PIPEX of the {@link npppd_ppp ppp} */
> -static int
> -npppd_ppp_pipex_ip_disable(npppd *_this, npppd_ppp *ppp)
> -{
> - struct pipex_session_config_req req;
> -#ifdef USE_NPPPD_PPPOE
> - pppoe_session *pppoe;
> -#endif
> -#ifdef USE_NPPPD_PPTP
> - pptp_call *call;
> -#endif
> -#ifdef USE_NPPPD_L2TP
> - l2tp_call *l2tp;
> -#endif
> - if (ppp->pipex_started == 0)
> - return 0;   /* not started */
> -
> - bzero(&req, sizeof(req));
> - switch(ppp->tunnel_type) {
> -#ifdef USE_NPPPD_PPPOE
> - case NPPPD_TUNNEL_PPPOE:
> - pppoe = (pppoe_session *)ppp->phy_context;
> -
> - /* PPPoE specific information */
> - req.pcr_protocol = PIPEX_PROTO_PPPOE;
> - req.pcr_session_id = pppoe->session_id;
> - break;
> -#endif
> -#ifdef USE_NPPPD_PPTP
> - case NPPPD_TUNNEL_PPTP:
> - call = (pptp_call *)ppp->phy_context;
> -
> - /* PPTP specific information */
> - req.pcr_session_id = call->id;
> - req.pcr_protocol = PIPEX_PROTO_PPTP;
> - break;
> -#endif
> -#ifdef USE_NPPPD_L2TP
> - case NPPPD_TUNNEL_L2TP:
> - l2tp = (l2tp_call *)ppp->phy_context;
> -
> - /* L2TP specific context */
> - req.pcr_session_id = l2tp->session_id;
> - req.pcr_protocol = PIPEX_PROTO_L2TP;
> - break;
> -#endif
> - default:
> - return 1;
> - }
> - req.pcr_ip_forward = 0;
> -
> - return ioctl(_this->iface[ppp->ifidx].devf, PIPEXCSESSION, &req);
> -}
> -
>  static void
>  pipex_periodic(npppd *_this)
>  {
> @@ -1565,11 +1508,6 @@ npppd_set_ip_enabled(npppd *_this, npppd
>   hl->key = ppp1->username;
>   }
>   }
> -#ifdef USE_NPPPD_PIPEX
> - if (npppd_ppp_pipex_ip_disable(_this, ppp) != 0)
> - ppp_log(ppp, LOG_ERR,
> - "npppd_ppp_pipex_ip_disable() failed: %m");
> -#endif /* USE_NPPPD_PIPEX */
>   }
>  }
>  
> 



Re: one send_rtmsg is enough for bgpd

2022-06-30 Thread Theo Buehler
On Thu, Jun 30, 2022 at 04:59:50PM +0200, Claudio Jeker wrote:
> Implement send_rtmsg() using kroute_full and just use one version of this
> magical code. I use struct sockaddr_storage for all sockaddrs added to
> ensure that there is a) enough space and b) that ROUNDUP() does not cause
> the system to pass uninitialized stack memory to the kernel.
> 
> I tested IPv4 and IPv6 but not yet the MPLS version.
> kroute_full now also carries the mplslabel which could be shown in bgpctl
> show fib output (if anyone wants to add that).
> 
> I renamed some struct kroute_full pointers from *kl to *kf. I want to use
> that everywhere in the end but step by step.

ok tb

No comments for once



Re: Faster M operation for the swapper to be great again

2022-06-30 Thread Mark Kettenis
Makes sense to me.

ok kettenis@

> On 06/30/2022 4:05 PM Martin Pieuchot  wrote:
> 
>  
> Diff below uses two tricks to make uvm_pagermapin/out() faster and less
> likely to fail in OOM situations.
> 
> These functions are used to map buffers when swapping pages in/out and
> when faulting on mmaped files.  robert@ even measured a 75% improvement
> when populating pages related to files that aren't yet in the buffer
> cache.
> 
> The first trick is to use the direct map when available.  I'm doing this
> for single pages but km_alloc(9) also does that for single segment...
> uvm_io() only maps one page at a time for the moment so this should be
> enough.
> 
> The second trick is to use pmap_kenter_pa() which doesn't fail and is
> faster.
> 
> With these changes the "freeze" happening on my server when entering many
> pages to swap in an OOM situation is much shorter and the machine becomes
> quickly responsive.
> 
> ok?
> 
> Index: uvm/uvm_pager.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 uvm_pager.c
> --- uvm/uvm_pager.c   28 Jun 2022 19:07:40 -  1.81
> +++ uvm/uvm_pager.c   30 Jun 2022 13:34:46 -
> @@ -258,6 +258,16 @@ uvm_pagermapin(struct vm_page **pps, int
>   vsize_t size;
>   struct vm_page *pp;
>  
> +#ifdef __HAVE_PMAP_DIRECT
> + /* use direct mappings for single page */
> + if (npages == 1) {
> + KASSERT(pps[0]);
> + KASSERT(pps[0]->pg_flags & PG_BUSY);
> + kva = pmap_map_direct(pps[0]);
> + return kva;
> + }
> +#endif
> +
>   prot = PROT_READ;
>   if (flags & UVMPAGER_MAPIN_READ)
>   prot |= PROT_WRITE;
> @@ -273,14 +283,7 @@ uvm_pagermapin(struct vm_page **pps, int
>   pp = *pps++;
>   KASSERT(pp);
>   KASSERT(pp->pg_flags & PG_BUSY);
> - /* Allow pmap_enter to fail. */
> - if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp),
> - prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) {
> - pmap_remove(pmap_kernel(), kva, cva);
> - pmap_update(pmap_kernel());
> - uvm_pseg_release(kva);
> - return 0;
> - }
> + pmap_kenter_pa(cva, VM_PAGE_TO_PHYS(pp), prot);
>   }
>   pmap_update(pmap_kernel());
>   return kva;
> @@ -294,8 +297,15 @@ uvm_pagermapin(struct vm_page **pps, int
>  void
>  uvm_pagermapout(vaddr_t kva, int npages)
>  {
> +#ifdef __HAVE_PMAP_DIRECT
> + /* use direct mappings for single page */
> + if (npages == 1) {
> + pmap_unmap_direct(kva);
> + return;
> + }
> +#endif
>  
> - pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT));
> + pmap_kremove(kva, (vsize_t)npages << PAGE_SHIFT);
>   pmap_update(pmap_kernel());
>   uvm_pseg_release(kva);



Re: powerpc, macppc: retrigger deferred DEC interrupts from splx(9)

2022-06-30 Thread George Koehler
On Wed, 29 Jun 2022 22:47:19 -0500
Scott Cheloha  wrote:

> To be perfectly clear, you are concerned about this scenario:
> 
> > > + if (ci->ci_dec_deferred && newcpl < IPL_CLOCK) {
> > > + ppc_mtdec(0);
> 
>   /* DEC interrupt fires *here*. */
>   /* We jump to decrint() and then call decr_intr(). */
> 
> > > + ppc_mtdec(UINT32_MAX);  /* raise DEC exception */
> > > + }
> 
> I think it's possible for the DEC exception to occur in that spot.
> However, external/DEC *interrupts* are explicitly disabled, so I don't
> think that we will jump to decrint() until the next time we do
> 
>   ppc_intr_enable(1);

I missed the ppc_intr_disable(), which disables PSL_EE, in
macintr_splx and openpic_splx.  You are correct, it can't call
decr_intr until ppc_intr_enable(1).

ppc_dflt_splx also looks good, because we don't enable PSL_EE until
we switch to macintr_splx or openpic_splx.

> > Would this be better?
> > 
> > ppc_mtdec(1 >> UINT32_MAX);
> > ppc_mtdec(UINT32_MAX);
> 
> I assume you meant to type
> 
>   ppc_mtdec(UINT32_MAX >> 1);

Yes, I meant UINT32_MAX >> 1, but you have persuaded me that the
existing ppc_mtdec(0) is correct, and no change is necessary.  I
will continue running your diff with ppc_mtdec(0).



Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Visa Hankala
On Thu, Jun 30, 2022 at 11:51:52AM +0200, Claudio Jeker wrote:
> After discussing this with mpi@ and jmatthew@ we came to the conclusion
> that we need to smr_barrier() before refcnt_finalize() to ensure that no
> other CPU is between the SMR_TAILQ_FOREACH, refcnt_take() and
> smr_read_leave().

[...]

> @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
>   return;
>   }
>  
> - SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> + smr_read_enter();
> + SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
>   /*
>* If route socket is bound to an address family only send
>* messages that match the address family. Address family
> @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
>   rop->rop_proto != sa_family)
>   continue;
>  
> -
> + refcnt_take(&rop->rop_refcnt);
> + smr_read_leave();
>   so = rop->rop_socket;
>   solock(so);
>  
> @@ -579,8 +559,10 @@ route_input(struct mbuf *m0, struct sock
>   rtm_sendup(so, m);
>  next:
>   sounlock(so);
> + smr_read_enter();
> + refcnt_rele_wake(&rop->rop_refcnt);

This does not look correct.

smr_barrier() can proceed after smr_read_leave(), so refcnt_rele_wake()
might drop the final reference and this thread can no longer access
rop safely (SMR_TAILQ_NEXT() inside SMR_TAILQ_FOREACH()).

Also, SMR_TAILQ_NEXT() of rop becomes potentially dangling after
smr_read_leave(). After this thread leaves the read-side critical
section, another thread might free rop's successor.



Re: netstart: create virtual interfaces upfront when passing specific ones

2022-06-30 Thread Klemens Nanni
On Tue, Dec 07, 2021 at 08:15:41PM +, Klemens Nanni wrote:
> On Tue, Nov 23, 2021 at 01:17:14AM +, Klemens Nanni wrote:
> > On Tue, Nov 16, 2021 at 11:09:40PM +, Klemens Nanni wrote:
> > > Run on boot without arguments, netstart(8) creates all virtual
> > > interfaces *for which hostname.if files exist* before configuring them.
> > > 
> > > This prevents ordering problems with bridges and its members, as dlg's
> > > commit message from 2018 reminds us.
> > > 
> > > But it also helps interface types like pair(4) which pair one another
> > > in whatever way the user says:
> > > 
> > >   $ cat /etc/hostname.pair1
> > >   patch pair2
> > >   $ cat /etc/hostname.pair2
> > >   rdomain 1
> > > 
> > > > On boot this works, but `sh /etc/netstart pair1 pair2' won't work
> > > > because pair2 does not exist at creation time of pair1 because netstart
> > > > does not create virtual interfaces upfront.
> > > 
> > > I just hit this exact use case when setting up gelatod(8) (see ports@).
> > > 
> > > To fix this, pass the list of interfaces to vifscreate() and make it
> > > create only those iff given.
> > > 
> > > > Regular boot, i.e. `sh /etc/netstart', stays unaffected by this and
> > > > selective runs as shown work as expected without requiring users to know
> > > > the order in which netstart creates/configures interfaces.
> > > 
> > > The installer's internal version of netstart doesn't need this at all;
> > > neither does it have the selective semantic nor does vifscreate() exist.
> > 
> > Anyone?
> > 
> > It seems only logical to treat subsets of interfaces the same way as
> > a full `sh /etc/netstart'.
> > 
> > A pair of pair(4) is one example, I'm certain there are more scenarios
> > where you craft interfaces with `ifconfig ...' in the shell, then set up
> > the hostname.* files and test them with `sh /etc/netstart bridge0 ...'
> > where pseudo interfaces are involved.
> 
> Anyone?
> 
> This is really practical and fixes things at least for me when I destroy
> interfaces, reconfigure and recreate them together, for example like so:
> 
>   # ifconfig pair2 destroy
>   # ifconfig pair1 destroy
>   ... edit hostname.*
>   # sh /etc/netstart pair1 pair2
>   ifconfig: patch pair2: No such file or directory
>   add net default: gateway 192.0.0.1
> 
> (redoing it because who knows what failed due to the order problem and
> what didn't...)
> 
>   # ifconfig pair2 destroy
>   # ifconfig pair1 destroy
>   # sh /usr/src/etc/netstart pair1 pair2
>   add net default: gateway 192.0.0.1
> 
> Feedback? Objection? OK?

One last ping with the same diff on top of -CURRENT.


Index: etc/netstart
===
RCS file: /cvs/src/etc/netstart,v
retrieving revision 1.218
diff -u -p -r1.218 netstart
--- etc/netstart26 Jun 2022 09:36:13 -  1.218
+++ etc/netstart30 Jun 2022 14:48:46 -
@@ -94,9 +94,11 @@ ifcreate() {
 }
 
 # Create interfaces for network pseudo-devices referred to by hostname.if 
files.
-# Usage: vifscreate
+# Optionally, limit creation to given interfaces only.
+# Usage: vifscreate [if ...]
 vifscreate() {
-   local _vif _hn _if
+   local _vif _hn _if _ifs
+   set -A _ifs -- "$@"
 
for _vif in $(ifconfig -C); do
for _hn in /etc/hostname.${_vif}+([[:digit:]]); do
@@ -106,6 +108,9 @@ vifscreate() {
# loopback for routing domain is created by kernel
[[ -n ${_if##lo[1-9]*} ]] || continue
 
+   ((${#_ifs[*]} > 0)) && [[ ${_ifs[*]} != *${_if}* ]] &&
+   continue
+
if ! ifcreate $_if; then
print -u2 "${0##*/}: create for '$_if' failed."
fi
@@ -314,6 +319,7 @@ $PRINT_ONLY || [[ ! -f /etc/soii.key ]] 
 # If we were invoked with a list of interface names, just reconfigure these
 # interfaces (or bridges), add default routes and return.
 if (($# > 0)); then
+   vifscreate "$@"
for _if; do ifstart $_if; done
defaultroute
return



Re: amd64 serial console changes

2022-06-30 Thread Hrvoje Popovski
On 30.6.2022. 17:03, Stuart Henderson wrote:
> On 2022/06/30 16:55, Hrvoje Popovski wrote:
>> On 30.6.2022. 16:48, Hrvoje Popovski wrote:
>>> On 30.6.2022. 15:14, Anton Lindqvist wrote:
>>>> On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
>>>>> Ah right.  Please commit!
>>>> Here's the complete diff, ok?
>>>
>>>
>>> Hi,
>>>
>>> with this diff :
>>>
>>> dell r620 - serial console
>>> com1 at acpi0 COMA addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
>>> com1: console
>>> com0 at acpi0 COMB addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
>>>
>>> works fast as before with first boot but second boot is slow...
>>>
>>> supermicro - ipmi console
>>> com0 at acpi0 UAR1 addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
>>> com1 at acpi0 UAR2 addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
>>> com1: console
>>>
>>> is slow as without this diff ..
>>>
>>>
>>> i will try on few more machines this diff ...
>>>
>>
>> after applying diff i did
>> cd /sys/arch/amd64/compile/GENERIC.MP && make -j6 obj && make config &&
>> make clean && time make -j6 && make install && reboot
>>
>>
>> is this ok?
>>
> 
> This is in the bootloader not the kernel - "make obj/make/make install"
> in sys/arch/amd64/stand and "installboot"
> 

Thank you sthen@ and jca@ ...
After these steps everything works just fine ..

Thank you guys ..



Re: amd64 serial console changes

2022-06-30 Thread Stuart Henderson
On 2022/06/30 16:55, Hrvoje Popovski wrote:
> On 30.6.2022. 16:48, Hrvoje Popovski wrote:
> > On 30.6.2022. 15:14, Anton Lindqvist wrote:
> >> On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
> >>> Ah right.  Please commit!
> >> Here's the complete diff, ok?
> > 
> > 
> > Hi,
> > 
> > with this diff :
> > 
> > dell r620 - serial console
> > com1 at acpi0 COMA addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
> > com1: console
> > com0 at acpi0 COMB addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
> > 
> > works fast as before with first boot but second boot is slow...
> > 
> > supermicro - ipmi console
> > com0 at acpi0 UAR1 addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
> > com1 at acpi0 UAR2 addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
> > com1: console
> > 
> > is slow as without this diff ..
> > 
> > 
> > i will try on few more machines this diff ...
> > 
> 
> after applying diff i did
> cd /sys/arch/amd64/compile/GENERIC.MP && make -j6 obj && make config &&
> make clean && time make -j6 && make install && reboot
> 
> 
> is this ok?
> 

This is in the bootloader not the kernel - "make obj/make/make install"
in sys/arch/amd64/stand and "installboot"



Re: amd64 serial console changes

2022-06-30 Thread Jeremie Courreges-Anglas
On Thu, Jun 30 2022, Hrvoje Popovski  wrote:
> On 30.6.2022. 16:48, Hrvoje Popovski wrote:
>> On 30.6.2022. 15:14, Anton Lindqvist wrote:
>>> On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
>>>> Ah right.  Please commit!
>>> Here's the complete diff, ok?
>> 
>> 
>> Hi,
>> 
>> with this diff :
>> 
>> dell r620 - serial console
>> com1 at acpi0 COMA addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
>> com1: console
>> com0 at acpi0 COMB addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
>> 
>> works fast as before with first boot but second boot is slow...
>> 
>> supermicro - ipmi console
>> com0 at acpi0 UAR1 addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
>> com1 at acpi0 UAR2 addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
>> com1: console
>> 
>> is slow as without this diff ..
>> 
>> 
>> i will try on few more machines this diff ...
>> 
>
> after applying diff i did
> cd /sys/arch/amd64/compile/GENERIC.MP && make -j6 obj && make config &&
> make clean && time make -j6 && make install && reboot
>
>
> is this ok?

You need to rebuild the boot files in /sys/arch/amd64/stand, install
them and then run installboot.

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



one send_rtmsg is enough for bgpd

2022-06-30 Thread Claudio Jeker
Implement send_rtmsg() using kroute_full and just use one version of this
magical code. I use struct sockaddr_storage for all sockaddrs added to
ensure that there is a) enough space and b) that ROUNDUP() does not cause
the system to pass uninitialized stack memory to the kernel.

I tested IPv4 and IPv6 but not yet the MPLS version.
kroute_full now also carries the mplslabel which could be shown in bgpctl
show fib output (if anyone wants to add that).

I renamed some struct kroute_full pointers from *kl to *kf. I want to use
that everywhere in the end but step by step.
-- 
:wq Claudio

Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.438
diff -u -p -r1.438 bgpd.h
--- bgpd.h  27 Jun 2022 13:26:51 -  1.438
+++ bgpd.h  30 Jun 2022 14:22:39 -
@@ -693,6 +693,7 @@ struct kroute_full {
struct bgpd_addrprefix;
struct bgpd_addrnexthop;
charlabel[RTLABEL_LEN];
+   uint32_tmplslabel;
uint16_tflags;
u_short ifindex;
uint8_t prefixlen;
Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.270
diff -u -p -r1.270 kroute.c
--- kroute.c25 Jun 2022 19:21:27 -  1.270
+++ kroute.c30 Jun 2022 14:45:39 -
@@ -204,8 +204,7 @@ voidget_rtaddrs(int, struct sockaddr *
 void   if_change(u_short, int, struct if_data *);
 void   if_announce(void *);
 
-intsend_rtmsg(int, int, struct ktable *, struct kroute *);
-intsend_rt6msg(int, int, struct ktable *, struct kroute6 *);
+intsend_rtmsg(int, int, struct ktable *, struct kroute_full *);
 intdispatch_rtmsg(void);
 intfetchtable(struct ktable *);
 intfetchifs(int);
@@ -231,12 +230,6 @@ RB_GENERATE(kif_tree, kif_node, entry, k
 
 #define KT2KNT(x)  (&(ktable_get((x)->nhtableid)->knt))
 
-static const struct in_addrinet4allone = { INADDR_BROADCAST };
-static const struct in6_addr   inet6allone = {{{ 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff }}};
-
-
 /*
  * exported functions
  */
@@ -500,23 +493,23 @@ kr_change(u_int rtableid, struct kroute_
 }
 
 int
-kr4_change(struct ktable *kt, struct kroute_full *kl)
+kr4_change(struct ktable *kt, struct kroute_full *kf)
 {
struct kroute   *kr;
int  action = RTM_ADD;
uint16_t labelid;
 
/* for blackhole and reject routes nexthop needs to be 127.0.0.1 */
-   if (kl->flags & (F_BLACKHOLE|F_REJECT))
-   kl->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK);
+   if (kf->flags & (F_BLACKHOLE|F_REJECT))
+   kf->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK);
/* nexthop within 127/8 -> ignore silently */
-   else if ((kl->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) ==
+   else if ((kf->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) ==
htonl(INADDR_LOOPBACK & IN_CLASSA_NET))
return (0);
 
-   labelid = rtlabel_name2id(kl->label);
+   labelid = rtlabel_name2id(kf->label);
 
-   if ((kr = kroute_find(kt, &kl->prefix, kl->prefixlen,
+   if ((kr = kroute_find(kt, &kf->prefix, kf->prefixlen,
RTP_MINE)) != NULL)
action = RTM_CHANGE;
 
@@ -525,10 +518,10 @@ kr4_change(struct ktable *kt, struct kro
log_warn("%s", __func__);
return (-1);
}
-   kr->prefix.s_addr = kl->prefix.v4.s_addr;
-   kr->prefixlen = kl->prefixlen;
-   kr->nexthop.s_addr = kl->nexthop.v4.s_addr;
-   kr->flags = kl->flags | F_BGPD;
+   kr->prefix.s_addr = kf->prefix.v4.s_addr;
+   kr->prefixlen = kf->prefixlen;
+   kr->nexthop.s_addr = kf->nexthop.v4.s_addr;
+   kr->flags = kf->flags | F_BGPD;
kr->priority = RTP_MINE;
kr->labelid = labelid;
 
@@ -538,27 +531,27 @@ kr4_change(struct ktable *kt, struct kro
return (-1);
}
} else {
-   kr->nexthop.s_addr = kl->nexthop.v4.s_addr;
+   kr->nexthop.s_addr = kf->nexthop.v4.s_addr;
rtlabel_unref(kr->labelid);
kr->labelid = labelid;
-   if (kl->flags & F_BLACKHOLE)
+   if (kf->flags & F_BLACKHOLE)
kr->flags |= F_BLACKHOLE;
else
kr->flags &= ~F_BLACKHOLE;
-   if (kl->flags & F_REJECT)
+   if (kf->flags & F_REJECT)
kr->flags |= F_REJECT;
else
kr->flags &= ~F_REJECT;
}
 
-   if (send_rtms

Re: amd64 serial console changes

2022-06-30 Thread Hrvoje Popovski
On 30.6.2022. 16:48, Hrvoje Popovski wrote:
> On 30.6.2022. 15:14, Anton Lindqvist wrote:
>> On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
>>> Ah right.  Please commit!
>> Here's the complete diff, ok?
> 
> 
> Hi,
> 
> with this diff :
> 
> dell r620 - serial console
> com1 at acpi0 COMA addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
> com1: console
> com0 at acpi0 COMB addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
> 
> works fast as before with first boot but second boot is slow...
> 
> supermicro - ipmi console
> com0 at acpi0 UAR1 addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
> com1 at acpi0 UAR2 addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
> com1: console
> 
> is slow as without this diff ..
> 
> 
> i will try on few more machines this diff ...
> 

after applying diff i did
cd /sys/arch/amd64/compile/GENERIC.MP && make -j6 obj && make config &&
make clean && time make -j6 && make install && reboot


is this ok?



Re: amd64 serial console changes

2022-06-30 Thread Hrvoje Popovski
On 30.6.2022. 15:14, Anton Lindqvist wrote:
> On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
>> Ah right.  Please commit!
> Here's the complete diff, ok?


Hi,

with this diff :

dell r620 - serial console
com1 at acpi0 COMA addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
com1: console
com0 at acpi0 COMB addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo

works fast as before with first boot but second boot is slow...

supermicro - ipmi console
com0 at acpi0 UAR1 addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
com1 at acpi0 UAR2 addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
com1: console

is slow as without this diff ..


i will try on few more machines this diff ...



Re: amd64 serial console changes

2022-06-30 Thread Mark Kettenis
Forget about efi32 and efi64 (I just got permission from mlarkin to remove 
those).
The efiboot and stand/libsa bits are ok kettenis@

> Op 30-06-2022 15:14 schreef Anton Lindqvist :
> 
>  
> On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
> > Ah right.  Please commit!
> 
> Here's the complete diff, ok?
> 
> diff --git sys/arch/amd64/stand/efi32/exec_i386.c 
> sys/arch/amd64/stand/efi32/exec_i386.c
> index 8349d97fefb..21f13cd273b 100644
> --- sys/arch/amd64/stand/efi32/exec_i386.c
> +++ sys/arch/amd64/stand/efi32/exec_i386.c
> @@ -66,7 +66,7 @@ run_loadfile(uint64_t *marks, int howto)
>   dev_t bootdev = bootdev_dip->bootdev;
>   size_t ac = BOOTARG_LEN;
>   caddr_t av = (caddr_t)BOOTARG_OFF;
> - bios_consdev_t cd;
> + bios_oconsdev_t cd;
>   extern int com_speed; /* from bioscons.c */
>   extern int com_addr;
>   bios_ddb_t ddb;
> diff --git sys/arch/amd64/stand/efi64/exec_i386.c 
> sys/arch/amd64/stand/efi64/exec_i386.c
> index 48e460cb040..043193f93be 100644
> --- sys/arch/amd64/stand/efi64/exec_i386.c
> +++ sys/arch/amd64/stand/efi64/exec_i386.c
> @@ -66,7 +66,7 @@ run_loadfile(uint64_t *marks, int howto)
>   dev_t bootdev = bootdev_dip->bootdev;
>   size_t ac = BOOTARG_LEN;
>   caddr_t av = (caddr_t)BOOTARG_OFF;
> - bios_consdev_t cd;
> + bios_oconsdev_t cd;
>   extern int com_speed; /* from bioscons.c */
>   extern int com_addr;
>   bios_ddb_t ddb;
> diff --git sys/arch/amd64/stand/efiboot/exec_i386.c 
> sys/arch/amd64/stand/efiboot/exec_i386.c
> index ea8fa67820b..70ff23c9d32 100644
> --- sys/arch/amd64/stand/efiboot/exec_i386.c
> +++ sys/arch/amd64/stand/efiboot/exec_i386.c
> @@ -72,7 +72,7 @@ run_loadfile(uint64_t *marks, int howto)
>   dev_t bootdev = bootdev_dip->bootdev;
>   size_t ac = BOOTARG_LEN;
>   caddr_t av = (caddr_t)BOOTARG_OFF;
> - bios_consdev_t cd;
> + bios_oconsdev_t cd;
>   extern int com_speed; /* from bioscons.c */
>   extern int com_addr;
>   bios_ddb_t ddb;
> diff --git sys/arch/amd64/stand/libsa/exec_i386.c 
> sys/arch/amd64/stand/libsa/exec_i386.c
> index 22067931829..25d20d359c0 100644
> --- sys/arch/amd64/stand/libsa/exec_i386.c
> +++ sys/arch/amd64/stand/libsa/exec_i386.c
> @@ -91,7 +91,7 @@ run_loadfile(uint64_t *marks, int howto)
>   dev_t bootdev = bootdev_dip->bootdev;
>   size_t ac = BOOTARG_LEN;
>   caddr_t av = (caddr_t)BOOTARG_OFF;
> - bios_consdev_t cd;
> + bios_oconsdev_t cd;
>   extern int com_speed; /* from bioscons.c */
>   extern int com_addr;
>   bios_ddb_t ddb;



Re: amd64 serial console changes

2022-06-30 Thread Hrvoje Popovski
On 30.6.2022. 10:26, Mark Kettenis wrote:
> Hi Hrvoje,
> 
> I assume it was faster before?  What hardware are you seeing this on?

Hi,

yes, it was faster before.

dell r620 - serial console
com1 at acpi0 COMA addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
com1: console
com0 at acpi0 COMB addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo

cat /etc/boot.conf
stty com1 115200
set tty com1

/etc/ttys
tty01   "/usr/libexec/getty std.115200" vt220   on  secure


dell r430 - ipmi console
com0 at acpi0 COMA addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
com1 at acpi0 COMB addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
com1: console

cat /etc/boot.conf
stty com1 115200
set tty com1

/etc/ttys
tty01   "/usr/libexec/getty std.115200" vt200   on  secure


supermicro - ipmi console
com0 at acpi0 UAR1 addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
com1 at acpi0 UAR2 addr 0x2f8/0x8 irq 3: ns16550a, 16 byte fifo
com1: console

cat /etc/boot.conf
stty com1 115200
set tty com1

/etc/ttys
tty01   "/usr/libexec/getty std.115200" vt220   on  secure



will try now anton@ last diff ..



Re: powerpc, macppc: retrigger deferred DEC interrupts from splx(9)

2022-06-30 Thread Miod Vallat
> We only run on New World Macs, and the only ones without openpic(4)
> might be the oldest models of iMac G3 from 1998; these would attach
> macintr0 and not openpic0 in dmesg.  I don't know anyone who might
> have such an iMac.  The iMac model PowerMac2,1 from 1999 (with the
> slot-loading cd drive) does have openpic(4).

This diff appears to work on PowerMac1,1 using macintr0 (dmesg below).
vmstat -i reports 99 clock and stat interrupts per second, ntpd does not
complain about clock drift so far.

[ using 1319132 bytes of bsd ELF symbol table ]
console out [ATY,Rage128y] console in [keyboard]USB and ADB found, using USB
: memaddr 8400, size 400 : consaddr 8400 : ioaddr 80b2, size 
2: width 640 linebytes 640 height 480 depth 8
Copyright (c) 1982, 1986, 1989, 1991, 1993
The Regents of the University of California.  All rights reserved.
Copyright (c) 1995-2022 OpenBSD. All rights reserved.  https://www.OpenBSD.org

OpenBSD 7.1-current (GENERIC) #1: Thu Jun 30 13:45:19 GMT 2022
m...@allanche.gentiane.org:/usr/src/sys/arch/macppc/compile/GENERIC
real mem = 268435456 (256MB)
avail mem = 244858880 (233MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root: model PowerMac1,1
cpu0 at mainbus0: 750 (Revision 0x202): 400 MHz: 1MB backside cache
mem0 at mainbus0
mpcpcibr0 at mainbus0 pci: grackle
pci0 at mpcpcibr0 bus 0
ppb0 at pci0 dev 13 function 0 "DEC 21154" rev 0x02
pci1 at ppb0 bus 1
macobio0 at pci1 dev 5 function 0 "Apple Paddington" rev 0x00
macintr0 at macobio0 offset 0x10
"scsi" at macobio0 offset 0x1 not configured
"escc-legacy" at macobio0 offset 0x12000 not configured
zs0 at macobio0 offset 0x13000: irq 15,16
zstty0 at zs0 channel 0
zstty1 at zs0 channel 1
awacs0 at macobio0 offset 0x14000: irq 17,8,9 headphones
audio0 at awacs0
"power-mgt" at macobio0 offset 0x0 not configured
"fdc" at macobio0 offset 0x15000 not configured
adb0 at macobio0 offset 0x16000: irq 18, via-cuda, 0 targets
wdc0 at macobio0 offset 0x2 irq 13: DMA
atapiscsi0 at wdc0 channel 0 drive 0
scsibus1 at atapiscsi0: 2 targets
cd0 at scsibus1 targ 0 lun 0:  removable
cd0(wdc0:0:0): using BIOS timings, DMA mode 2
bm0 at macobio0 offset 0x11000 irq 42,33: address 00:50:e4:fa:e4:72
lxtphy0 at bm0 phy 0: LXT970 10/100 PHY, rev. 1
"nvram" at macobio0 offset 0x6 not configured
"TI TSB12LV21 FireWire" rev 0x02 at pci1 dev 0 function 0 not configured
pciide0 at pci1 dev 1 function 0 "CMD Technology PCI0646" rev 0x07: DMA, 
channel 0 configured to native-PCI, channel 1 configured to native-PCI
pciide0: using irq 26 for native-PCI interrupt
wd0 at pciide0 channel 0 drive 0: 
wd0: 16-sector PIO, LBA, 12427MB, 25450992 sectors
wd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
pciide0: channel 1 ignored (disabled)
fxp0 at pci1 dev 4 function 0 "Intel 8255x" rev 0x02, i82557: irq 25, address 
00:a0:c9:ab:37:29
inphy0 at fxp0 phy 1: i82555 10/100 PHY, rev. 0
ohci0 at pci1 dev 6 function 0 "Opti 82C861" rev 0x10: irq 28, version 1.0, 
legacy support
usb0 at ohci0: USB revision 1.0
uhub0 at usb0 configuration 1 interface 0 "Opti OHCI root hub" rev 1.00/1.00 
addr 1
vgafb0 at pci0 dev 16 function 0 "ATI Rage 128" rev 0x00, mmio
wsdisplay0 at vgafb0 mux 1: console (std, vt100 emulation)
wsdisplay0: screen 1-5 added (std, vt100 emulation)
uhub1 at uhub0 port 1 configuration 1 interface 0 "Mitsumi Electric Hub in 
Apple Extended USB Keyboard" rev 1.10/4.10 addr 2
uhidev0 at uhub1 port 3 configuration 1 interface 0 "Mitsumi Electric Apple 
Extended USB Keyboard" rev 1.10/4.10 addr 3
uhidev0: iclass 3/1
ukbd0 at uhidev0: 8 variable keys, 6 key codes, country code 13
wskbd0 at ukbd0: console keyboard, using wsdisplay0
uhidev1 at uhub1 port 3 configuration 1 interface 1 "Mitsumi Electric Apple 
Extended USB Keyboard" rev 1.10/4.10 addr 3
uhidev1: iclass 3/0, 3 report ids
uhid0 at uhidev1 reportid 2: input=1, output=0, feature=0
ucc0 at uhidev1 reportid 3: 4 usages, 4 keys, enum
wskbd1 at ucc0 mux 1
wskbd1: connecting to wsdisplay0
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
bootpath: /pci/@d/pci-ata@1/ata-4@0/disk@0:/bsd
root on wd0a (410f22971b6a6734.a) swap on wd0b dump on wd0b



Faster M operation for the swapper to be great again

2022-06-30 Thread Martin Pieuchot
Diff below uses two tricks to make uvm_pagermapin/out() faster and less
likely to fail in OOM situations.

These functions are used to map buffers when swapping pages in/out and
when faulting on mmaped files.  robert@ even measured a 75% improvement
when populating pages related to files that aren't yet in the buffer
cache.

The first trick is to use the direct map when available.  I'm doing this
for single pages but km_alloc(9) also does that for single segment...
uvm_io() only maps one page at a time for the moment so this should be
enough.

The second trick is to use pmap_kenter_pa() which doesn't fail and is
faster.

With these changes the "freeze" happening on my server when entering many
pages to swap in an OOM situation is much shorter and the machine becomes
quickly responsive.

ok?

Index: uvm/uvm_pager.c
===
RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.81
diff -u -p -r1.81 uvm_pager.c
--- uvm/uvm_pager.c 28 Jun 2022 19:07:40 -  1.81
+++ uvm/uvm_pager.c 30 Jun 2022 13:34:46 -
@@ -258,6 +258,16 @@ uvm_pagermapin(struct vm_page **pps, int
vsize_t size;
struct vm_page *pp;
 
+#ifdef __HAVE_PMAP_DIRECT
+   /* use direct mappings for single page */
+   if (npages == 1) {
+   KASSERT(pps[0]);
+   KASSERT(pps[0]->pg_flags & PG_BUSY);
+   kva = pmap_map_direct(pps[0]);
+   return kva;
+   }
+#endif
+
prot = PROT_READ;
if (flags & UVMPAGER_MAPIN_READ)
prot |= PROT_WRITE;
@@ -273,14 +283,7 @@ uvm_pagermapin(struct vm_page **pps, int
pp = *pps++;
KASSERT(pp);
KASSERT(pp->pg_flags & PG_BUSY);
-   /* Allow pmap_enter to fail. */
-   if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp),
-   prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) {
-   pmap_remove(pmap_kernel(), kva, cva);
-   pmap_update(pmap_kernel());
-   uvm_pseg_release(kva);
-   return 0;
-   }
+   pmap_kenter_pa(cva, VM_PAGE_TO_PHYS(pp), prot);
}
pmap_update(pmap_kernel());
return kva;
@@ -294,8 +297,15 @@ uvm_pagermapin(struct vm_page **pps, int
 void
 uvm_pagermapout(vaddr_t kva, int npages)
 {
+#ifdef __HAVE_PMAP_DIRECT
+   /* use direct mappings for single page */
+   if (npages == 1) {
+   pmap_unmap_direct(kva);
+   return;
+   }
+#endif
 
-   pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT));
+   pmap_kremove(kva, (vsize_t)npages << PAGE_SHIFT);
pmap_update(pmap_kernel());
uvm_pseg_release(kva);
 



Re: amd64 serial console changes

2022-06-30 Thread Anton Lindqvist
On Thu, Jun 30, 2022 at 01:07:46PM +0200, Mark Kettenis wrote:
> Ah right.  Please commit!

Here's the complete diff, ok?

diff --git sys/arch/amd64/stand/efi32/exec_i386.c 
sys/arch/amd64/stand/efi32/exec_i386.c
index 8349d97fefb..21f13cd273b 100644
--- sys/arch/amd64/stand/efi32/exec_i386.c
+++ sys/arch/amd64/stand/efi32/exec_i386.c
@@ -66,7 +66,7 @@ run_loadfile(uint64_t *marks, int howto)
dev_t bootdev = bootdev_dip->bootdev;
size_t ac = BOOTARG_LEN;
caddr_t av = (caddr_t)BOOTARG_OFF;
-   bios_consdev_t cd;
+   bios_oconsdev_t cd;
extern int com_speed; /* from bioscons.c */
extern int com_addr;
bios_ddb_t ddb;
diff --git sys/arch/amd64/stand/efi64/exec_i386.c 
sys/arch/amd64/stand/efi64/exec_i386.c
index 48e460cb040..043193f93be 100644
--- sys/arch/amd64/stand/efi64/exec_i386.c
+++ sys/arch/amd64/stand/efi64/exec_i386.c
@@ -66,7 +66,7 @@ run_loadfile(uint64_t *marks, int howto)
dev_t bootdev = bootdev_dip->bootdev;
size_t ac = BOOTARG_LEN;
caddr_t av = (caddr_t)BOOTARG_OFF;
-   bios_consdev_t cd;
+   bios_oconsdev_t cd;
extern int com_speed; /* from bioscons.c */
extern int com_addr;
bios_ddb_t ddb;
diff --git sys/arch/amd64/stand/efiboot/exec_i386.c 
sys/arch/amd64/stand/efiboot/exec_i386.c
index ea8fa67820b..70ff23c9d32 100644
--- sys/arch/amd64/stand/efiboot/exec_i386.c
+++ sys/arch/amd64/stand/efiboot/exec_i386.c
@@ -72,7 +72,7 @@ run_loadfile(uint64_t *marks, int howto)
dev_t bootdev = bootdev_dip->bootdev;
size_t ac = BOOTARG_LEN;
caddr_t av = (caddr_t)BOOTARG_OFF;
-   bios_consdev_t cd;
+   bios_oconsdev_t cd;
extern int com_speed; /* from bioscons.c */
extern int com_addr;
bios_ddb_t ddb;
diff --git sys/arch/amd64/stand/libsa/exec_i386.c 
sys/arch/amd64/stand/libsa/exec_i386.c
index 22067931829..25d20d359c0 100644
--- sys/arch/amd64/stand/libsa/exec_i386.c
+++ sys/arch/amd64/stand/libsa/exec_i386.c
@@ -91,7 +91,7 @@ run_loadfile(uint64_t *marks, int howto)
dev_t bootdev = bootdev_dip->bootdev;
size_t ac = BOOTARG_LEN;
caddr_t av = (caddr_t)BOOTARG_OFF;
-   bios_consdev_t cd;
+   bios_oconsdev_t cd;
extern int com_speed; /* from bioscons.c */
extern int com_addr;
bios_ddb_t ddb;



Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Vitaliy Makkoveev
On Thu, Jun 30, 2022 at 02:32:03PM +0200, Claudio Jeker wrote:
> On Thu, Jun 30, 2022 at 03:21:40PM +0300, Vitaliy Makkoveev wrote:
> > On Thu, Jun 30, 2022 at 11:56:55AM +0200, Claudio Jeker wrote:
> > > On Thu, Jun 30, 2022 at 12:34:33PM +0300, Vitaliy Makkoveev wrote:
> > > > On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > > > > This diff converts the SRP list to a SMR list in rtsock.c
> > > > > SRP is a bit strange with how it works and the SMR code is a bit 
> > > > > easier to
> > > > > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to 
> > > > > grab
> > > > > a refcount on the route pcb so that we can leave the SMR critical 
> > > > > section
> > > > > and then enter the SMR critical section at the end of the loop before
> > > > > dropping the refcount again.
> > > > > 
> > > > > The diff does not immeditaly explode but I doubt we can exploit
> > > > > parallelism in route_input() so this may fail at some later stage if 
> > > > > it is
> > > > > wrong.
> > > > > 
> > > > > Comments from the lock critics welcome
> > > > 
> > > > We use `so_lock' rwlock(9) to protect route domain sockets. We can't
> > > > convert this SRP list to SMR list because we call solock() within
> > > > foreach loop.
> > > 
> > > because of the so_lock the code uses a refcnt on the route pcb to make
> > > sure that the object is not freed while we sleep. So that is handled by
> > > this diff.
> > >  
> > > > We can easily crash kernel by running in parallel some "route monitor"
> > > > commands and "while true; ifconfig vether0 create ; ifconfig vether0
> > > > destroy; done".
> > > 
> > > That does not cause problem on my system.
> > >  
> > 
> > Sorry, I missed you leave SMR section before solock() call:
> > 
> > > > > @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
> > > > >   rop->rop_proto != sa_family)
> > > > >   continue;
> > > > >  
> > > > > -
> > > > > + refcnt_take(&rop->rop_refcnt);
> > > > > + smr_read_leave();
> > > > >   so = rop->rop_socket;
> > > > >   solock(so);
> > > > >  
> > 
> > My system is stable with the second version of this diff and the
> > following test.
> 
> Cool but I fear that the kernel still synchronizes somewhere and we don't
> really get full concurrency here. So I'm not sure if this is able to
> trigger a reference bug.
>  

route_input() and route_{attach,detach}() are fully asynchronous, except
for the `rtp_lk' rwlock(9). When called from the route_output() path,
route_input() runs without the kernel lock or the net lock. We can add
more such threads if we want more concurrency.



Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Claudio Jeker
On Thu, Jun 30, 2022 at 03:21:40PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Jun 30, 2022 at 11:56:55AM +0200, Claudio Jeker wrote:
> > On Thu, Jun 30, 2022 at 12:34:33PM +0300, Vitaliy Makkoveev wrote:
> > > On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > > > This diff converts the SRP list to a SMR list in rtsock.c
> > > > SRP is a bit strange with how it works and the SMR code is a bit easier 
> > > > to
> > > > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to 
> > > > grab
> > > > a refcount on the route pcb so that we can leave the SMR critical 
> > > > section
> > > > and then enter the SMR critical section at the end of the loop before
> > > > dropping the refcount again.
> > > > 
> > > > The diff does not immeditaly explode but I doubt we can exploit
> > > > parallelism in route_input() so this may fail at some later stage if it 
> > > > is
> > > > wrong.
> > > > 
> > > > Comments from the lock critics welcome
> > > 
> > > We use `so_lock' rwlock(9) to protect route domain sockets. We can't
> > > convert this SRP list to SMR list because we call solock() within
> > > foreach loop.
> > 
> > because of the so_lock the code uses a refcnt on the route pcb to make
> > sure that the object is not freed while we sleep. So that is handled by
> > this diff.
> >  
> > > We can easily crash kernel by running in parallel some "route monitor"
> > > commands and "while true; ifconfig vether0 create ; ifconfig vether0
> > > destroy; done".
> > 
> > That does not cause problem on my system.
> >  
> 
> Sorry, I missed you leave SMR section before solock() call:
> 
> > > > @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
> > > > rop->rop_proto != sa_family)
> > > > continue;
> > > >  
> > > > -
> > > > +   refcnt_take(&rop->rop_refcnt);
> > > > +   smr_read_leave();
> > > > so = rop->rop_socket;
> > > > solock(so);
> > > >  
> 
> My system is stable with the second version of this diff and the
> following test.

Cool but I fear that the kernel still synchronizes somewhere and we don't
really get full concurrency here. So I'm not sure if this is able to
trigger a reference bug.
 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> static struct ifreq ifr;
> 
> static void *clone(void *arg)
> {
>   int s;
> 
>   if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
>   err(1, "socket");
>   while (1) {
>   if (ioctl(s, SIOCIFCREATE, &ifr) < 0)
>   if (errno == EINVAL)
>   err(1, "ioctl");
>   if(ioctl(s, SIOCIFDESTROY, &ifr)<0)
>   if(errno == EINVAL)
>   err(1, "ioctl");
>   }
> 
>   return NULL;
> }
> 
> static void *rtsock(void *arg)
> {
>   int s;
> 
>   while (1){
>   if ((s = socket(AF_ROUTE, SOCK_RAW, 0)) < 0)
>   err(1, "socket");
>   close(s);
>   }
> 
>   return NULL;
> }
> 
> int main(int argc, char *argv[])
> {
>   pthread_t thr;
>   int i;
> 
>   if( argc != 2) {
>   fprintf(stderr, "usage: %s ifname\n", getprogname());
>   return 1;
>   }
> 
>   if (getuid() != 0) {
>   fprintf(stderr, "should be root\n");
>   return 1;
>   }
> 
>   memset(&ifr, 0, sizeof(ifr));
>   strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));
> 
>   for(i = 0; i < 8 * 4; ++i) {
>   if (pthread_create(&thr, NULL, clone, NULL)!=0)
>   errx(1, "pthread_create");
>   }
> 
>   for (i = 0; i < 8 * 4; ++i) {
>   if (pthread_create(&thr, NULL, rtsock, NULL) != 0)
>   errx(1, "pthread_create");
>   }
> 
>   select(0, NULL, NULL, NULL, NULL);
> 
>   return 0;
> }
> 

-- 
:wq Claudio



Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Vitaliy Makkoveev
On Thu, Jun 30, 2022 at 11:51:52AM +0200, Claudio Jeker wrote:
> On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > This diff converts the SRP list to a SMR list in rtsock.c
> > SRP is a bit strange with how it works and the SMR code is a bit easier to
> > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> > a refcount on the route pcb so that we can leave the SMR critical section
> > and then enter the SMR critical section at the end of the loop before
> > dropping the refcount again.
> > 
> > The diff does not immeditaly explode but I doubt we can exploit
> > parallelism in route_input() so this may fail at some later stage if it is
> > wrong.
> > 
> > Comments from the lock critics welcome
> 
> After discussing this with mpi@ and jmatthew@ we came to the conclusion
> that we need to smr_barrier() before refcnt_finalize() to ensure that no
> other CPU is between the SMR_TAILQ_FOREACH, refcnt_take() and
> smr_read_leave().
> 
> Updated diff below

ok by me

> -- 
> :wq Claudio
> 
> Index: net/rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.334
> diff -u -p -r1.334 rtsock.c
> --- net/rtsock.c  28 Jun 2022 10:01:13 -  1.334
> +++ net/rtsock.c  30 Jun 2022 09:25:53 -
> @@ -71,7 +71,7 @@
>  #include 
>  #include 
>  #include 
> -#include 
> +#include 
>  
>  #include 
>  #include 
> @@ -107,8 +107,6 @@ struct walkarg {
>  };
>  
>  void route_prinit(void);
> -void rcb_ref(void *, void *);
> -void rcb_unref(void *, void *);
>  int  route_output(struct mbuf *, struct socket *, struct sockaddr *,
>   struct mbuf *);
>  int  route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> @@ -149,7 +147,7 @@ intrt_setsource(unsigned int, struct 
>  struct rtpcb {
>   struct socket   *rop_socket;/* [I] */
>  
> - SRPL_ENTRY(rtpcb)   rop_list;
> + SMR_TAILQ_ENTRY(rtpcb)  rop_list;
>   struct refcnt   rop_refcnt;
>   struct timeout  rop_timeout;
>   unsigned introp_msgfilter;  /* [s] */
> @@ -162,8 +160,7 @@ struct rtpcb {
>  #define  sotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
>  
>  struct rtptable {
> - SRPL_HEAD(, rtpcb)  rtp_list;
> - struct srpl_rc  rtp_rc;
> + SMR_TAILQ_HEAD(, rtpcb) rtp_list;
>   struct rwlock   rtp_lk;
>   unsigned intrtp_count;
>  };
> @@ -185,29 +182,12 @@ struct rtptable rtptable;
>  void
>  route_prinit(void)
>  {
> - srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
>   rw_init(&rtptable.rtp_lk, "rtsock");
> - SRPL_INIT(&rtptable.rtp_list);
> + SMR_TAILQ_INIT(&rtptable.rtp_list);
>   pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
>   IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
>  }
>  
> -void
> -rcb_ref(void *null, void *v)
> -{
> - struct rtpcb *rop = v;
> -
> - refcnt_take(&rop->rop_refcnt);
> -}
> -
> -void
> -rcb_unref(void *null, void *v)
> -{
> - struct rtpcb *rop = v;
> -
> - refcnt_rele_wake(&rop->rop_refcnt);
> -}
> -
>  int
>  route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
>  struct mbuf *control, struct proc *p)
> @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
>   so->so_options |= SO_USELOOPBACK;
>  
>   rw_enter(&rtptable.rtp_lk, RW_WRITE);
> - SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> - rop_list);
> + SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
>   rtptable.rtp_count++;
>   rw_exit(&rtptable.rtp_lk);
>  
> @@ -347,13 +326,13 @@ route_detach(struct socket *so)
>   rw_enter(&rtptable.rtp_lk, RW_WRITE);
>  
>   rtptable.rtp_count--;
> - SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> - rop_list);
> + SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
>   rw_exit(&rtptable.rtp_lk);
>  
>   sounlock(so);
>  
>   /* wait for all references to drop */
> + smr_barrier();
>   refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
>   timeout_del_barrier(&rop->rop_timeout);
>  
> @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
>   struct rtpcb *rop;
>   struct rt_msghdr *rtm;
>   struct mbuf *m = m0;
> - struct srp_ref sr;
>  
>   /* ensure that we can access the rtm_type via mtod() */
>   if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
> @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
>   return;
>   }
>  
> - SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> + smr_read_enter();
> + SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
>   /*
>* If route socket is bound to an address family only send
>* messages that match the address family. Address family
> @@ -519,7 +498,8 @@ 

Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Vitaliy Makkoveev
On Thu, Jun 30, 2022 at 11:56:55AM +0200, Claudio Jeker wrote:
> On Thu, Jun 30, 2022 at 12:34:33PM +0300, Vitaliy Makkoveev wrote:
> > On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > > This diff converts the SRP list to a SMR list in rtsock.c
> > > SRP is a bit strange with how it works and the SMR code is a bit easier to
> > > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> > > a refcount on the route pcb so that we can leave the SMR critical section
> > > and then enter the SMR critical section at the end of the loop before
> > > dropping the refcount again.
> > > 
> > > The diff does not immeditaly explode but I doubt we can exploit
> > > parallelism in route_input() so this may fail at some later stage if it is
> > > wrong.
> > > 
> > > Comments from the lock critics welcome
> > 
> > We use `so_lock' rwlock(9) to protect route domain sockets. We can't
> > convert this SRP list to SMR list because we call solock() within
> > foreach loop.
> 
> because of the so_lock the code uses a refcnt on the route pcb to make
> sure that the object is not freed while we sleep. So that is handled by
> this diff.
>  
> > We can easily crash kernel by running in parallel some "route monitor"
> > commands and "while true; ifconfig vether0 create ; ifconfig vether0
> > destroy; done".
> 
> That does not cause problem on my system.
>  

Sorry, I missed that you leave the SMR section before the solock() call:

> > > @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
> > >   rop->rop_proto != sa_family)
> > >   continue;
> > >  
> > > -
> > > + refcnt_take(&rop->rop_refcnt);
> > > + smr_read_leave();
> > >   so = rop->rop_socket;
> > >   solock(so);
> > >  

My system is stable with the second version of this diff and the
following test.

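#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>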

static struct ifreq ifr;

/*
 * Worker thread: repeatedly create and then destroy the interface named in
 * the file-level `ifr' through a datagram socket.
 *
 * A failed ioctl(2) is fatal only when errno is EINVAL; every other error
 * is ignored and the loop carries on (presumably because the parallel
 * workers race on the same interface name -- confirm with the author).
 * `arg' is unused.
 */
static void *clone(void *arg)
{
	int sock;

	sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "socket");

	for (;;) {
		if (ioctl(sock, SIOCIFCREATE, &ifr) < 0 && errno == EINVAL)
			err(1, "ioctl");
		if (ioctl(sock, SIOCIFDESTROY, &ifr) < 0 && errno == EINVAL)
			err(1, "ioctl");
	}

	/* Not reached; kept so the function has a return statement. */
	return NULL;
}

/*
 * Worker thread: open and immediately close a raw AF_ROUTE socket in an
 * endless loop. A socket(2) failure terminates the program via err(3).
 * `arg' is unused.
 */
static void *rtsock(void *arg)
{
	for (;;) {
		int fd = socket(AF_ROUTE, SOCK_RAW, 0);

		if (fd < 0)
			err(1, "socket");
		close(fd);
	}

	/* Not reached; kept so the function has a return statement. */
	return NULL;
}

/*
 * Entry point: takes exactly one argument, the interface name, and must be
 * run as root. Starts 8 * 4 clone() workers followed by 8 * 4 rtsock()
 * workers, then blocks in select(2) so the process stays alive.
 *
 * Returns 1 on a usage error or when not running as root; a failing
 * pthread_create() terminates the program via errx(3).
 */
int main(int argc, char *argv[])
{
	/* Worker entry points, in the order their threads are started. */
	void *(*const workers[])(void *) = { clone, rtsock };
	const int per_worker = 8 * 4;
	pthread_t tid;
	size_t w;
	int n;

	if (argc != 2) {
		fprintf(stderr, "usage: %s ifname\n", getprogname());
		return 1;
	}
	if (getuid() != 0) {
		fprintf(stderr, "should be root\n");
		return 1;
	}

	/* Prepare the interface request shared by every clone() worker. */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));

	for (w = 0; w < sizeof(workers) / sizeof(workers[0]); w++) {
		for (n = 0; n < per_worker; ++n) {
			if (pthread_create(&tid, NULL, workers[w], NULL) != 0)
				errx(1, "pthread_create");
		}
	}

	/* No descriptors and no timeout: wait here indefinitely. */
	select(0, NULL, NULL, NULL, NULL);

	return 0;
}



Re: snmpd(8): Add blocklist feature

2022-06-30 Thread Theo Buehler
On Wed, Jun 29, 2022 at 01:35:37PM +0200, Martijn van Duren wrote:
> On Tue, 2022-06-28 at 12:33 +0200, Martijn van Duren wrote:
> > On Tue, 2022-06-28 at 12:21 +0200, Martijn van Duren wrote:
> > > On Tue, 2022-06-28 at 10:21 +0200, Martijn van Duren wrote:
> > > > Back in 2020 florian@ added the filter-pf-addresses keyword.
> > > > Although useful, I always felt it was a bit too case-specific. The diff
> > > > below adds a new blocklist feature/backend, which takes hold of an
> > > > entire subtree, effectively removing it from the tree.
> > > > 
> > > > With this I've deprecated the filter-pf-address case and should
> > > > probably be removed somewhere after 7.4. The filter-routes case can't
> > > > be removed unfortunately, since its behaviour is not identical, and
> > > > instead adds filters to the routing socket, preventing updates being
> > > > pushed to snmpd(8).
> > > > 
> > > > Feedback/OK?
> > > > 
> > > > martijn@
> > > > 
> > > Also clean up after ourselves if appl_exception fails.
> > > 
> > *sigh* Missed a return.
> > 
> And also some cleanup during shutdown.

I can't spot anything wrong in this one, and it works fine in some basic
testing.

ok tb

> 
> Index: Makefile
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/Makefile,v
> retrieving revision 1.18
> diff -u -p -r1.18 Makefile
> --- Makefile  19 Jan 2022 11:00:56 -  1.18
> +++ Makefile  29 Jun 2022 11:35:23 -
> @@ -3,6 +3,7 @@
>  PROG=snmpd
>  MAN= snmpd.8 snmpd.conf.5
>  SRCS=parse.y log.c snmpe.c application.c 
> application_legacy.c \
> + application_blocklist.c \
>   mps.c trap.c mib.c smi.c kroute.c snmpd.c timer.c \
>   pf.c proc.c usm.c traphandler.c util.c
>  
> Index: application.c
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/application.c,v
> retrieving revision 1.5
> diff -u -p -r1.5 application.c
> --- application.c 27 Jun 2022 10:31:17 -  1.5
> +++ application.c 29 Jun 2022 11:35:23 -
> @@ -148,6 +148,7 @@ RB_PROTOTYPE_STATIC(appl_requests, appl_
>  void
>  appl_init(void)
>  {
> + appl_blocklist_init();
>   appl_legacy_init();
>  }
>  
> @@ -156,6 +157,7 @@ appl_shutdown(void)
>  {
>   struct appl_context *ctx, *tctx;
>  
> + appl_blocklist_shutdown();
>   appl_legacy_shutdown();
>  
>   TAILQ_FOREACH_SAFE(ctx, &contexts, ac_entries, tctx) {
> Index: application.h
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/application.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 application.h
> --- application.h 19 Jan 2022 10:59:35 -  1.1
> +++ application.h 29 Jun 2022 11:35:23 -
> @@ -133,3 +133,7 @@ struct ber_element *appl_exception(enum 
>  /* application_legacy.c */
>  void  appl_legacy_init(void);
>  void  appl_legacy_shutdown(void);
> +
> +/* application_blocklist.c */
> +void  appl_blocklist_init(void);
> +void  appl_blocklist_shutdown(void);
> Index: application_blocklist.c
> ===
> RCS file: application_blocklist.c
> diff -N application_blocklist.c
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ application_blocklist.c   29 Jun 2022 11:35:23 -
> @@ -0,0 +1,141 @@
> +/*   $OpenBSD: application.c,v 1.5 2022/06/27 10:31:17 martijn Exp $ */
> +
> +/*
> + * Copyright (c) 2022 Martijn van Duren 
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include 
> +#include 
> +
> +#include "application.h"
> +#include "snmpd.h"
> +
> +struct appl_varbind *appl_blocklist_response(size_t);
> +void appl_blocklist_get(struct appl_backend *, int32_t, int32_t, const char 
> *,
> +struct appl_varbind *);
> +void appl_blocklist_getnext(struct appl_backend *, int32_t, int32_t,
> +const char *, struct appl_varbind *);
> +
> +struct appl_backend_functions appl_blocklist_functions = {
> + .ab_get = appl_blocklist_get,
> + .ab_getnext = appl_blocklist_getnext,
> + .ab_getbulk = NULL,
> +};
> +
> +struct appl_backend appl_blocklist = {
> + .ab_name = "blocklist",
> + .ab_cookie = NULL,
> + .ab_retries = 

Re: amd64 serial console changes

2022-06-30 Thread Mark Kettenis
Ah right.  Please commit!

> Op 30-06-2022 11:55 schreef Anton Lindqvist :
> 
>  
> On Thu, Jun 30, 2022 at 08:52:33AM +0200, Hrvoje Popovski wrote:
> > On 27.6.2022. 23:44, Mark Kettenis wrote:
> > > The Ryzen Embedded V1000 processors have an arm64-style Synposys
> > > DesignWare UART instead if a PC-compatible NS16x50 UART.  To make this
> > > UART work as a serial console, we need to pass some more information
> > > from the bootloader to the kernel.  This diff adds the logic to handle
> > > that information to the kernel.  I'd like some folks that use a serial
> > > console on their amd64 machines to test this.  But testing this diff
> > > on amd64 machines with a glass console doesn't hurt.
> > > 
> > > Thanks,
> > > 
> > > Mark
> > 
> > 
> > Hi,
> > 
> > I've sysupgrade few boxes few minutes ago and booting output is quite
> > slow. Everything is working but console output needs cca 2 or 3 minutes
> > to finish depends how big is dmesg.
> > 
> > I have few console over ipmi and few connected to serial and all of them
> > are slow.
> > 
> > When boot output finish working over console backs to normal fast output.
> 
> I would guess the problem arises after running installboot causing the
> new bios_consdev_t structure to be used, whereas only the fields in the
> old bios_oconsdev_t structure are populated? Maybe the rename should
> have been the other way around until the new structure is populated.
> 
> This makes the boot output "fast" again on my machine. I missed
> installboot while testing the original diff.
> 
> diff --git sys/arch/amd64/stand/libsa/exec_i386.c 
> sys/arch/amd64/stand/libsa/exec_i386.c
> index 22067931829..25d20d359c0 100644
> --- sys/arch/amd64/stand/libsa/exec_i386.c
> +++ sys/arch/amd64/stand/libsa/exec_i386.c
> @@ -91,7 +91,7 @@ run_loadfile(uint64_t *marks, int howto)
>   dev_t bootdev = bootdev_dip->bootdev;
>   size_t ac = BOOTARG_LEN;
>   caddr_t av = (caddr_t)BOOTARG_OFF;
> - bios_consdev_t cd;
> + bios_oconsdev_t cd;
>   extern int com_speed; /* from bioscons.c */
>   extern int com_addr;
>   bios_ddb_t ddb;



Re: Simplify aiodone daemon

2022-06-30 Thread Mark Kettenis


> Op 29-06-2022 16:17 schreef Martin Pieuchot :
> 
>  
> The aiodone daemon accounts for and frees/releases pages they were
> written to swap.  It is only used for asynchronous write.  The diff
> below uses this knowledge to:
> 
> - Stop suggesting that uvm_swap_get() can be asynchronous.  There's an
>   assert for PGO_SYNCIO 3 lines above.
> 
> - Remove unused support for asynchronous read, including error
>   conditions, from uvm_aio_aiodone_pages().
> 
> - Grab the proper lock for each page that has been written to swap.
>   This allows to enable an assert in uvm_page_unbusy().
> 
> - Move the uvm_anon_release() call outside of uvm_page_unbusy() and
>   assert for the different anon cases.  This will allows us to unify
>   code paths waiting for busy pages.
> 
> This is adapted/simplified from what is in NetBSD.
> 
> ok?

I don't fully understand the old code, but the diff makes sense combined with 
what you state above.  Two small nits below.

ok kettenis@
 
> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.103
> diff -u -p -r1.103 uvm_aobj.c
> --- uvm/uvm_aobj.c29 Dec 2021 20:22:06 -  1.103
> +++ uvm/uvm_aobj.c29 Jun 2022 11:16:35 -
> @@ -143,7 +143,6 @@ struct pool uvm_aobj_pool;
>  
>  static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
>boolean_t);
> -static intuao_find_swslot(struct uvm_object *, int);
>  static boolean_t  uao_flush(struct uvm_object *, voff_t,
>voff_t, int);
>  static void   uao_free(struct uvm_aobj *);
> @@ -241,7 +240,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
>  /*
>   * uao_find_swslot: find the swap slot number for an aobj/pageidx
>   */
> -inline static int
> +int
>  uao_find_swslot(struct uvm_object *uobj, int pageidx)
>  {
>   struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
> Index: uvm/uvm_aobj.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 uvm_aobj.h
> --- uvm/uvm_aobj.h21 Oct 2020 09:08:14 -  1.17
> +++ uvm/uvm_aobj.h29 Jun 2022 11:16:35 -
> @@ -60,6 +60,7 @@
>  
>  void uao_init(void);
>  int uao_set_swslot(struct uvm_object *, int, int);
> +int uao_find_swslot (struct uvm_object *, int);

Spurious space

>  int uao_dropswap(struct uvm_object *, int);
>  int uao_swap_off(int, int);
>  int uao_shrink(struct uvm_object *, int);
> Index: uvm/uvm_page.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_page.c,v
> retrieving revision 1.166
> diff -u -p -r1.166 uvm_page.c
> --- uvm/uvm_page.c12 May 2022 12:48:36 -  1.166
> +++ uvm/uvm_page.c29 Jun 2022 11:47:55 -
> @@ -1036,13 +1036,14 @@ uvm_pagefree(struct vm_page *pg)
>   * uvm_page_unbusy: unbusy an array of pages.
>   *
>   * => pages must either all belong to the same object, or all belong to 
> anons.
> + * => if pages are object-owned, object must be locked.
>   * => if pages are anon-owned, anons must have 0 refcount.
> + * => caller must make sure that anon-owned pages are not PG_RELEASED.
>   */
>  void
>  uvm_page_unbusy(struct vm_page **pgs, int npgs)
>  {
>   struct vm_page *pg;
> - struct uvm_object *uobj;
>   int i;
>  
>   for (i = 0; i < npgs; i++) {
> @@ -1052,35 +1053,19 @@ uvm_page_unbusy(struct vm_page **pgs, in
>   continue;
>   }
>  
> -#if notyet
> - /*
> - * XXX swap case in uvm_aio_aiodone() is not holding the 
> lock.
> -  *
> -  * This isn't compatible with the PG_RELEASED anon case below.
> -  */
>   KASSERT(uvm_page_owner_locked_p(pg));
> -#endif
>   KASSERT(pg->pg_flags & PG_BUSY);
>  
>   if (pg->pg_flags & PG_WANTED) {
>   wakeup(pg);
>   }
>   if (pg->pg_flags & PG_RELEASED) {
> - uobj = pg->uobject;
> - if (uobj != NULL) {
> - uvm_lock_pageq();
> - pmap_page_protect(pg, PROT_NONE);
> - /* XXX won't happen right now */
> - if (pg->pg_flags & PQ_AOBJ)
> - uao_dropswap(uobj,
> - pg->offset >> PAGE_SHIFT);
> - uvm_pagefree(pg);
> - uvm_unlock_pageq();
> - } else {
> - rw_enter(pg->uanon->an_lock, RW_WRITE);
> - uvm_anon_release(pg->uanon);
> - }
> + KASSERT(pg->uobject != NULL ||
> + (pg->uanon != NULL && pg-

Re: TSO Large Send Offloading for ix(4)

2022-06-30 Thread Claudio Jeker
On Wed, Jun 29, 2022 at 02:24:35PM +0200, Jan Klemkow wrote:
> Hi,
> 
> This diff introduces the sending side of TSO to our TCP/IP stack.
> If the hardware has TSO capabilities tcp_output() will send huge TCP
> segments down the stack to the interface.  ip{6}_output() will ignore
> the size is greater then eny MTU in this case.
> 
> I tested it with IPv4, IPv6 and VLAN.  VLAN sending is not offloaded
> now, because this interface does not inherited the TSO capability.
> I will do this in a later diff.
> 
> If you have an ix(4) NIC of 82599 or newer, just enable TSO with
> ifconfig(8):
> 
> # ifconfig ix0 tso
> 
> Thanks for testing,
> Jan

A few comments below: 

> Index: netinet/tcp_input.c
> ===
> RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.375
> diff -u -p -r1.375 tcp_input.c
> --- netinet/tcp_input.c   4 Jan 2022 06:32:39 -   1.375
> +++ netinet/tcp_input.c   28 Jun 2022 17:12:43 -
> @@ -2851,6 +2851,15 @@ tcp_mss(struct tcpcb *tp, int offer)
>   mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
>   mssopt = max(tcp_mssdflt, mssopt);
>   }
> +
> + if (ISSET(ifp->if_xflags, IFXF_TSO)) {
> + tp->t_flags |= TF_TSO;
> +
> + if (ifp->if_hw_tsomax < MAXMCLBYTES)
> + tp->t_tsomax = ifp->if_hw_tsomax;
> + else
> + tp->t_tsomax = MAXMCLBYTES;
> + }

Why is there a limit on MAXMCLBYTES? I guess the card must support chained
buffers because a 64k mbuf is not linearly mapped.

>   out:
>   if_put(ifp);
>   /*

> Index: sys/mbuf.h
> ===
> RCS file: /mount/openbsd/cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.254
> diff -u -p -r1.254 mbuf.h
> --- sys/mbuf.h14 Feb 2022 04:33:18 -  1.254
> +++ sys/mbuf.h28 Jun 2022 17:29:00 -
> @@ -133,6 +133,7 @@ structpkthdr {
>   u_int16_tph_flowid; /* pseudo unique flow id */
>   u_int16_tcsum_flags;/* checksum flags */
>   u_int16_tether_vtag;/* Ethernet 802.1p+Q vlan tag */
> + u_int16_tph_mss;/* max. seg. size. */
>   u_intph_rtableid;   /* routing table id */
>   u_intph_ifidx;  /* rcv interface index */
>   u_int8_t ph_loopcnt;/* mbuf is looping in kernel */

Please move the two u_int fields above the u_int16_t block so that the
pkthdr packs nicely. You add a 5th uint16_t and so that would insert an
extra pad.

> @@ -226,13 +227,14 @@ struct mbuf {
>  #define  M_IPV6_DF_OUT   0x1000  /* don't fragment outgoing IPv6 
> */
>  #define  M_TIMESTAMP 0x2000  /* ph_timestamp is set */
>  #define  M_FLOWID0x4000  /* ph_flowid is set */
> +#define  M_TCP_TSO   0x8000  /* TCP Segmentation Offload 
> needed */
>  
>  #ifdef _KERNEL
>  #define MCS_BITS \
>  ("\20\1IPV4_CSUM_OUT\2TCP_CSUM_OUT\3UDP_CSUM_OUT\4IPV4_CSUM_IN_OK" \
>  "\5IPV4_CSUM_IN_BAD\6TCP_CSUM_IN_OK\7TCP_CSUM_IN_BAD\10UDP_CSUM_IN_OK" \
>  
> "\11UDP_CSUM_IN_BAD\12ICMP_CSUM_OUT\13ICMP_CSUM_IN_OK\14ICMP_CSUM_IN_BAD" \
> -"\15IPV6_NODF_OUT" "\16TIMESTAMP" "\17FLOWID")
> +"\15IPV6_NODF_OUT" "\16TIMESTAMP" "\17FLOWID" "\20TCP_TSO")
>  #endif
>  
>  /* mbuf types */
> 

What happens when pf reroutes the packets to a non-TSO-capable
interface? I think this should be done like HW CSUM offloading, where the
stack fixes up packets in ip_output() when it realizes that the HW does
not support it. Only then does this become generally more usable.
Also, doing TSO/LSO in ip_output() would allow us to use LRO/LSO in
forwarding and in many more places. It will also improve performance since
we can batch send TCP packets.

-- 
:wq Claudio



Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Martin Pieuchot
On 30/06/22(Thu) 11:56, Claudio Jeker wrote:
> On Thu, Jun 30, 2022 at 12:34:33PM +0300, Vitaliy Makkoveev wrote:
> > On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > > This diff converts the SRP list to a SMR list in rtsock.c
> > > SRP is a bit strange with how it works and the SMR code is a bit easier to
> > > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> > > a refcount on the route pcb so that we can leave the SMR critical section
> > > and then enter the SMR critical section at the end of the loop before
> > > dropping the refcount again.
> > > 
> > > > The diff does not immediately explode but I doubt we can exploit
> > > parallelism in route_input() so this may fail at some later stage if it is
> > > wrong.
> > > 
> > > Comments from the lock critics welcome
> > 
> > We use `so_lock' rwlock(9) to protect route domain sockets. We can't
> > convert this SRP list to SMR list because we call solock() within
> > foreach loop.

We shouldn't use an SRP list either, no?  Or are we allowed to sleep
while holding an SRP reference?  That's the question that triggered this diff.

> because of the so_lock the code uses a refcnt on the route pcb to make
> sure that the object is not freed while we sleep. So that is handled by
> this diff.
>  
> > We can easily crash kernel by running in parallel some "route monitor"
> > commands and "while true; ifconfig vether0 create ; ifconfig vether0
> > destroy; done".
> 
> That does not cause problem on my system.
>  
> > > -- 
> > > :wq Claudio
> > > 
> > > Index: sys/net/rtsock.c
> > > ===
> > > RCS file: /cvs/src/sys/net/rtsock.c,v
> > > retrieving revision 1.334
> > > diff -u -p -r1.334 rtsock.c
> > > --- sys/net/rtsock.c  28 Jun 2022 10:01:13 -  1.334
> > > +++ sys/net/rtsock.c  30 Jun 2022 08:02:09 -
> > > @@ -71,7 +71,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > -#include 
> > > +#include 
> > >  
> > >  #include 
> > >  #include 
> > > @@ -107,8 +107,6 @@ struct walkarg {
> > >  };
> > >  
> > >  void route_prinit(void);
> > > -void rcb_ref(void *, void *);
> > > -void rcb_unref(void *, void *);
> > >  int  route_output(struct mbuf *, struct socket *, struct sockaddr *,
> > >   struct mbuf *);
> > >  int  route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> > > @@ -149,7 +147,7 @@ intrt_setsource(unsigned int, struct 
> > >  struct rtpcb {
> > >   struct socket   *rop_socket;/* [I] */
> > >  
> > > - SRPL_ENTRY(rtpcb)   rop_list;
> > > + SMR_TAILQ_ENTRY(rtpcb)  rop_list;
> > >   struct refcnt   rop_refcnt;
> > >   struct timeout  rop_timeout;
> > >   unsigned introp_msgfilter;  /* [s] */
> > > @@ -162,8 +160,7 @@ struct rtpcb {
> > >  #define  sotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
> > >  
> > >  struct rtptable {
> > > - SRPL_HEAD(, rtpcb)  rtp_list;
> > > - struct srpl_rc  rtp_rc;
> > > + SMR_TAILQ_HEAD(, rtpcb) rtp_list;
> > >   struct rwlock   rtp_lk;
> > >   unsigned intrtp_count;
> > >  };
> > > @@ -185,29 +182,12 @@ struct rtptable rtptable;
> > >  void
> > >  route_prinit(void)
> > >  {
> > > - srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
> > >   rw_init(&rtptable.rtp_lk, "rtsock");
> > > - SRPL_INIT(&rtptable.rtp_list);
> > > + SMR_TAILQ_INIT(&rtptable.rtp_list);
> > >   pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
> > >   IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
> > >  }
> > >  
> > > -void
> > > -rcb_ref(void *null, void *v)
> > > -{
> > > - struct rtpcb *rop = v;
> > > -
> > > - refcnt_take(&rop->rop_refcnt);
> > > -}
> > > -
> > > -void
> > > -rcb_unref(void *null, void *v)
> > > -{
> > > - struct rtpcb *rop = v;
> > > -
> > > - refcnt_rele_wake(&rop->rop_refcnt);
> > > -}
> > > -
> > >  int
> > >  route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf 
> > > *nam,
> > >  struct mbuf *control, struct proc *p)
> > > @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
> > >   so->so_options |= SO_USELOOPBACK;
> > >  
> > >   rw_enter(&rtptable.rtp_lk, RW_WRITE);
> > > - SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> > > - rop_list);
> > > + SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
> > >   rtptable.rtp_count++;
> > >   rw_exit(&rtptable.rtp_lk);
> > >  
> > > @@ -347,8 +326,7 @@ route_detach(struct socket *so)
> > >   rw_enter(&rtptable.rtp_lk, RW_WRITE);
> > >  
> > >   rtptable.rtp_count--;
> > > - SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> > > - rop_list);
> > > + SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
> > >   rw_exit(&rtptable.rtp_lk);
> > >  
> > >   sounlock(so);
> > > @@ -356,6 +334,7 @@ route_detach(struct socket *so)
> > >   /* wait for all references to drop */
> > >   refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
> > 

Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Claudio Jeker
On Thu, Jun 30, 2022 at 12:34:33PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > This diff converts the SRP list to a SMR list in rtsock.c
> > SRP is a bit strange with how it works and the SMR code is a bit easier to
> > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> > a refcount on the route pcb so that we can leave the SMR critical section
> > and then enter the SMR critical section at the end of the loop before
> > dropping the refcount again.
> > 
> > The diff does not immediately explode but I doubt we can exploit
> > parallelism in route_input() so this may fail at some later stage if it is
> > wrong.
> > 
> > Comments from the lock critics welcome
> 
> We use `so_lock' rwlock(9) to protect route domain sockets. We can't
> convert this SRP list to SMR list because we call solock() within
> foreach loop.

Because of the so_lock, the code uses a refcnt on the route pcb to make
sure that the object is not freed while we sleep. So that is handled by
this diff.
 
> We can easily crash kernel by running in parallel some "route monitor"
> commands and "while true; ifconfig vether0 create ; ifconfig vether0
> destroy; done".

That does not cause a problem on my system.
 
> > -- 
> > :wq Claudio
> > 
> > Index: sys/net/rtsock.c
> > ===
> > RCS file: /cvs/src/sys/net/rtsock.c,v
> > retrieving revision 1.334
> > diff -u -p -r1.334 rtsock.c
> > --- sys/net/rtsock.c28 Jun 2022 10:01:13 -  1.334
> > +++ sys/net/rtsock.c30 Jun 2022 08:02:09 -
> > @@ -71,7 +71,7 @@
> >  #include 
> >  #include 
> >  #include 
> > -#include 
> > +#include 
> >  
> >  #include 
> >  #include 
> > @@ -107,8 +107,6 @@ struct walkarg {
> >  };
> >  
> >  void   route_prinit(void);
> > -void   rcb_ref(void *, void *);
> > -void   rcb_unref(void *, void *);
> >  introute_output(struct mbuf *, struct socket *, struct sockaddr *,
> > struct mbuf *);
> >  introute_ctloutput(int, struct socket *, int, int, struct mbuf *);
> > @@ -149,7 +147,7 @@ int  rt_setsource(unsigned int, struct 
> >  struct rtpcb {
> > struct socket   *rop_socket;/* [I] */
> >  
> > -   SRPL_ENTRY(rtpcb)   rop_list;
> > +   SMR_TAILQ_ENTRY(rtpcb)  rop_list;
> > struct refcnt   rop_refcnt;
> > struct timeout  rop_timeout;
> > unsigned introp_msgfilter;  /* [s] */
> > @@ -162,8 +160,7 @@ struct rtpcb {
> >  #definesotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
> >  
> >  struct rtptable {
> > -   SRPL_HEAD(, rtpcb)  rtp_list;
> > -   struct srpl_rc  rtp_rc;
> > +   SMR_TAILQ_HEAD(, rtpcb) rtp_list;
> > struct rwlock   rtp_lk;
> > unsigned intrtp_count;
> >  };
> > @@ -185,29 +182,12 @@ struct rtptable rtptable;
> >  void
> >  route_prinit(void)
> >  {
> > -   srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
> > rw_init(&rtptable.rtp_lk, "rtsock");
> > -   SRPL_INIT(&rtptable.rtp_list);
> > +   SMR_TAILQ_INIT(&rtptable.rtp_list);
> > pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
> > IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
> >  }
> >  
> > -void
> > -rcb_ref(void *null, void *v)
> > -{
> > -   struct rtpcb *rop = v;
> > -
> > -   refcnt_take(&rop->rop_refcnt);
> > -}
> > -
> > -void
> > -rcb_unref(void *null, void *v)
> > -{
> > -   struct rtpcb *rop = v;
> > -
> > -   refcnt_rele_wake(&rop->rop_refcnt);
> > -}
> > -
> >  int
> >  route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
> >  struct mbuf *control, struct proc *p)
> > @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
> > so->so_options |= SO_USELOOPBACK;
> >  
> > rw_enter(&rtptable.rtp_lk, RW_WRITE);
> > -   SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> > -   rop_list);
> > +   SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
> > rtptable.rtp_count++;
> > rw_exit(&rtptable.rtp_lk);
> >  
> > @@ -347,8 +326,7 @@ route_detach(struct socket *so)
> > rw_enter(&rtptable.rtp_lk, RW_WRITE);
> >  
> > rtptable.rtp_count--;
> > -   SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> > -   rop_list);
> > +   SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
> > rw_exit(&rtptable.rtp_lk);
> >  
> > sounlock(so);
> > @@ -356,6 +334,7 @@ route_detach(struct socket *so)
> > /* wait for all references to drop */
> > refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
> > timeout_del_barrier(&rop->rop_timeout);
> > +   smr_barrier();
> >  
> > solock(so);
> >  
> > @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
> > struct rtpcb *rop;
> > struct rt_msghdr *rtm;
> > struct mbuf *m = m0;
> > -   struct srp_ref sr;
> >  
> > /* ensure that we can access the rtm_type via mtod() */
> >

Re: amd64 serial console changes

2022-06-30 Thread Anton Lindqvist
On Thu, Jun 30, 2022 at 08:52:33AM +0200, Hrvoje Popovski wrote:
> On 27.6.2022. 23:44, Mark Kettenis wrote:
> > The Ryzen Embedded V1000 processors have an arm64-style Synopsys
> > DesignWare UART instead of a PC-compatible NS16x50 UART.  To make this
> > UART work as a serial console, we need to pass some more information
> > from the bootloader to the kernel.  This diff adds the logic to handle
> > that information to the kernel.  I'd like some folks that use a serial
> > console on their amd64 machines to test this.  But testing this diff
> > on amd64 machines with a glass console doesn't hurt.
> > 
> > Thanks,
> > 
> > Mark
> 
> 
> Hi,
> 
> I sysupgraded a few boxes a few minutes ago, and the boot output is quite
> slow. Everything is working, but console output needs about 2 or 3 minutes
> to finish, depending on how big the dmesg is.
> 
> I have a few consoles over IPMI and a few connected to serial, and all of
> them are slow.
> 
> Once the boot output finishes, working over the console is back to the
> normal, fast output.

I would guess the problem arises after running installboot causing the
new bios_consdev_t structure to be used, whereas only the fields in the
old bios_oconsdev_t structure are populated? Maybe the rename should
have been the other way around until the new structure is populated.

This makes the boot output "fast" again on my machine. I missed
installboot while testing the original diff.

diff --git sys/arch/amd64/stand/libsa/exec_i386.c 
sys/arch/amd64/stand/libsa/exec_i386.c
index 22067931829..25d20d359c0 100644
--- sys/arch/amd64/stand/libsa/exec_i386.c
+++ sys/arch/amd64/stand/libsa/exec_i386.c
@@ -91,7 +91,7 @@ run_loadfile(uint64_t *marks, int howto)
dev_t bootdev = bootdev_dip->bootdev;
size_t ac = BOOTARG_LEN;
caddr_t av = (caddr_t)BOOTARG_OFF;
-   bios_consdev_t cd;
+   bios_oconsdev_t cd;
extern int com_speed; /* from bioscons.c */
extern int com_addr;
bios_ddb_t ddb;



Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Claudio Jeker
On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> This diff converts the SRP list to a SMR list in rtsock.c
> SRP is a bit strange with how it works and the SMR code is a bit easier to
> understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> a refcount on the route pcb so that we can leave the SMR critical section
> and then enter the SMR critical section at the end of the loop before
> dropping the refcount again.
> 
> The diff does not immediately explode but I doubt we can exploit
> parallelism in route_input() so this may fail at some later stage if it is
> wrong.
> 
> Comments from the lock critics welcome

After discussing this with mpi@ and jmatthew@ we came to the conclusion
that we need to smr_barrier() before refcnt_finalize() to ensure that no
other CPU is between the SMR_TAILQ_FOREACH, refcnt_take() and
smr_read_leave().

Updated diff below
-- 
:wq Claudio

Index: net/rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.334
diff -u -p -r1.334 rtsock.c
--- net/rtsock.c28 Jun 2022 10:01:13 -  1.334
+++ net/rtsock.c30 Jun 2022 09:25:53 -
@@ -71,7 +71,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 
 #include 
 #include 
@@ -107,8 +107,6 @@ struct walkarg {
 };
 
 void   route_prinit(void);
-void   rcb_ref(void *, void *);
-void   rcb_unref(void *, void *);
 introute_output(struct mbuf *, struct socket *, struct sockaddr *,
struct mbuf *);
 introute_ctloutput(int, struct socket *, int, int, struct mbuf *);
@@ -149,7 +147,7 @@ int  rt_setsource(unsigned int, struct 
 struct rtpcb {
struct socket   *rop_socket;/* [I] */
 
-   SRPL_ENTRY(rtpcb)   rop_list;
+   SMR_TAILQ_ENTRY(rtpcb)  rop_list;
struct refcnt   rop_refcnt;
struct timeout  rop_timeout;
unsigned introp_msgfilter;  /* [s] */
@@ -162,8 +160,7 @@ struct rtpcb {
 #definesotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
 
 struct rtptable {
-   SRPL_HEAD(, rtpcb)  rtp_list;
-   struct srpl_rc  rtp_rc;
+   SMR_TAILQ_HEAD(, rtpcb) rtp_list;
struct rwlock   rtp_lk;
unsigned intrtp_count;
 };
@@ -185,29 +182,12 @@ struct rtptable rtptable;
 void
 route_prinit(void)
 {
-   srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
rw_init(&rtptable.rtp_lk, "rtsock");
-   SRPL_INIT(&rtptable.rtp_list);
+   SMR_TAILQ_INIT(&rtptable.rtp_list);
pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
 }
 
-void
-rcb_ref(void *null, void *v)
-{
-   struct rtpcb *rop = v;
-
-   refcnt_take(&rop->rop_refcnt);
-}
-
-void
-rcb_unref(void *null, void *v)
-{
-   struct rtpcb *rop = v;
-
-   refcnt_rele_wake(&rop->rop_refcnt);
-}
-
 int
 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
 struct mbuf *control, struct proc *p)
@@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
so->so_options |= SO_USELOOPBACK;
 
rw_enter(&rtptable.rtp_lk, RW_WRITE);
-   SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
-   rop_list);
+   SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
rtptable.rtp_count++;
rw_exit(&rtptable.rtp_lk);
 
@@ -347,13 +326,13 @@ route_detach(struct socket *so)
rw_enter(&rtptable.rtp_lk, RW_WRITE);
 
rtptable.rtp_count--;
-   SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
-   rop_list);
+   SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
rw_exit(&rtptable.rtp_lk);
 
sounlock(so);
 
/* wait for all references to drop */
+   smr_barrier();
refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
timeout_del_barrier(&rop->rop_timeout);
 
@@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
struct rtpcb *rop;
struct rt_msghdr *rtm;
struct mbuf *m = m0;
-   struct srp_ref sr;
 
/* ensure that we can access the rtm_type via mtod() */
if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
@@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
return;
}
 
-   SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
+   smr_read_enter();
+   SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
/*
 * If route socket is bound to an address family only send
 * messages that match the address family. Address family
@@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
rop->rop_proto != sa_family)
continue;
 
-
+   refcnt_take(&rop->rop_refcnt);
+   smr_read_leave();
so = rop->rop_soc

Re: Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Vitaliy Makkoveev
On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> This diff converts the SRP list to a SMR list in rtsock.c
> SRP is a bit strange with how it works and the SMR code is a bit easier to
> understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> a refcount on the route pcb so that we can leave the SMR critical section
> and then enter the SMR critical section at the end of the loop before
> dropping the refcount again.
> 
> The diff does not immediately explode but I doubt we can exploit
> parallelism in route_input() so this may fail at some later stage if it is
> wrong.
> 
> Comments from the lock critics welcome

We use the `so_lock' rwlock(9) to protect route domain sockets. We can't
convert this SRP list to an SMR list because we call solock() within the
foreach loop.

We can easily crash the kernel by running some "route monitor" commands
in parallel with "while true; do ifconfig vether0 create ; ifconfig vether0
destroy; done".

> -- 
> :wq Claudio
> 
> Index: sys/net/rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.334
> diff -u -p -r1.334 rtsock.c
> --- sys/net/rtsock.c  28 Jun 2022 10:01:13 -  1.334
> +++ sys/net/rtsock.c  30 Jun 2022 08:02:09 -
> @@ -71,7 +71,7 @@
>  #include 
>  #include 
>  #include 
> -#include 
> +#include 
>  
>  #include 
>  #include 
> @@ -107,8 +107,6 @@ struct walkarg {
>  };
>  
>  void route_prinit(void);
> -void rcb_ref(void *, void *);
> -void rcb_unref(void *, void *);
>  int  route_output(struct mbuf *, struct socket *, struct sockaddr *,
>   struct mbuf *);
>  int  route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> @@ -149,7 +147,7 @@ intrt_setsource(unsigned int, struct 
>  struct rtpcb {
>   struct socket   *rop_socket;/* [I] */
>  
> - SRPL_ENTRY(rtpcb)   rop_list;
> + SMR_TAILQ_ENTRY(rtpcb)  rop_list;
>   struct refcnt   rop_refcnt;
>   struct timeout  rop_timeout;
>   unsigned introp_msgfilter;  /* [s] */
> @@ -162,8 +160,7 @@ struct rtpcb {
>  #define  sotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
>  
>  struct rtptable {
> - SRPL_HEAD(, rtpcb)  rtp_list;
> - struct srpl_rc  rtp_rc;
> + SMR_TAILQ_HEAD(, rtpcb) rtp_list;
>   struct rwlock   rtp_lk;
>   unsigned intrtp_count;
>  };
> @@ -185,29 +182,12 @@ struct rtptable rtptable;
>  void
>  route_prinit(void)
>  {
> - srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
>   rw_init(&rtptable.rtp_lk, "rtsock");
> - SRPL_INIT(&rtptable.rtp_list);
> + SMR_TAILQ_INIT(&rtptable.rtp_list);
>   pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
>   IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
>  }
>  
> -void
> -rcb_ref(void *null, void *v)
> -{
> - struct rtpcb *rop = v;
> -
> - refcnt_take(&rop->rop_refcnt);
> -}
> -
> -void
> -rcb_unref(void *null, void *v)
> -{
> - struct rtpcb *rop = v;
> -
> - refcnt_rele_wake(&rop->rop_refcnt);
> -}
> -
>  int
>  route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
>  struct mbuf *control, struct proc *p)
> @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
>   so->so_options |= SO_USELOOPBACK;
>  
>   rw_enter(&rtptable.rtp_lk, RW_WRITE);
> - SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> - rop_list);
> + SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
>   rtptable.rtp_count++;
>   rw_exit(&rtptable.rtp_lk);
>  
> @@ -347,8 +326,7 @@ route_detach(struct socket *so)
>   rw_enter(&rtptable.rtp_lk, RW_WRITE);
>  
>   rtptable.rtp_count--;
> - SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> - rop_list);
> + SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
>   rw_exit(&rtptable.rtp_lk);
>  
>   sounlock(so);
> @@ -356,6 +334,7 @@ route_detach(struct socket *so)
>   /* wait for all references to drop */
>   refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
>   timeout_del_barrier(&rop->rop_timeout);
> + smr_barrier();
>  
>   solock(so);
>  
> @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
>   struct rtpcb *rop;
>   struct rt_msghdr *rtm;
>   struct mbuf *m = m0;
> - struct srp_ref sr;
>  
>   /* ensure that we can access the rtm_type via mtod() */
>   if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
> @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
>   return;
>   }
>  
> - SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> + smr_read_enter();
> + SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
>   /*
>* If route socket is bound to an address family only send
>* messages that match the address family. Address famil

npppd(8): remove PIPEXCSESSION ioctl(2) command

2022-06-30 Thread Vitaliy Makkoveev
yasuoka@ reminded me that, a long time ago, pipex(4) sessions couldn't be
deleted until both the input and output queues became empty:

pipex_timer(void *ignored_arg)
{
/* ... */
switch (session->state) {
/* ... */
case PIPEX_STATE_CLOSED:
/*
 * mbuf queued in pipexinq or pipexoutq may have a
 * refererce to this session.
 */
if (!mq_empty(&pipexinq) || !mq_empty(&pipexoutq))
continue;

pipex_destroy_session(session);
break;
/* ... */
}

Such dead sessions were linked to the stack, and the `ip_forward' flag
was used to prevent packet forwarding.

But since we started to unlink closed sessions from the stack, this logic
has become unnecessary. Also, a pipex(4) session can be closed right after
the close request.

I want to remove it. This makes the pipex(4) session flags immutable and
reduces locking games.

This diff removes the PIPEXCSESSION call only from npppd(8). It deletes
the session right after the PIPEXCSESSION ioctl(2) call, so nothing changes
in the session's lifetime within kernel space. I will modify the kernel and
the pipex(4) man page in a separate diff, after I finish fixing the pipex(4)
locking.

Index: usr.sbin/npppd/npppd/npppd.c
===
RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.c,v
retrieving revision 1.52
diff -u -p -r1.52 npppd.c
--- usr.sbin/npppd/npppd/npppd.c15 Nov 2021 15:14:24 -  1.52
+++ usr.sbin/npppd/npppd/npppd.c30 Jun 2022 08:49:29 -
@@ -114,7 +114,6 @@ static struct in_addr loop; /* initializ
 static uint32_tstr_hash(const void *, int);
 
 #ifdef USE_NPPPD_PIPEX
-static int npppd_ppp_pipex_ip_disable(npppd *, npppd_ppp *);
 static void pipex_periodic(npppd *);
 #endif /* USE_NPPPD_PIPEX */
 
@@ -1246,62 +1245,6 @@ npppd_ppp_pipex_disable(npppd *_this, np
return error;
 }
 
-/* XXX: s/npppd_ppp_pipex_ip_disable/npppd_ppp_pipex_stop/ ?? */
-
-/** Stop PIPEX of the {@link npppd_ppp ppp} */
-static int
-npppd_ppp_pipex_ip_disable(npppd *_this, npppd_ppp *ppp)
-{
-   struct pipex_session_config_req req;
-#ifdef USE_NPPPD_PPPOE
-   pppoe_session *pppoe;
-#endif
-#ifdef USE_NPPPD_PPTP
-   pptp_call *call;
-#endif
-#ifdef USE_NPPPD_L2TP
-   l2tp_call *l2tp;
-#endif
-   if (ppp->pipex_started == 0)
-   return 0;   /* not started */
-
-   bzero(&req, sizeof(req));
-   switch(ppp->tunnel_type) {
-#ifdef USE_NPPPD_PPPOE
-   case NPPPD_TUNNEL_PPPOE:
-   pppoe = (pppoe_session *)ppp->phy_context;
-
-   /* PPPoE specific information */
-   req.pcr_protocol = PIPEX_PROTO_PPPOE;
-   req.pcr_session_id = pppoe->session_id;
-   break;
-#endif
-#ifdef USE_NPPPD_PPTP
-   case NPPPD_TUNNEL_PPTP:
-   call = (pptp_call *)ppp->phy_context;
-
-   /* PPTP specific information */
-   req.pcr_session_id = call->id;
-   req.pcr_protocol = PIPEX_PROTO_PPTP;
-   break;
-#endif
-#ifdef USE_NPPPD_L2TP
-   case NPPPD_TUNNEL_L2TP:
-   l2tp = (l2tp_call *)ppp->phy_context;
-
-   /* L2TP specific context */
-   req.pcr_session_id = l2tp->session_id;
-   req.pcr_protocol = PIPEX_PROTO_L2TP;
-   break;
-#endif
-   default:
-   return 1;
-   }
-   req.pcr_ip_forward = 0;
-
-   return ioctl(_this->iface[ppp->ifidx].devf, PIPEXCSESSION, &req);
-}
-
 static void
 pipex_periodic(npppd *_this)
 {
@@ -1565,11 +1508,6 @@ npppd_set_ip_enabled(npppd *_this, npppd
hl->key = ppp1->username;
}
}
-#ifdef USE_NPPPD_PIPEX
-   if (npppd_ppp_pipex_ip_disable(_this, ppp) != 0)
-   ppp_log(ppp, LOG_ERR,
-   "npppd_ppp_pipex_ip_disable() failed: %m");
-#endif /* USE_NPPPD_PIPEX */
}
 }
 



Use SMR instead of SRP list in rtsock.c

2022-06-30 Thread Claudio Jeker
This diff converts the SRP list to a SMR list in rtsock.c
SRP is a bit strange with how it works and the SMR code is a bit easier to
understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
a refcount on the route pcb so that we can leave the SMR critical section
and then enter the SMR critical section at the end of the loop before
dropping the refcount again.

The diff does not immediately explode but I doubt we can exploit
parallelism in route_input() so this may fail at some later stage if it is
wrong.

Comments from the lock critics welcome
-- 
:wq Claudio

Index: sys/net/rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.334
diff -u -p -r1.334 rtsock.c
--- sys/net/rtsock.c28 Jun 2022 10:01:13 -  1.334
+++ sys/net/rtsock.c30 Jun 2022 08:02:09 -
@@ -71,7 +71,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 
 #include 
 #include 
@@ -107,8 +107,6 @@ struct walkarg {
 };
 
 void   route_prinit(void);
-void   rcb_ref(void *, void *);
-void   rcb_unref(void *, void *);
 introute_output(struct mbuf *, struct socket *, struct sockaddr *,
struct mbuf *);
 introute_ctloutput(int, struct socket *, int, int, struct mbuf *);
@@ -149,7 +147,7 @@ int  rt_setsource(unsigned int, struct 
 struct rtpcb {
struct socket   *rop_socket;/* [I] */
 
-   SRPL_ENTRY(rtpcb)   rop_list;
+   SMR_TAILQ_ENTRY(rtpcb)  rop_list;
struct refcnt   rop_refcnt;
struct timeout  rop_timeout;
unsigned introp_msgfilter;  /* [s] */
@@ -162,8 +160,7 @@ struct rtpcb {
 #definesotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
 
 struct rtptable {
-   SRPL_HEAD(, rtpcb)  rtp_list;
-   struct srpl_rc  rtp_rc;
+   SMR_TAILQ_HEAD(, rtpcb) rtp_list;
struct rwlock   rtp_lk;
unsigned intrtp_count;
 };
@@ -185,29 +182,12 @@ struct rtptable rtptable;
 void
 route_prinit(void)
 {
-   srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
rw_init(&rtptable.rtp_lk, "rtsock");
-   SRPL_INIT(&rtptable.rtp_list);
+   SMR_TAILQ_INIT(&rtptable.rtp_list);
pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
 }
 
-void
-rcb_ref(void *null, void *v)
-{
-   struct rtpcb *rop = v;
-
-   refcnt_take(&rop->rop_refcnt);
-}
-
-void
-rcb_unref(void *null, void *v)
-{
-   struct rtpcb *rop = v;
-
-   refcnt_rele_wake(&rop->rop_refcnt);
-}
-
 int
 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
 struct mbuf *control, struct proc *p)
@@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
so->so_options |= SO_USELOOPBACK;
 
rw_enter(&rtptable.rtp_lk, RW_WRITE);
-   SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
-   rop_list);
+   SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
rtptable.rtp_count++;
rw_exit(&rtptable.rtp_lk);
 
@@ -347,8 +326,7 @@ route_detach(struct socket *so)
rw_enter(&rtptable.rtp_lk, RW_WRITE);
 
rtptable.rtp_count--;
-   SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
-   rop_list);
+   SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
rw_exit(&rtptable.rtp_lk);
 
sounlock(so);
@@ -356,6 +334,7 @@ route_detach(struct socket *so)
/* wait for all references to drop */
refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
timeout_del_barrier(&rop->rop_timeout);
+   smr_barrier();
 
solock(so);
 
@@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
struct rtpcb *rop;
struct rt_msghdr *rtm;
struct mbuf *m = m0;
-   struct srp_ref sr;
 
/* ensure that we can access the rtm_type via mtod() */
if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
@@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
return;
}
 
-   SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
+   smr_read_enter();
+   SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
/*
 * If route socket is bound to an address family only send
 * messages that match the address family. Address family
@@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
rop->rop_proto != sa_family)
continue;
 
-
+   refcnt_take(&rop->rop_refcnt);
+   smr_read_leave();
so = rop->rop_socket;
solock(so);
 
@@ -579,8 +559,10 @@ route_input(struct mbuf *m0, struct sock
rtm_sendup(so, m);
 next:
sounlock(so);
+   smr_read_enter();
+   refcnt_rele_wake(&rop->rop_refcnt);

Re: amd64 serial console changes

2022-06-30 Thread Mark Kettenis
Hi Hrvoje,

I assume it was faster before?  What hardware are you seeing this on?

Thanks,

Mark

> Op 30-06-2022 08:52 schreef Hrvoje Popovski :
> 
>  
> On 27.6.2022. 23:44, Mark Kettenis wrote:
> > The Ryzen Embedded V1000 processors have an arm64-style Synopsys
> > DesignWare UART instead of a PC-compatible NS16x50 UART.  To make this
> > UART work as a serial console, we need to pass some more information
> > from the bootloader to the kernel.  This diff adds the logic to handle
> > that information to the kernel.  I'd like some folks that use a serial
> > console on their amd64 machines to test this.  But testing this diff
> > on amd64 machines with a glass console doesn't hurt.
> > 
> > Thanks,
> > 
> > Mark
> 
> 
> Hi,
> 
> I sysupgraded a few boxes a few minutes ago, and the boot output is quite
> slow. Everything is working, but console output needs about 2 or 3 minutes
> to finish, depending on how big the dmesg is.
> 
> I have a few consoles over IPMI and a few connected to serial, and all of
> them are slow.
> 
> Once the boot output finishes, working over the console is back to the
> normal, fast output.



nsd 4.6.0

2022-06-30 Thread Florian Obser
OK?

diff --git Makefile.in Makefile.in
index b6b7eb37570..96d0784f610 100644
--- Makefile.in
+++ Makefile.in
@@ -81,13 +81,13 @@ MANUALS=nsd.8 nsd-checkconf.8 nsd-checkzone.8 nsd-control.8 
nsd.conf.5
 
 COMMON_OBJ=answer.o axfr.o ixfr.o ixfrcreate.o buffer.o configlexer.o 
configparser.o dname.o dns.o edns.o iterated_hash.o lookup3.o namedb.o nsec3.o 
options.o packet.o query.o rbtree.o radtree.o rdata.o region-allocator.o rrl.o 
siphash.o tsig.o tsig-openssl.o udb.o udbradtree.o udbzone.o util.o bitset.o 
popen3.o
 XFRD_OBJ=xfrd-disk.o xfrd-notify.o xfrd-tcp.o xfrd.o remote.o $(DNSTAP_OBJ)
-NSD_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) difffile.o ipc.o mini_event.o netio.o nsd.o 
server.o dbaccess.o dbcreate.o zlexer.o zonec.o zparser.o
+NSD_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) difffile.o ipc.o mini_event.o netio.o nsd.o 
server.o dbaccess.o dbcreate.o zlexer.o zonec.o zparser.o verify.o
 ALL_OBJ=$(NSD_OBJ) nsd-checkconf.o nsd-checkzone.o nsd-control.o nsd-mem.o 
xfr-inspect.o
 NSD_CHECKCONF_OBJ=$(COMMON_OBJ) nsd-checkconf.o
-NSD_CHECKZONE_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) dbaccess.o dbcreate.o difffile.o 
ipc.o mini_event.o netio.o server.o zonec.o zparser.o zlexer.o nsd-checkzone.o
+NSD_CHECKZONE_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) dbaccess.o dbcreate.o difffile.o 
ipc.o mini_event.o netio.o server.o zonec.o zparser.o zlexer.o nsd-checkzone.o 
verify.o
 NSD_CONTROL_OBJ=$(COMMON_OBJ) nsd-control.o
-CUTEST_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) dbaccess.o dbcreate.o difffile.o ipc.o 
mini_event.o netio.o server.o zonec.o zparser.o zlexer.o cutest_dname.o 
cutest_dns.o cutest_iterated_hash.o cutest_run.o cutest_radtree.o 
cutest_rbtree.o cutest_namedb.o cutest_options.o cutest_region.o cutest_rrl.o 
cutest_udb.o cutest_udbrad.o cutest_util.o cutest_bitset.o cutest_popen3.o 
cutest_iter.o cutest_event.o cutest.o qtest.o
-NSD_MEM_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) dbaccess.o dbcreate.o difffile.o ipc.o 
mini_event.o netio.o server.o zonec.o zparser.o zlexer.o nsd-mem.o
+CUTEST_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) dbaccess.o dbcreate.o difffile.o ipc.o 
mini_event.o netio.o server.o verify.o zonec.o zparser.o zlexer.o 
cutest_dname.o cutest_dns.o cutest_iterated_hash.o cutest_run.o 
cutest_radtree.o cutest_rbtree.o cutest_namedb.o cutest_options.o 
cutest_region.o cutest_rrl.o cutest_udb.o cutest_udbrad.o cutest_util.o 
cutest_bitset.o cutest_popen3.o cutest_iter.o cutest_event.o cutest.o qtest.o
+NSD_MEM_OBJ=$(COMMON_OBJ) $(XFRD_OBJ) dbaccess.o dbcreate.o difffile.o ipc.o 
mini_event.o netio.o verify.o server.o zonec.o zparser.o zlexer.o nsd-mem.o
 all:   $(TARGETS) $(MANUALS)
 
 $(ALL_OBJ):
@@ -496,7 +496,7 @@ rrl.o: $(srcdir)/rrl.c config.h $(srcdir)/rrl.h $(srcdir)/query.h $(srcdir)/name
 server.o: $(srcdir)/server.c config.h $(srcdir)/axfr.h $(srcdir)/nsd.h $(srcdir)/dns.h $(srcdir)/edns.h $(srcdir)/buffer.h \
  $(srcdir)/region-allocator.h $(srcdir)/util.h $(srcdir)/query.h $(srcdir)/namedb.h $(srcdir)/dname.h $(srcdir)/radtree.h $(srcdir)/rbtree.h \
  $(srcdir)/packet.h $(srcdir)/tsig.h $(srcdir)/netio.h $(srcdir)/xfrd.h $(srcdir)/options.h $(srcdir)/xfrd-tcp.h $(srcdir)/xfrd-disk.h \
- $(srcdir)/difffile.h $(srcdir)/udb.h $(srcdir)/nsec3.h $(srcdir)/ipc.h $(srcdir)/remote.h $(srcdir)/lookup3.h $(srcdir)/dnstap/dnstap_collector.h $(srcdir)/rrl.h $(srcdir)/ixfr.h
+ $(srcdir)/difffile.h $(srcdir)/udb.h $(srcdir)/nsec3.h $(srcdir)/ipc.h $(srcdir)/remote.h $(srcdir)/lookup3.h $(srcdir)/dnstap/dnstap_collector.h $(srcdir)/rrl.h $(srcdir)/ixfr.h $(srcdir)/verify.h
 siphash.o: $(srcdir)/siphash.c
 tsig.o: $(srcdir)/tsig.c config.h $(srcdir)/tsig.h $(srcdir)/buffer.h $(srcdir)/region-allocator.h $(srcdir)/util.h $(srcdir)/dname.h \
  $(srcdir)/tsig-openssl.h $(srcdir)/dns.h $(srcdir)/packet.h $(srcdir)/namedb.h $(srcdir)/radtree.h $(srcdir)/rbtree.h $(srcdir)/query.h $(srcdir)/nsd.h \
@@ -511,6 +511,9 @@ udbzone.o: $(srcdir)/udbzone.c config.h $(srcdir)/udbzone.h $(srcdir)/udb.h $(sr
 util.o: $(srcdir)/util.c config.h $(srcdir)/util.h $(srcdir)/region-allocator.h $(srcdir)/dname.h $(srcdir)/buffer.h \
  $(srcdir)/namedb.h $(srcdir)/dns.h $(srcdir)/radtree.h $(srcdir)/rbtree.h $(srcdir)/rdata.h $(srcdir)/zonec.h
 bitset.o: $(srcdir)/bitset.c $(srcdir)/bitset.h
+verify.o: $(srcdir)/verify.c config.h $(srcdir)/region-allocator.h $(srcdir)/namedb.h $(srcdir)/dname.h $(srcdir)/buffer.h \
+ $(srcdir)/util.h config.h $(srcdir)/dns.h $(srcdir)/rbtree.h $(srcdir)/nsd.h $(srcdir)/edns.h $(srcdir)/options.h $(srcdir)/difffile.h \
+ $(srcdir)/netio.h $(srcdir)/verify.h
 xfrd.o: $(srcdir)/xfrd.c config.h $(srcdir)/xfrd.h $(srcdir)/rbtree.h $(srcdir)/region-allocator.h $(srcdir)/namedb.h \
  $(srcdir)/dname.h $(srcdir)/buffer.h $(srcdir)/util.h $(srcdir)/dns.h $(srcdir)/radtree.h $(srcdir)/options.h $(srcdir)/tsig.h $(srcdir)/xfrd-tcp.h \
  $(srcdir)/xfrd-disk.h $(srcdir)/xfrd-notify.h $(srcdir)/netio.h $(srcdir)/nsd.h $(srcdir)/edns.h $(srcdir)/packet.h $(srcdir)/rdata.h \
diff --git config.h.in config.h.in
index 34a89602063..741669c83fe 100644
--- co