Re: Help me testing the netlock
On Tue, Oct 04, 2016 at 04:44:29PM +0200, Martin Pieuchot wrote: > On 10/03/16 16:43, Martin Pieuchot wrote: > > Diff below introduces a single write lock that will be used to serialize > > access to ip_output(). > > > > This lock will be then split in multiple readers and writers to allow > > multiple forwarding paths to run in parallel of each others but still > > serialized with the socket layer. > > > > I'm currently looking for people wanting to run this diff and try to > > break it. In other words, your machine might panic with it and if it > > does report the panic to me so the diff can be improved. > > > > I tested NFS v2 and v3 so I'm quite confident, but I might have missed > > some obvious stuff. > > Updated diff attaced including a fix for syn_cache_timer(), problem > reported by Chris Jackman. > So far, so good, on i386 and amd64 vmm(4) VMs. booted, did a pkg_add upgrade, and cvsync. No issues seen so far. -ml > Index: kern/kern_rwlock.c > === > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > retrieving revision 1.27 > diff -u -p -r1.27 kern_rwlock.c > --- kern/kern_rwlock.c14 Mar 2015 07:33:42 - 1.27 > +++ kern/kern_rwlock.c4 Oct 2016 14:40:29 - > @@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl) > membar_enter(); > } > > +#if 1 > +#include > +#include > +#include > +#endif > + > void > rw_enter_write(struct rwlock *rwl) > { > @@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl) > rw_enter(rwl, RW_WRITE); > else > membar_enter(); > + > +#if 1 > + if ((rwl == &netlock) && (splassert_ctl == 3)) { > + printf("ENTER::%d::", cpu_number()); > + db_stack_trace_print( > + (db_expr_t)__builtin_frame_address(1), > + TRUE, 1, "", printf); > + } > +#endif > } > > void > @@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl) > unsigned long owner = rwl->rwl_owner; > > rw_assert_wrlock(rwl); > + > +#if 1 > + if ((rwl == &netlock) && (splassert_ctl == 3)) { > + printf("EXIT::%d::", cpu_number()); > + db_stack_trace_print( > + (db_expr_t)__builtin_frame_address(1), > + TRUE, 1, "", printf); > + } > +#endif > > membar_exit(); > if (__predict_false((owner & RWLOCK_WAIT) || > Index: kern/sys_socket.c > === > RCS file: /cvs/src/sys/kern/sys_socket.c,v > retrieving revision 1.21 > diff -u -p -r1.21 sys_socket.c > --- kern/sys_socket.c 5 Dec 2015 10:11:53 - 1.21 > +++ kern/sys_socket.c 4 Oct 2016 14:40:29 - > @@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st > { > struct socket *so = fp->f_data; > int revents = 0; > - int s = splsoftnet(); > + int s; > > + rw_enter_write(&netlock); > + s = splsoftnet(); > if (events & (POLLIN | POLLRDNORM)) { > if (soreadable(so)) > revents |= events & (POLLIN | POLLRDNORM); > @@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st > } > } > splx(s); > + rw_exit_write(&netlock); > return (revents); > } > > Index: kern/uipc_socket.c > === > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.161 > diff -u -p -r1.161 uipc_socket.c > --- kern/uipc_socket.c20 Sep 2016 14:27:43 - 1.161 > +++ kern/uipc_socket.c4 Oct 2016 14:40:29 - > @@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i > return (EPROTONOSUPPORT); > if (prp->pr_type != type) > return (EPROTOTYPE); > + rw_enter_write(&netlock); > s = splsoftnet(); > so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); > TAILQ_INIT(&so->so_q0); > @@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i > so->so_state |= SS_NOFDREF; > sofree(so); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > splx(s); > + rw_exit_write(&netlock); > *aso = so; > return (0); > } > @@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i > int > sobind(struct socket *so, struct mbuf *nam, struct proc *p) > { > - int s = splsoftnet(); > - int error; > + int s, error; > > + rw_enter_write(&netlock); > + s = splsoftnet(); > error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > > @@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog) > if (isspliced(so) || issplicedback(so)) > return (EOPNOTSUPP); > #endif /* SOCKET_SPLICE */ > + rw_enter_write(&netlock); > s = splsoftnet(); > erro
armv7 pinmux woes
Hello, I'm writing a driver (amdisplay) supporting the LCD controller on the am335x (Beaglebone Black). The LCD controller pins and the MMC1 pins (used for interacting with the onboard eMMC flash chip) exist on the same set of pads such that only the LCD controller or the eMMC may be used at once. My driver needs to recognize cases where the root disk is MMC1 and abort as to avoid hanging the system when the kernel tries to init userspace on a disk it is no longer pinmuxxed to. I am not sure how to properly and reliably do this, or if there is perhaps a better way to go about solving this problem. Thanks, Ian
Re: add in6 multicast support to vxlan(4) ; question on mbufs
On Tue, 4 Oct 2016 17:27:12 +0200 Mike Belopuhov wrote: > On Tue, Oct 04, 2016 at 01:07 +0200, Vincent Gross wrote: >> On Sat, 24 Sep 2016 10:58:10 +0200 >> Vincent Gross wrote: >> >> > Hi, >> > >> [snip] >> > >> > Aside from the mbuf issue, is this Ok ? >> >> I will go back on the mbuff stuff later. >> >> Diff rebased, ok anyone ? >> >> Index: net/if_vxlan.c >> === >> RCS file: /cvs/src/sys/net/if_vxlan.c,v >> retrieving revision 1.48 >> diff -u -p -r1.48 if_vxlan.c >> --- net/if_vxlan.c 30 Sep 2016 10:22:05 - 1.48 >> +++ net/if_vxlan.c 3 Oct 2016 23:12:42 - >> @@ -638,7 +749,9 @@ vxlan_lookup(struct mbuf *m, struct udph >> if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) >> return (EINVAL); >> >> -m_adj(m, skip); >> +m_adj(m, skip - ETHER_ALIGN); >> +m = m_pullup(m, ETHER_HDR_LEN + ETHER_ALIGN); >> +m_adj(m, ETHER_ALIGN); >> ifp = &sc->sc_ac.ac_if; >> >> #if NBRIDGE > 0 > > I think this chunk is correct. First you ensure that m->m_data > points to a contiguous and well-aligned ethernet header and then > you trim the alignment so that mtod() points directly at it. Isn't it possible that m_pullup() may return non aligned pointer? > Possibly add a comment to that effect to spare a eyebrow-raising > moment for the next person.
optimise search for start of vmmap fill
uvm_map_fill_vmmap lets callers specify an address after which they are interested in entries. generally theyre interested in addresses after 0, but if you start further along the address space the lookup has to traverse the addresses looking for it. this can be optimised by borrowing the red-black tree nfind semantic to do a binary search for the starting point. it's the same cost for searching for the first (0th) address, but is much faster for other starting points. im doing the nfind by hand so i dont have to put a vmentry on the stack to pass to an actual RBT_NFIND call. ok? Index: uvm_map.c === RCS file: /cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.225 diff -u -p -r1.225 uvm_map.c --- uvm_map.c 16 Sep 2016 02:35:42 - 1.225 +++ uvm_map.c 5 Oct 2016 00:27:05 - @@ -5215,7 +5215,7 @@ int uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, size_t *lenp) { - struct vm_map_entry *entry; + struct vm_map_entry *entry = NULL, *tmp; vaddr_t start; int cnt, maxcnt, error = 0; @@ -5233,13 +5233,22 @@ uvm_map_fill_vmmap(struct vm_map *map, s start = (vaddr_t)kve[0].kve_start; vm_map_lock(map); - RBT_FOREACH(entry, uvm_map_addr, &map->addr) { - if (cnt == maxcnt) { - error = ENOMEM; + + /* look for the starting address by doing nfind (by hand) */ + tmp = RBT_ROOT(uvm_map_addr, &map->addr); + while (tmp != NULL) { + if (start < tmp->start) { + entry = tmp; + tmp = RBT_LEFT(uvm_map_addr, tmp); + } else if (start > tmp->start) { + tmp = RBT_RIGHT(uvm_map_addr, tmp); + } else { /* exact match */ + entry = tmp; break; } - if (start != 0 && entry->start < start) - continue; + } + + while (entry != NULL) { kve->kve_start = entry->start; kve->kve_end = entry->end; kve->kve_guard = entry->guard; @@ -5253,8 +5262,14 @@ uvm_map_fill_vmmap(struct vm_map *map, s kve->kve_advice = entry->advice; kve->kve_inheritance = entry->inheritance; kve->kve_flags = entry->flags; + + if (++cnt == maxcnt) { + error = ENOMEM; + break; + } + kve++; - cnt++; + entry = RBT_NEXT(uvm_map_addr, entry); } vm_map_unlock(map);
Smarter OpenBSD/sgi boot blocks
The sgi boot blocks use the PROM (ARCBios or ARCS) for its I/O routines. When using a disk-based path, these routines are using the partition table found in the ``volume header''. In order to be able to use 16 partitions per disk, the OpenBSD port only claims one volume header partition, #0, as the OpenBSD area, and puts its own label in there. Unfortunately, this means that `sd0a', the OpenBSD partition on which the kernel is found, may not actually start at the same location as the volume header partition #0, even though this is the case in most setups. When reinstalling an O2 some time ago, I did not pay attention to this during install: Use (A)uto layout, (E)dit auto layout, or create (C)ustom layout? [a] c [...] Label editor (enter '?' for help at any prompt) > p OpenBSD area: 0-17773524; size: 17773524; free: 0 #size offset fstype [fsize bsize cpg] a: 17770389 3135 4.2BSD 1024 819216 c: 177735240 unused p: 31350 unknown > d a > a partition: [a] offset: [3135] size: [17770389] 400M Rounding size to cylinder (3360 sectors): 816705 FS type: [4.2BSD] mount point: [none] / Rounding offset to bsize (32 sectors): 3136 Rounding size to bsize (32 sectors): 816704 And `sd0a' ended up starting one sector after the beginning of the volume header partition #0. Of course, rebooting afterwards did not work as expected: OpenBSD/sgi-IP32 ARCBios boot version 1.7 arg 0: pci(0)scsi(0)disk(1)rdisk(0)partition(8)/boot arg 1: OSLoadOptions=auto arg 2: ConsoleIn=serial(0) arg 3: ConsoleOut=serial(0) arg 4: SystemPartition=pci(0)scsi(0)disk(1)rdisk(0)partition(8) arg 5: OSLoader=boot arg 6: OSLoadPartition=pci(0)scsi(0)disk(1)rdisk(0)partition(0) arg 7: OSLoadFilename=bsd Boot: pci(0)scsi(0)disk(1)rdisk(0)partition(0)bsd cannot open pci(0)scsi(0)disk(1)rdisk(0)partition(0)/etc/random.seed: Invalid argument open pci(0)scsi(0)disk(1)rdisk(0)partition(0)bsd: Invalid argument Boot FAILED! The proper way to fix this is to make the boot blocks read the real OpenBSD label instead of assuming sd0a spans volume header partition #0 in all cases. This is achieved by the diff below, which will attempt to use the raw disk device (volume header partition #10, equivalent to sd0c) for disk accesses and will read both the volume header (to know where the OpenBSD disklabel lies) and then the OpenBSD label (to know where sd0a really is). Of course, this has to cope with the two valid disk syntaxes (true ARCBios used on older sgi systems, and ARCS dksc() syntax used on Origin and later systems). This diff has been tested on O2 (IP32) and Origin 350 (IP27). It is written in a conservative way, in order to revert to the existing behaviour if anything fails (invalid volume header, no OpenBSD label yet...) and should not cause any regression. Note that network boots are not affected by these changes. Index: Makefile32.inc === RCS file: /OpenBSD/src/sys/arch/sgi/stand/Makefile32.inc,v retrieving revision 1.5 diff -u -p -r1.5 Makefile32.inc --- Makefile32.inc 19 Oct 2012 13:51:59 - 1.5 +++ Makefile32.inc 30 Sep 2016 09:19:15 - @@ -18,6 +18,7 @@ AS+= -32 LD?= ld LD+= -m elf32btsmip LIBSA_CPPFLAGS= +CFLAGS+= -DLIBSA_LONGLONG_PRINTF .endif ### Figure out what to use for libsa and libz Index: boot/Makefile === RCS file: /OpenBSD/src/sys/arch/sgi/stand/boot/Makefile,v retrieving revision 1.16 diff -u -p -r1.16 Makefile --- boot/Makefile 30 Jul 2016 03:25:49 - 1.16 +++ boot/Makefile 30 Sep 2016 09:19:15 - @@ -13,14 +13,14 @@ AFLAGS+=${SAABI} S= ${.CURDIR}/../../../.. SRCS= start.S arcbios.c boot.c conf.c diskio.c filesystem.c \ - netfs.c netio.c strchr.c strstr.c + netfs.c netio.c strstr.c .PATH: ${S}/lib/libsa SRCS+= loadfile.c .PATH: ${S}/lib/libkern/arch/mips64 ${S}/lib/libkern -SRCS+= strlcpy.c memcpy.c strlen.c strrchr.c strlcat.c strncmp.c \ - strcmp.S +SRCS+= memcpy.c strchr.c strcmp.S strlcat.c strlcpy.c strlen.c \ + strncmp.c strrchr.c CLEANFILES+= machine mips64 Index: boot/diskio.c === RCS file: /OpenBSD/src/sys/arch/sgi/stand/boot/diskio.c,v retrieving revision 1.10 diff -u -p -r1.10 diskio.c --- boot/diskio.c 30 Sep 2015 22:45:57 - 1.10 +++ boot/diskio.c 30 Sep 2016 09:19:15 - @@ -1,6 +1,21 @@ /* $OpenBSD: diskio.c,v 1.10 2015/09/30 22:45:57 krw Exp $ */ /* + * Copyright (c) 2016 Miodrag Vallat. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is
Re: httpd(8)/proc.c: use less fds on startup
On Tue, Oct 04, 2016 at 07:46:52PM +0200, Rafael Zalamena wrote: > This diff makes proc.c daemons to use less file descriptors on startup, > this way we increase the number of child we can have considerably. This > also improves the solution on a bug reported in bugs@ > "httpd errors out with 'too many open files'". > > To achieve that I delayed the socket distribution and made a minimal > socket allocation in proc_init(), so only the necessary children socket > are allocated and passed with proc_exec(). After the event_init() is called > we call proc_connect() which creates the socketpair() and immediatly after > each call we already sends them without accumulating. > > Note: We still have to calculate how many fds we will want to have and > then limit the daemon prefork configuration. > > ok? > Paul de Weerd still found problems with the diff, because the httpd(8) would not exit successfully with '-n' flag. It happened because the new proc_connect() code tried to write fds to children process that did not start caused by the ps_noaction flag. (thanks Paul!) This new diff just adds a check for ps_noaction in proc_init() and proc_connect() so it doesn't try to do anything if we are not really going to start the daemon. Also I removed the ps_noaction from proc_run() since we are not going to get to this point anymore. ok? Index: proc.c === RCS file: /home/obsdcvs/src/usr.sbin/httpd/proc.c,v retrieving revision 1.27 diff -u -p -r1.27 proc.c --- proc.c 28 Sep 2016 12:01:04 - 1.27 +++ proc.c 4 Oct 2016 21:50:43 - @@ -36,8 +36,6 @@ voidproc_exec(struct privsep *, struct privsep_proc *, unsigned int, int, char **); -voidproc_connectpeer(struct privsep *, enum privsep_procid, int, - struct privsep_pipes *); voidproc_setup(struct privsep *, struct privsep_proc *, unsigned int); voidproc_open(struct privsep *, int, int); voidproc_accept(struct privsep *, int, enum privsep_procid, @@ -147,72 +145,18 @@ proc_exec(struct privsep *ps, struct pri } void -proc_connectpeer(struct privsep *ps, enum privsep_procid id, int inst, -struct privsep_pipes *pp) -{ - unsigned int i, j; - struct privsep_fdpf; - - for (i = 0; i < PROC_MAX; i++) { - /* Parent is already connected with everyone. */ - if (i == PROC_PARENT) - continue; - - for (j = 0; j < ps->ps_instances[i]; j++) { - /* Don't send socket to child itself. */ - if (i == (unsigned int)id && - j == (unsigned int)inst) - continue; - if (pp->pp_pipes[i][j] == -1) - continue; - - pf.pf_procid = i; - pf.pf_instance = j; - proc_compose_imsg(ps, id, inst, IMSG_CTL_PROCFD, - -1, pp->pp_pipes[i][j], &pf, sizeof(pf)); - pp->pp_pipes[i][j] = -1; - } - } -} - -/* Inter-connect all process except with ourself. */ -void proc_connect(struct privsep *ps) { - unsigned int src, i, j; - struct privsep_pipes*pp; - struct imsgev *iev; - - /* Listen on appropriate pipes. */ - src = privsep_process; - pp = &ps->ps_pipes[src][ps->ps_instance]; - - for (i = 0; i < PROC_MAX; i++) { - /* Don't listen to ourself. */ - if (i == src) - continue; - - for (j = 0; j < ps->ps_instances[i]; j++) { - if (pp->pp_pipes[i][j] == -1) - continue; - - iev = &ps->ps_ievs[i][j]; - imsg_init(&iev->ibuf, pp->pp_pipes[i][j]); - event_set(&iev->ev, iev->ibuf.fd, iev->events, - iev->handler, iev->data); - event_add(&iev->ev, NULL); - } - } + unsigned int src, dst; - /* Exchange pipes between process. */ - for (i = 0; i < PROC_MAX; i++) { - /* Parent is already connected with everyone. */ - if (i == PROC_PARENT) - continue; + /* Don't distribute any sockets if we are not really going to run. */ + if (ps->ps_noaction) + return; - for (j = 0; j < ps->ps_instances[i]; j++) - proc_connectpeer(ps, i, j, &ps->ps_pipes[i][j]); - } + /* Distribute the socketpair()s for everyone. */ + for (src = 0; src < PROC_MAX; src++) + for (dst = src; dst < PROC_MAX; dst++) + proc_open(ps, src, dst); } void @@ -220,17 +164,41 @@ proc_init(struct privsep *ps, struct pri int argc, char **ar
httpd(8)/proc.c: use less fds on startup
This diff makes proc.c daemons to use less file descriptors on startup, this way we increase the number of child we can have considerably. This also improves the solution on a bug reported in bugs@ "httpd errors out with 'too many open files'". To achieve that I delayed the socket distribution and made a minimal socket allocation in proc_init(), so only the necessary children socket are allocated and passed with proc_exec(). After the event_init() is called we call proc_connect() which creates the socketpair() and immediatly after each call we already sends them without accumulating. Note: We still have to calculate how many fds we will want to have and then limit the daemon prefork configuration. ok? Index: proc.c === RCS file: /home/obsdcvs/src/usr.sbin/httpd/proc.c,v retrieving revision 1.27 diff -u -p -r1.27 proc.c --- proc.c 28 Sep 2016 12:01:04 - 1.27 +++ proc.c 4 Oct 2016 17:26:41 - @@ -36,8 +36,6 @@ voidproc_exec(struct privsep *, struct privsep_proc *, unsigned int, int, char **); -voidproc_connectpeer(struct privsep *, enum privsep_procid, int, - struct privsep_pipes *); voidproc_setup(struct privsep *, struct privsep_proc *, unsigned int); voidproc_open(struct privsep *, int, int); voidproc_accept(struct privsep *, int, enum privsep_procid, @@ -147,72 +145,14 @@ proc_exec(struct privsep *ps, struct pri } void -proc_connectpeer(struct privsep *ps, enum privsep_procid id, int inst, -struct privsep_pipes *pp) -{ - unsigned int i, j; - struct privsep_fdpf; - - for (i = 0; i < PROC_MAX; i++) { - /* Parent is already connected with everyone. */ - if (i == PROC_PARENT) - continue; - - for (j = 0; j < ps->ps_instances[i]; j++) { - /* Don't send socket to child itself. */ - if (i == (unsigned int)id && - j == (unsigned int)inst) - continue; - if (pp->pp_pipes[i][j] == -1) - continue; - - pf.pf_procid = i; - pf.pf_instance = j; - proc_compose_imsg(ps, id, inst, IMSG_CTL_PROCFD, - -1, pp->pp_pipes[i][j], &pf, sizeof(pf)); - pp->pp_pipes[i][j] = -1; - } - } -} - -/* Inter-connect all process except with ourself. */ -void proc_connect(struct privsep *ps) { - unsigned int src, i, j; - struct privsep_pipes*pp; - struct imsgev *iev; - - /* Listen on appropriate pipes. */ - src = privsep_process; - pp = &ps->ps_pipes[src][ps->ps_instance]; - - for (i = 0; i < PROC_MAX; i++) { - /* Don't listen to ourself. */ - if (i == src) - continue; - - for (j = 0; j < ps->ps_instances[i]; j++) { - if (pp->pp_pipes[i][j] == -1) - continue; - - iev = &ps->ps_ievs[i][j]; - imsg_init(&iev->ibuf, pp->pp_pipes[i][j]); - event_set(&iev->ev, iev->ibuf.fd, iev->events, - iev->handler, iev->data); - event_add(&iev->ev, NULL); - } - } - - /* Exchange pipes between process. */ - for (i = 0; i < PROC_MAX; i++) { - /* Parent is already connected with everyone. */ - if (i == PROC_PARENT) - continue; + unsigned int src, dst; - for (j = 0; j < ps->ps_instances[i]; j++) - proc_connectpeer(ps, i, j, &ps->ps_pipes[i][j]); - } + /* Distribute the socketpair()s for everyone. */ + for (src = 0; src < PROC_MAX; src++) + for (dst = src; dst < PROC_MAX; dst++) + proc_open(ps, src, dst); } void @@ -220,17 +160,37 @@ proc_init(struct privsep *ps, struct pri int argc, char **argv, enum privsep_procid proc_id) { struct privsep_proc *p = NULL; + struct privsep_pipes*pa, *pb; unsigned int proc; - unsigned int src, dst; + unsigned int dst; + int fds[2]; if (proc_id == PROC_PARENT) { privsep_process = PROC_PARENT; proc_setup(ps, procs, nproc); - /* Open socketpair()s for everyone. */ - for (src = 0; src < PROC_MAX; src++) - for (dst = 0; dst < PROC_MAX; dst++) - proc_open(ps, src, dst); + /* +* Create the children sockets so we can use them +* to distribute the r
Re: add in6 multicast support to vxlan(4) ; question on mbufs
On Tue, Oct 04, 2016 at 01:07 +0200, Vincent Gross wrote: > On Sat, 24 Sep 2016 10:58:10 +0200 > Vincent Gross wrote: > > > Hi, > > > [snip] > > > > Aside from the mbuf issue, is this Ok ? > > I will go back on the mbuff stuff later. > > Diff rebased, ok anyone ? > > Index: net/if_vxlan.c > === > RCS file: /cvs/src/sys/net/if_vxlan.c,v > retrieving revision 1.48 > diff -u -p -r1.48 if_vxlan.c > --- net/if_vxlan.c30 Sep 2016 10:22:05 - 1.48 > +++ net/if_vxlan.c3 Oct 2016 23:12:42 - > @@ -638,7 +749,9 @@ vxlan_lookup(struct mbuf *m, struct udph > if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) > return (EINVAL); > > - m_adj(m, skip); > + m_adj(m, skip - ETHER_ALIGN); > + m = m_pullup(m, ETHER_HDR_LEN + ETHER_ALIGN); > + m_adj(m, ETHER_ALIGN); > ifp = &sc->sc_ac.ac_if; > > #if NBRIDGE > 0 I think this chunk is correct. First you ensure that m->m_data points to a contiguous and well-aligned ethernet header and then you trim the alignment so that mtod() points directly at it. Possibly add a comment to that effect to spare a eyebrow-raising moment for the next person.
Re: Unexpected behavior in su/doas
FYI, sudo supports running the command in a new pty, which should avoid the issue. Commands are always run in a new pty when logging input or output, otherwise the use_pty flag needs to be set in sudoers. - todd
Re: Help me testing the netlock
On 10/03/16 16:43, Martin Pieuchot wrote: Diff below introduces a single write lock that will be used to serialize access to ip_output(). This lock will be then split in multiple readers and writers to allow multiple forwarding paths to run in parallel of each others but still serialized with the socket layer. I'm currently looking for people wanting to run this diff and try to break it. In other words, your machine might panic with it and if it does report the panic to me so the diff can be improved. I tested NFS v2 and v3 so I'm quite confident, but I might have missed some obvious stuff. Updated diff attaced including a fix for syn_cache_timer(), problem reported by Chris Jackman. Index: kern/kern_rwlock.c === RCS file: /cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.27 diff -u -p -r1.27 kern_rwlock.c --- kern/kern_rwlock.c 14 Mar 2015 07:33:42 - 1.27 +++ kern/kern_rwlock.c 4 Oct 2016 14:40:29 - @@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl) membar_enter(); } +#if 1 +#include +#include +#include +#endif + void rw_enter_write(struct rwlock *rwl) { @@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl) rw_enter(rwl, RW_WRITE); else membar_enter(); + +#if 1 + if ((rwl == &netlock) && (splassert_ctl == 3)) { + printf("ENTER::%d::", cpu_number()); + db_stack_trace_print( + (db_expr_t)__builtin_frame_address(1), + TRUE, 1, "", printf); + } +#endif } void @@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl) unsigned long owner = rwl->rwl_owner; rw_assert_wrlock(rwl); + +#if 1 + if ((rwl == &netlock) && (splassert_ctl == 3)) { + printf("EXIT::%d::", cpu_number()); + db_stack_trace_print( + (db_expr_t)__builtin_frame_address(1), + TRUE, 1, "", printf); + } +#endif membar_exit(); if (__predict_false((owner & RWLOCK_WAIT) || Index: kern/sys_socket.c === RCS file: /cvs/src/sys/kern/sys_socket.c,v retrieving revision 1.21 diff -u -p -r1.21 sys_socket.c --- kern/sys_socket.c 5 Dec 2015 10:11:53 - 1.21 +++ kern/sys_socket.c 4 Oct 2016 14:40:29 - @@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st { struct socket *so = fp->f_data; int revents = 0; - int s = splsoftnet(); + int s; + rw_enter_write(&netlock); + s = splsoftnet(); if (events & (POLLIN | POLLRDNORM)) { if (soreadable(so)) revents |= events & (POLLIN | POLLRDNORM); @@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st } } splx(s); + rw_exit_write(&netlock); return (revents); } Index: kern/uipc_socket.c === RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.161 diff -u -p -r1.161 uipc_socket.c --- kern/uipc_socket.c 20 Sep 2016 14:27:43 - 1.161 +++ kern/uipc_socket.c 4 Oct 2016 14:40:29 - @@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); + rw_enter_write(&netlock); s = splsoftnet(); so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); TAILQ_INIT(&so->so_q0); @@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i so->so_state |= SS_NOFDREF; sofree(so); splx(s); + rw_exit_write(&netlock); return (error); } splx(s); + rw_exit_write(&netlock); *aso = so; return (0); } @@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i int sobind(struct socket *so, struct mbuf *nam, struct proc *p) { - int s = splsoftnet(); - int error; + int s, error; + rw_enter_write(&netlock); + s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); splx(s); + rw_exit_write(&netlock); return (error); } @@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog) if (isspliced(so) || issplicedback(so)) return (EOPNOTSUPP); #endif /* SOCKET_SPLICE */ + rw_enter_write(&netlock); s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, curproc); if (error) { splx(s); + rw_exit_write(&netlock); return (error); } if (TAILQ_FIRST(&so->so_q) == NULL) @@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog) backlog = sominconn; so->so_qlimit = backlog; splx(s); + rw_exit_write(&netlock); return (0); } @@ -196,6 +204,7 @@ solisten(struct socket *so, int backlog) void sofree(struct socket *so) { + rw_assert_wrlock(&netlock); splsoftassert(IPL_SOFTNET); if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) @@ -234,9 +243,10 @@ int soclose(struct socket *so) { struct socket *so2; - int s = splsoftnet(); /* conservative */ - int error = 0; + int s, error = 0; + rw_enter_write(&netlock); + s = splsoftnet(); /* conservative */ if (so->so_options & SO_ACCEPTCONN) { while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { (void) soqremque(so2, 0); @@ -260,7 +270,7 @@ soclose(struct soc
Re: cwm: remove mouse resize window
On Tue 2016.10.04 at 15:29 +0200, Vadim Vygonets wrote: > Quoth Okan Demirmen on Wed, Sep 28, 2016: > > We currently print the x/y dimensions only for mouse based actions; we > > don't for kbd, nor do we do anything with mouse/kbd window moves (such > > as printing the x/y coordinates, etc). So why have mouse-resize be > > different? Thus, the below crudely removes it. > > I use it for xterms, and I'd rather have it in both mouse and > keyboard based resizes. Hi Vadim - Not sure if you saw my other reply, or commit, but it's staying and was expanded a bit. As for displaying on kb requests, that's harder to do since a KeyRelease event would make it disappear so it'll be visible for a tiny amount of time; maybe a timer or some such or other idea, not sure - haven't thought about it...not too important imho yet. Thanks, Okan
Re: cwm: remove mouse resize window
Quoth Okan Demirmen on Wed, Sep 28, 2016: > We currently print the x/y dimensions only for mouse based actions; we > don't for kbd, nor do we do anything with mouse/kbd window moves (such > as printing the x/y coordinates, etc). So why have mouse-resize be > different? Thus, the below crudely removes it. I use it for xterms, and I'd rather have it in both mouse and keyboard based resizes. Vadik. -- Weiner's Law of Libraries: There are no answers, only cross references.
Re: disklabel template: percentage of disk optional?
On Tue, Oct 04, 2016, Otto Moerbeek wrote: > On Tue, Oct 04, 2016 at 04:00:50AM -0700, Claus Assmann wrote: > > This doesn't seem to resolve the problem that sa->rate is > > not initialized, so a simple file like this still triggers > Likely, this is better. > RCS file: /cvs/src/sbin/disklabel/editor.c,v > sa = &(alloc_table[0].table[idx]); > + memset(sa, 0, sizeof(*sa)); Clearing the entire struct is certainly the easiest approach. Thanks.
Re: disklabel template: percentage of disk optional?
On Tue, Oct 04, 2016 at 04:00:50AM -0700, Claus Assmann wrote: > On Tue, Oct 04, 2016, Dmitrij D. Czarkoff wrote: > > [please do not Cc me] > > > I shouldn't have started on sending patches at 3AM. This one should do > > what I intended it to do. Sorry for noise. > > > + else if (t == NULL && sa->minsz != sa->maxsz) > > + errx(1, "%s: parse error on line %u", filename, idx); > > This doesn't seem to resolve the problem that sa->rate is > not initialized, so a simple file like this still triggers > the error message (at least in my tests): > > / 500M > swap 1G > /usr 2G > /home 500G Likely, this is better. -Otto Index: editor.c === RCS file: /cvs/src/sbin/disklabel/editor.c,v retrieving revision 1.303 diff -u -p -r1.303 editor.c --- editor.c2 Sep 2016 10:47:17 - 1.303 +++ editor.c4 Oct 2016 11:25:36 - @@ -2386,6 +2386,7 @@ parse_autotable(char *filename) idx + 1, sizeof(*sa))) == NULL) err(1, NULL); sa = &(alloc_table[0].table[idx]); + memset(sa, 0, sizeof(*sa)); idx++; if ((sa->mp = get_token(&buf, &len)) == NULL ||
Re: disklabel template: percentage of disk optional?
On Tue, Oct 04, 2016, Dmitrij D. Czarkoff wrote: [please do not Cc me] > I shouldn't have started on sending patches at 3AM. This one should do > what I intended it to do. Sorry for noise. > + else if (t == NULL && sa->minsz != sa->maxsz) > + errx(1, "%s: parse error on line %u", filename, idx); This doesn't seem to resolve the problem that sa->rate is not initialized, so a simple file like this still triggers the error message (at least in my tests): / 500M swap1G /usr2G /home 500G
Re: 64-bit address for bus_space_map(9) on arm
> Date: Tue, 4 Oct 2016 10:59:26 +0200 > From: Patrick Wildt > > Hi, > > continuing where we left off before the hackathon I would like to add > a diff to the 64-bit bus_addr_t discussion. This diff does not > increase the size of bus_addr_t. Instead it changes the argument > of the function pointer stored in the bus tag from bus_addr_t to > uint64_t. This is an arm only change and will not affect any other > architecture. > > This is needed to pass virtual addresses retrieved from the device tree > between the different simple-busses in the tree topology. In the end > every single hardware that we want to speak to is in the 32-bit range, > not outside. This means the final bus_space_map(9) will work on a > 32-bit value. > > Opinions? ok? I think this is an acceptable hack. ok kettenis@ > diff --git sys/arch/arm/armv7/armv7_space.c sys/arch/arm/armv7/armv7_space.c > index 4f6c1e0..fbd558a 100644 > --- sys/arch/arm/armv7/armv7_space.c > +++ sys/arch/arm/armv7/armv7_space.c > @@ -165,7 +165,7 @@ struct bus_space armv7_bs_tag = { > }; > > int > -armv7_bs_map(void *t, bus_addr_t bpa, bus_size_t size, > +armv7_bs_map(void *t, uint64_t bpa, bus_size_t size, > int flags, bus_space_handle_t *bshp) > { > u_long startpa, endpa, pa; > diff --git sys/arch/arm/include/bus.h sys/arch/arm/include/bus.h > index f00c897..c108359 100644 > --- sys/arch/arm/include/bus.h > +++ sys/arch/arm/include/bus.h > @@ -93,7 +93,7 @@ struct bus_space { > void*bs_cookie; > > /* mapping/unmapping */ > - int (*bs_map) (void *, bus_addr_t, bus_size_t, > + int (*bs_map) (void *, uint64_t, bus_size_t, > int, bus_space_handle_t *); > void(*bs_unmap) (void *, bus_space_handle_t, > bus_size_t); > @@ -373,7 +373,7 @@ struct bus_space { > */ > > #define bs_map_proto(f) > \ > -int __bs_c(f,_bs_map) (void *t, bus_addr_t addr,\ > +int __bs_c(f,_bs_map) (void *t, uint64_t addr, \ > bus_size_t size, int flags, bus_space_handle_t *bshp); > > #define bs_unmap_proto(f)\ > diff --git sys/arch/arm/simplebus/simplebus.c > sys/arch/arm/simplebus/simplebus.c > index d2f5bfe..325e149 100644 > --- sys/arch/arm/simplebus/simplebus.c > +++ sys/arch/arm/simplebus/simplebus.c > @@ -30,7 +30,7 @@ int simplebus_match(struct device *, void *, void *); > void simplebus_attach(struct device *, struct device *, void *); > > void simplebus_attach_node(struct device *, int); > -int simplebus_bs_map(void *, bus_addr_t, bus_size_t, int, bus_space_handle_t > *); > +int simplebus_bs_map(void *, uint64_t, bus_size_t, int, bus_space_handle_t > *); > > struct simplebus_softc { > struct devicesc_dev; > @@ -205,7 +205,7 @@ simplebus_attach_node(struct device *self, int node) > * Translate memory address if needed. > */ > int > -simplebus_bs_map(void *t, bus_addr_t bpa, bus_size_t size, > +simplebus_bs_map(void *t, uint64_t bpa, bus_size_t size, > int flag, bus_space_handle_t *bshp) > { > struct simplebus_softc *sc = (struct simplebus_softc *)t; > diff --git sys/arch/armv7/armv7/armv7_machdep.c > sys/arch/armv7/armv7/armv7_machdep.c > index e869c2c..839e45b 100644 > --- sys/arch/armv7/armv7/armv7_machdep.c > +++ sys/arch/armv7/armv7/armv7_machdep.c > @@ -199,7 +199,7 @@ int safepri = 0; > /* Prototypes */ > > char bootargs[MAX_BOOT_STRING]; > -int bootstrap_bs_map(void *, bus_addr_t, bus_size_t, int, > +int bootstrap_bs_map(void *, uint64_t, bus_size_t, int, > bus_space_handle_t *); > void process_kernel_args(char *); > void consinit(void); > @@ -318,7 +318,7 @@ read_ttb(void) > static vaddr_t section_free = 0xfd00; /* XXX - huh */ > > int > -bootstrap_bs_map(void *t, bus_addr_t bpa, bus_size_t size, > +bootstrap_bs_map(void *t, uint64_t bpa, bus_size_t size, > int flags, bus_space_handle_t *bshp) > { > u_long startpa, pa, endpa; > @@ -393,7 +393,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t > loadaddr) > > /* early bus_space_map support */ > struct bus_space tmp_bs_tag; > - int (*map_func_save)(void *, bus_addr_t, bus_size_t, int, > + int (*map_func_save)(void *, uint64_t, bus_size_t, int, > bus_space_handle_t *); > > if (arg0) > >
Re: clang dev/pv/hyperv.c
On Sat, Sep 24, 2016 at 07:05:54PM +0200, Mark Kettenis wrote: > Another obvious mistake caught by clang. > > ok? ok jsg@ > > > Index: dev/pv/hyperv.c > === > RCS file: /cvs/src/sys/dev/pv/hyperv.c,v > retrieving revision 1.16 > diff -u -p -r1.16 hyperv.c > --- dev/pv/hyperv.c 20 Sep 2016 10:27:14 - 1.16 > +++ dev/pv/hyperv.c 24 Sep 2016 17:04:24 - > @@ -700,7 +700,7 @@ hv_vmbus_connect(struct hv_softc *sc) > sc->sc_revents = (u_long *)((caddr_t)sc->sc_events + (PAGE_SIZE >> 1)); > > sc->sc_monitor[0] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait); > - if (sc->sc_monitor == NULL) { > + if (sc->sc_monitor[0] == NULL) { > printf(": failed to allocate monitor page 1\n"); > goto errout; > } > @@ -710,7 +710,7 @@ hv_vmbus_connect(struct hv_softc *sc) > } > > sc->sc_monitor[1] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait); > - if (sc->sc_monitor == NULL) { > + if (sc->sc_monitor[1] == NULL) { > printf(": failed to allocate monitor page 2\n"); > goto errout; > } >
64-bit address for bus_space_map(9) on arm
Hi, continuing where we left off before the hackathon I would like to add a diff to the 64-bit bus_addr_t discussion. This diff does not increase the size of bus_addr_t. Instead it changes the argument of the function pointer stored in the bus tag from bus_addr_t to uint64_t. This is an arm only change and will not affect any other architecture. This is needed to pass virtual addresses retrieved from the device tree between the different simple-busses in the tree topology. In the end every single hardware that we want to speak to is in the 32-bit range, not outside. This means the final bus_space_map(9) will work on a 32-bit value. Opinions? ok? Patrick diff --git sys/arch/arm/armv7/armv7_space.c sys/arch/arm/armv7/armv7_space.c index 4f6c1e0..fbd558a 100644 --- sys/arch/arm/armv7/armv7_space.c +++ sys/arch/arm/armv7/armv7_space.c @@ -165,7 +165,7 @@ struct bus_space armv7_bs_tag = { }; int -armv7_bs_map(void *t, bus_addr_t bpa, bus_size_t size, +armv7_bs_map(void *t, uint64_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { u_long startpa, endpa, pa; diff --git sys/arch/arm/include/bus.h sys/arch/arm/include/bus.h index f00c897..c108359 100644 --- sys/arch/arm/include/bus.h +++ sys/arch/arm/include/bus.h @@ -93,7 +93,7 @@ struct bus_space { void*bs_cookie; /* mapping/unmapping */ - int (*bs_map) (void *, bus_addr_t, bus_size_t, + int (*bs_map) (void *, uint64_t, bus_size_t, int, bus_space_handle_t *); void(*bs_unmap) (void *, bus_space_handle_t, bus_size_t); @@ -373,7 +373,7 @@ struct bus_space { */ #define bs_map_proto(f) \ -int__bs_c(f,_bs_map) (void *t, bus_addr_t addr,\ +int__bs_c(f,_bs_map) (void *t, uint64_t addr, \ bus_size_t size, int flags, bus_space_handle_t *bshp); #define bs_unmap_proto(f) \ diff --git sys/arch/arm/simplebus/simplebus.c sys/arch/arm/simplebus/simplebus.c index d2f5bfe..325e149 100644 --- sys/arch/arm/simplebus/simplebus.c +++ sys/arch/arm/simplebus/simplebus.c @@ -30,7 +30,7 @@ int simplebus_match(struct device *, void *, void *); void simplebus_attach(struct device *, struct device *, void *); void simplebus_attach_node(struct device *, int); -int simplebus_bs_map(void *, bus_addr_t, bus_size_t, int, bus_space_handle_t *); +int simplebus_bs_map(void *, uint64_t, bus_size_t, int, bus_space_handle_t *); struct simplebus_softc { struct devicesc_dev; @@ -205,7 +205,7 @@ simplebus_attach_node(struct device *self, int node) * Translate memory address if needed. */ int -simplebus_bs_map(void *t, bus_addr_t bpa, bus_size_t size, +simplebus_bs_map(void *t, uint64_t bpa, bus_size_t size, int flag, bus_space_handle_t *bshp) { struct simplebus_softc *sc = (struct simplebus_softc *)t; diff --git sys/arch/armv7/armv7/armv7_machdep.c sys/arch/armv7/armv7/armv7_machdep.c index e869c2c..839e45b 100644 --- sys/arch/armv7/armv7/armv7_machdep.c +++ sys/arch/armv7/armv7/armv7_machdep.c @@ -199,7 +199,7 @@ int safepri = 0; /* Prototypes */ char bootargs[MAX_BOOT_STRING]; -intbootstrap_bs_map(void *, bus_addr_t, bus_size_t, int, +intbootstrap_bs_map(void *, uint64_t, bus_size_t, int, bus_space_handle_t *); void process_kernel_args(char *); void consinit(void); @@ -318,7 +318,7 @@ read_ttb(void) static vaddr_t section_free = 0xfd00; /* XXX - huh */ int -bootstrap_bs_map(void *t, bus_addr_t bpa, bus_size_t size, +bootstrap_bs_map(void *t, uint64_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { u_long startpa, pa, endpa; @@ -393,7 +393,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t loadaddr) /* early bus_space_map support */ struct bus_space tmp_bs_tag; - int (*map_func_save)(void *, bus_addr_t, bus_size_t, int, + int (*map_func_save)(void *, uint64_t, bus_size_t, int, bus_space_handle_t *); if (arg0)
ahci(4) disk attach fix - testing required
Hi, a colleague has been fiddling with ahci(4) wondering why the disk fails on attach on his hardware. He came up with the following diff. Considering this is quite an intrusive though small diff, I would like to ask for tests on various hardware to make sure this does not break any hardware we currently support. Disks should keep being properly attached. Patrick ---zip--- Turn off the code which waits for AHCI_PREG_CMD_CR to be set by the HBA after ahci_default_port_start() sets AHCI_PREG_CMD_ST. The AHCI spec. rev. 1.3 only requires the inverse to be true, i. e. that a HBA clears AHCI_PREG_CMD_CR when AHCI_PREG_CMD_ST gets cleared by software/driver. In fact, some HBAs will not raise AHCI_PREG_CMD_CR as an immediate consequence of AHCI_PREG_CMD_ST being set. Actually neither the FreeBSD, Linux nor NetBSD counterpart of ahci(4) has an analogous check. Disabling that wait fixes "failed to start command DMA on port N, disabling" bails during attach. >From Marius Strobl diff --git sys/dev/ic/ahci.c sys/dev/ic/ahci.c index d600f3e..da93478 100644 --- sys/dev/ic/ahci.c +++ sys/dev/ic/ahci.c @@ -890,10 +890,20 @@ ahci_default_port_start(struct ahci_port *ap, int fre_only) } #endif +#if 0 /* Wait for CR to come on */ + /* +* Doesn't work: According to section 5.3.16.1, HBAs are required +* to clear AHCI_PREG_CMD_CR if AHCI_PREG_CMD_ST is cleared to 0. +* However, nowhere in the AHCI v1.3 specification the inverse is +* stated, i. e. that an HBA must transit to a state in which the +* AHCI_PREG_CMD_CR bit must be set as an immediate result of the +* software/driver setting AHCI_PREG_CMD_ST. +*/ if (!fre_only && ahci_pwait_set(ap, AHCI_PREG_CMD, AHCI_PREG_CMD_CR, 1)) return (1); +#endif return (0); }