Re: Help me testing the netlock

2016-10-04 Thread Mike Larkin
On Tue, Oct 04, 2016 at 04:44:29PM +0200, Martin Pieuchot wrote:
> On 10/03/16 16:43, Martin Pieuchot wrote:
> > Diff below introduces a single write lock that will be used to serialize
> > access to ip_output().
> > 
> > This lock will be then split in multiple readers and writers to allow
> > multiple forwarding paths to run in parallel of each others but still
> > serialized with the socket layer.
> > 
> > I'm currently looking for people wanting to run this diff and try to
> > break it.  In other words, your machine might panic with it and if it
> > does report the panic to me so the diff can be improved.
> > 
> > I tested NFS v2 and v3 so I'm quite confident, but I might have missed
> > some obvious stuff.
> 
> Updated diff attaced including a fix for syn_cache_timer(), problem
> reported by Chris Jackman.
> 

So far, so good, on i386 and amd64 vmm(4) VMs. booted, did a pkg_add 
upgrade, and cvsync.

No issues seen so far.

-ml

> Index: kern/kern_rwlock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 kern_rwlock.c
> --- kern/kern_rwlock.c14 Mar 2015 07:33:42 -  1.27
> +++ kern/kern_rwlock.c4 Oct 2016 14:40:29 -
> @@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl)
>   membar_enter();
>  }
>  
> +#if 1
> +#include 
> +#include 
> +#include 
> +#endif
> +
>  void
>  rw_enter_write(struct rwlock *rwl)
>  {
> @@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl)
>   rw_enter(rwl, RW_WRITE);
>   else
>   membar_enter();
> +
> +#if 1
> + if ((rwl == &netlock) && (splassert_ctl == 3)) {
> + printf("ENTER::%d::", cpu_number());
> + db_stack_trace_print(
> + (db_expr_t)__builtin_frame_address(1),
> + TRUE, 1, "", printf);
> + }
> +#endif
>  }
>  
>  void
> @@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl)
>   unsigned long owner = rwl->rwl_owner;
>  
>   rw_assert_wrlock(rwl);
> +
> +#if 1
> + if ((rwl == &netlock) && (splassert_ctl == 3)) {
> + printf("EXIT::%d::", cpu_number());
> + db_stack_trace_print(
> + (db_expr_t)__builtin_frame_address(1),
> + TRUE, 1, "", printf);
> + }
> +#endif
>  
>   membar_exit();
>   if (__predict_false((owner & RWLOCK_WAIT) ||
> Index: kern/sys_socket.c
> ===
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 sys_socket.c
> --- kern/sys_socket.c 5 Dec 2015 10:11:53 -   1.21
> +++ kern/sys_socket.c 4 Oct 2016 14:40:29 -
> @@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st
>  {
>   struct socket *so = fp->f_data;
>   int revents = 0;
> - int s = splsoftnet();
> + int s;
>  
> + rw_enter_write(&netlock);
> + s = splsoftnet();
>   if (events & (POLLIN | POLLRDNORM)) {
>   if (soreadable(so))
>   revents |= events & (POLLIN | POLLRDNORM);
> @@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st
>   }
>   }
>   splx(s);
> + rw_exit_write(&netlock);
>   return (revents);
>  }
>  
> Index: kern/uipc_socket.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.161
> diff -u -p -r1.161 uipc_socket.c
> --- kern/uipc_socket.c20 Sep 2016 14:27:43 -  1.161
> +++ kern/uipc_socket.c4 Oct 2016 14:40:29 -
> @@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i
>   return (EPROTONOSUPPORT);
>   if (prp->pr_type != type)
>   return (EPROTOTYPE);
> + rw_enter_write(&netlock);
>   s = splsoftnet();
>   so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
>   TAILQ_INIT(&so->so_q0);
> @@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i
>   so->so_state |= SS_NOFDREF;
>   sofree(so);
>   splx(s);
> + rw_exit_write(&netlock);
>   return (error);
>   }
>   splx(s);
> + rw_exit_write(&netlock);
>   *aso = so;
>   return (0);
>  }
> @@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i
>  int
>  sobind(struct socket *so, struct mbuf *nam, struct proc *p)
>  {
> - int s = splsoftnet();
> - int error;
> + int s, error;
>  
> + rw_enter_write(&netlock);
> + s = splsoftnet();
>   error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
>   splx(s);
> + rw_exit_write(&netlock);
>   return (error);
>  }
>  
> @@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog)
>   if (isspliced(so) || issplicedback(so))
>   return (EOPNOTSUPP);
>  #endif /* SOCKET_SPLICE */
> + rw_enter_write(&netlock);
>   s = splsoftnet();
>   erro

armv7 pinmux woes

2016-10-04 Thread Ian Sutton
Hello,

I'm writing a driver (amdisplay) supporting the LCD controller on the
am335x (Beaglebone Black). The LCD controller pins and the MMC1 pins
(used for interacting with the onboard eMMC flash chip) exist on the
same set of pads such that only the LCD controller or the eMMC may be
used at once.

My driver needs to recognize cases where the root disk is MMC1 and abort
as to avoid hanging the system when the kernel tries to init userspace
on a disk it is no longer pinmuxxed to. I am not sure how to properly
and reliably do this, or if there is perhaps a better way to go about
solving this problem.

Thanks,
Ian



Re: add in6 multicast support to vxlan(4) ; question on mbufs

2016-10-04 Thread YASUOKA Masahiko
On Tue, 4 Oct 2016 17:27:12 +0200
Mike Belopuhov  wrote:
> On Tue, Oct 04, 2016 at 01:07 +0200, Vincent Gross wrote:
>> On Sat, 24 Sep 2016 10:58:10 +0200
>> Vincent Gross  wrote:
>> 
>> > Hi,
>> > 
>> [snip]
>> > 
>> > Aside from the mbuf issue, is this Ok ?
>> 
>> I will go back on the mbuff stuff later.
>> 
>> Diff rebased, ok anyone ?
>> 
>> Index: net/if_vxlan.c
>> ===
>> RCS file: /cvs/src/sys/net/if_vxlan.c,v
>> retrieving revision 1.48
>> diff -u -p -r1.48 if_vxlan.c
>> --- net/if_vxlan.c   30 Sep 2016 10:22:05 -  1.48
>> +++ net/if_vxlan.c   3 Oct 2016 23:12:42 -
>> @@ -638,7 +749,9 @@ vxlan_lookup(struct mbuf *m, struct udph
>>  if (m->m_pkthdr.len < skip + sizeof(struct ether_header))
>>  return (EINVAL);
>>  
>> -m_adj(m, skip);
>> +m_adj(m, skip - ETHER_ALIGN);
>> +m = m_pullup(m, ETHER_HDR_LEN + ETHER_ALIGN);
>> +m_adj(m, ETHER_ALIGN);
>>  ifp = &sc->sc_ac.ac_if;
>>  
>>  #if NBRIDGE > 0
> 
> I think this chunk is correct.  First you ensure that m->m_data
> points to a contiguous and well-aligned ethernet header and then
> you trim the alignment so that mtod() points directly at it.

Isn't it possible that m_pullup() may return non aligned pointer?

> Possibly add a comment to that effect to spare a eyebrow-raising
> moment for the next person.



optimise search for start of vmmap fill

2016-10-04 Thread David Gwynne
uvm_map_fill_vmmap lets callers specify an address after which they are 
interested in entries. generally theyre interested in addresses after 0, but if 
you start further along the address space the lookup has to traverse the 
addresses looking for it.

this can be optimised by borrowing the red-black tree nfind semantic
to do a binary search for the starting point. it's the same cost
for searching for the first (0th) address, but is much faster for
other starting points.

im doing the nfind by hand so i dont have to put a vmentry on the
stack to pass to an actual RBT_NFIND call.

ok?

Index: uvm_map.c
===
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.225
diff -u -p -r1.225 uvm_map.c
--- uvm_map.c   16 Sep 2016 02:35:42 -  1.225
+++ uvm_map.c   5 Oct 2016 00:27:05 -
@@ -5215,7 +5215,7 @@ int
 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
 size_t *lenp)
 {
-   struct vm_map_entry *entry;
+   struct vm_map_entry *entry = NULL, *tmp;
vaddr_t start;
int cnt, maxcnt, error = 0;
 
@@ -5233,13 +5233,22 @@ uvm_map_fill_vmmap(struct vm_map *map, s
start = (vaddr_t)kve[0].kve_start;
 
vm_map_lock(map);
-   RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
-   if (cnt == maxcnt) {
-   error = ENOMEM;
+
+   /* look for the starting address by doing nfind (by hand) */
+   tmp = RBT_ROOT(uvm_map_addr, &map->addr);
+   while (tmp != NULL) {
+   if (start < tmp->start) {
+   entry = tmp;
+   tmp = RBT_LEFT(uvm_map_addr, tmp);
+   } else if (start > tmp->start) {
+   tmp = RBT_RIGHT(uvm_map_addr, tmp);
+   } else { /* exact match */
+   entry = tmp;
break;
}
-   if (start != 0 && entry->start < start)
-   continue;
+   }
+
+   while (entry != NULL) {
kve->kve_start = entry->start;
kve->kve_end = entry->end;
kve->kve_guard = entry->guard;
@@ -5253,8 +5262,14 @@ uvm_map_fill_vmmap(struct vm_map *map, s
kve->kve_advice = entry->advice;
kve->kve_inheritance = entry->inheritance;
kve->kve_flags = entry->flags;
+
+   if (++cnt == maxcnt) {
+   error = ENOMEM;
+   break;
+   }
+
kve++;
-   cnt++;
+   entry = RBT_NEXT(uvm_map_addr, entry);
}
vm_map_unlock(map);
 



Smarter OpenBSD/sgi boot blocks

2016-10-04 Thread Miod Vallat
The sgi boot blocks use the PROM (ARCBios or ARCS) for its I/O routines.
When using a disk-based path, these routines are using the partition
table found in the ``volume header''.

In order to be able to use 16 partitions per disk, the OpenBSD port only
claims one volume header partition, #0, as the OpenBSD area, and puts
its own label in there.

Unfortunately, this means that `sd0a', the OpenBSD partition on which
the kernel is found, may not actually start at the same location as the
volume header partition #0, even though this is the case in most setups.

When reinstalling an O2 some time ago, I did not pay attention to this
during install:

  Use (A)uto layout, (E)dit auto layout, or create (C)ustom layout? [a] c
  [...]
  Label editor (enter '?' for help at any prompt)
  > p
  OpenBSD area: 0-17773524; size: 17773524; free: 0
  #size   offset  fstype [fsize bsize   cpg]
a: 17770389 3135  4.2BSD   1024  819216
c: 177735240  unused
p: 31350 unknown
  > d a
  > a
  partition: [a]
  offset: [3135]
  size: [17770389] 400M
  Rounding size to cylinder (3360 sectors): 816705
  FS type: [4.2BSD]
  mount point: [none] /
  Rounding offset to bsize (32 sectors): 3136
  Rounding size to bsize (32 sectors): 816704

And `sd0a' ended up starting one sector after the beginning of the
volume header partition #0.

Of course, rebooting afterwards did not work as expected:

  OpenBSD/sgi-IP32 ARCBios boot version 1.7
  arg 0: pci(0)scsi(0)disk(1)rdisk(0)partition(8)/boot
  arg 1: OSLoadOptions=auto
  arg 2: ConsoleIn=serial(0)
  arg 3: ConsoleOut=serial(0)
  arg 4: SystemPartition=pci(0)scsi(0)disk(1)rdisk(0)partition(8)
  arg 5: OSLoader=boot
  arg 6: OSLoadPartition=pci(0)scsi(0)disk(1)rdisk(0)partition(0)
  arg 7: OSLoadFilename=bsd
  Boot: pci(0)scsi(0)disk(1)rdisk(0)partition(0)bsd
  cannot open pci(0)scsi(0)disk(1)rdisk(0)partition(0)/etc/random.seed: Invalid 
argument
  open pci(0)scsi(0)disk(1)rdisk(0)partition(0)bsd: Invalid argument
  Boot FAILED!

The proper way to fix this is to make the boot blocks read the real
OpenBSD label instead of assuming sd0a spans volume header partition #0
in all cases.

This is achieved by the diff below, which will attempt to use the raw
disk device (volume header partition #10, equivalent to sd0c) for disk
accesses and will read both the volume header (to know where the OpenBSD
disklabel lies) and then the OpenBSD label (to know where sd0a really
is).

Of course, this has to cope with the two valid disk syntaxes (true
ARCBios used on older sgi systems, and ARCS dksc() syntax used on Origin
and later systems).

This diff has been tested on O2 (IP32) and Origin 350 (IP27). It is
written in a conservative way, in order to revert to the existing
behaviour if anything fails (invalid volume header, no OpenBSD label
yet...) and should not cause any regression.

Note that network boots are not affected by these changes.

Index: Makefile32.inc
===
RCS file: /OpenBSD/src/sys/arch/sgi/stand/Makefile32.inc,v
retrieving revision 1.5
diff -u -p -r1.5 Makefile32.inc
--- Makefile32.inc  19 Oct 2012 13:51:59 -  1.5
+++ Makefile32.inc  30 Sep 2016 09:19:15 -
@@ -18,6 +18,7 @@ AS+=  -32
 LD?=   ld
 LD+=   -m elf32btsmip
 LIBSA_CPPFLAGS=
+CFLAGS+=   -DLIBSA_LONGLONG_PRINTF
 .endif
 
 ### Figure out what to use for libsa and libz
Index: boot/Makefile
===
RCS file: /OpenBSD/src/sys/arch/sgi/stand/boot/Makefile,v
retrieving revision 1.16
diff -u -p -r1.16 Makefile
--- boot/Makefile   30 Jul 2016 03:25:49 -  1.16
+++ boot/Makefile   30 Sep 2016 09:19:15 -
@@ -13,14 +13,14 @@ AFLAGS+=${SAABI}
 
 S= ${.CURDIR}/../../../..
 SRCS=  start.S arcbios.c boot.c conf.c diskio.c filesystem.c \
-   netfs.c netio.c strchr.c strstr.c
+   netfs.c netio.c strstr.c
 
 .PATH: ${S}/lib/libsa
 SRCS+= loadfile.c
 
 .PATH: ${S}/lib/libkern/arch/mips64 ${S}/lib/libkern
-SRCS+= strlcpy.c memcpy.c strlen.c strrchr.c strlcat.c strncmp.c \
-   strcmp.S
+SRCS+= memcpy.c strchr.c strcmp.S strlcat.c strlcpy.c strlen.c \
+   strncmp.c strrchr.c
 
 CLEANFILES+=   machine mips64
 
Index: boot/diskio.c
===
RCS file: /OpenBSD/src/sys/arch/sgi/stand/boot/diskio.c,v
retrieving revision 1.10
diff -u -p -r1.10 diskio.c
--- boot/diskio.c   30 Sep 2015 22:45:57 -  1.10
+++ boot/diskio.c   30 Sep 2016 09:19:15 -
@@ -1,6 +1,21 @@
 /* $OpenBSD: diskio.c,v 1.10 2015/09/30 22:45:57 krw Exp $ */
 
 /*
+ * Copyright (c) 2016 Miodrag Vallat.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is 

Re: httpd(8)/proc.c: use less fds on startup

2016-10-04 Thread Rafael Zalamena
On Tue, Oct 04, 2016 at 07:46:52PM +0200, Rafael Zalamena wrote:
> This diff makes proc.c daemons to use less file descriptors on startup,
> this way we increase the number of child we can have considerably. This
> also improves the solution on a bug reported in bugs@
> "httpd errors out with 'too many open files'". 
> 
> To achieve that I delayed the socket distribution and made a minimal
> socket allocation in proc_init(), so only the necessary children socket
> are allocated and passed with proc_exec(). After the event_init() is called
> we call proc_connect() which creates the socketpair() and immediatly after
> each call we already sends them without accumulating.
> 
> Note: We still have to calculate how many fds we will want to have and
>   then limit the daemon prefork configuration.
> 
> ok?
> 

Paul de Weerd still found problems with the diff, because the httpd(8)
would not exit successfully with '-n' flag. It happened because the
new proc_connect() code tried to write fds to children process that
did not start caused by the ps_noaction flag. (thanks Paul!)

This new diff just adds a check for ps_noaction in proc_init() and
proc_connect() so it doesn't try to do anything if we are not really
going to start the daemon. Also I removed the ps_noaction from proc_run()
since we are not going to get to this point anymore.

ok?


Index: proc.c
===
RCS file: /home/obsdcvs/src/usr.sbin/httpd/proc.c,v
retrieving revision 1.27
diff -u -p -r1.27 proc.c
--- proc.c  28 Sep 2016 12:01:04 -  1.27
+++ proc.c  4 Oct 2016 21:50:43 -
@@ -36,8 +36,6 @@
 
 voidproc_exec(struct privsep *, struct privsep_proc *, unsigned int,
int, char **);
-voidproc_connectpeer(struct privsep *, enum privsep_procid, int,
-   struct privsep_pipes *);
 voidproc_setup(struct privsep *, struct privsep_proc *, unsigned int);
 voidproc_open(struct privsep *, int, int);
 voidproc_accept(struct privsep *, int, enum privsep_procid,
@@ -147,72 +145,18 @@ proc_exec(struct privsep *ps, struct pri
 }
 
 void
-proc_connectpeer(struct privsep *ps, enum privsep_procid id, int inst,
-struct privsep_pipes *pp)
-{
-   unsigned int i, j;
-   struct privsep_fdpf;
-
-   for (i = 0; i < PROC_MAX; i++) {
-   /* Parent is already connected with everyone. */
-   if (i == PROC_PARENT)
-   continue;
-
-   for (j = 0; j < ps->ps_instances[i]; j++) {
-   /* Don't send socket to child itself. */
-   if (i == (unsigned int)id &&
-   j == (unsigned int)inst)
-   continue;
-   if (pp->pp_pipes[i][j] == -1)
-   continue;
-
-   pf.pf_procid = i;
-   pf.pf_instance = j;
-   proc_compose_imsg(ps, id, inst, IMSG_CTL_PROCFD,
-   -1, pp->pp_pipes[i][j], &pf, sizeof(pf));
-   pp->pp_pipes[i][j] = -1;
-   }
-   }
-}
-
-/* Inter-connect all process except with ourself. */
-void
 proc_connect(struct privsep *ps)
 {
-   unsigned int src, i, j;
-   struct privsep_pipes*pp;
-   struct imsgev   *iev;
-
-   /* Listen on appropriate pipes. */
-   src = privsep_process;
-   pp = &ps->ps_pipes[src][ps->ps_instance];
-
-   for (i = 0; i < PROC_MAX; i++) {
-   /* Don't listen to ourself. */
-   if (i == src)
-   continue;
-
-   for (j = 0; j < ps->ps_instances[i]; j++) {
-   if (pp->pp_pipes[i][j] == -1)
-   continue;
-
-   iev = &ps->ps_ievs[i][j];
-   imsg_init(&iev->ibuf, pp->pp_pipes[i][j]);
-   event_set(&iev->ev, iev->ibuf.fd, iev->events,
-   iev->handler, iev->data);
-   event_add(&iev->ev, NULL);
-   }
-   }
+   unsigned int src, dst;
 
-   /* Exchange pipes between process. */
-   for (i = 0; i < PROC_MAX; i++) {
-   /* Parent is already connected with everyone. */
-   if (i == PROC_PARENT)
-   continue;
+   /* Don't distribute any sockets if we are not really going to run. */
+   if (ps->ps_noaction)
+   return;
 
-   for (j = 0; j < ps->ps_instances[i]; j++)
-   proc_connectpeer(ps, i, j, &ps->ps_pipes[i][j]);
-   }
+   /* Distribute the socketpair()s for everyone. */
+   for (src = 0; src < PROC_MAX; src++)
+   for (dst = src; dst < PROC_MAX; dst++)
+   proc_open(ps, src, dst);
 }
 
 void
@@ -220,17 +164,41 @@ proc_init(struct privsep *ps, struct pri
 int argc, char **ar

httpd(8)/proc.c: use less fds on startup

2016-10-04 Thread Rafael Zalamena
This diff makes proc.c daemons to use less file descriptors on startup,
this way we increase the number of child we can have considerably. This
also improves the solution on a bug reported in bugs@
"httpd errors out with 'too many open files'". 

To achieve that I delayed the socket distribution and made a minimal
socket allocation in proc_init(), so only the necessary children socket
are allocated and passed with proc_exec(). After the event_init() is called
we call proc_connect() which creates the socketpair() and immediatly after
each call we already sends them without accumulating.

Note: We still have to calculate how many fds we will want to have and
  then limit the daemon prefork configuration.

ok?


Index: proc.c
===
RCS file: /home/obsdcvs/src/usr.sbin/httpd/proc.c,v
retrieving revision 1.27
diff -u -p -r1.27 proc.c
--- proc.c  28 Sep 2016 12:01:04 -  1.27
+++ proc.c  4 Oct 2016 17:26:41 -
@@ -36,8 +36,6 @@
 
 voidproc_exec(struct privsep *, struct privsep_proc *, unsigned int,
int, char **);
-voidproc_connectpeer(struct privsep *, enum privsep_procid, int,
-   struct privsep_pipes *);
 voidproc_setup(struct privsep *, struct privsep_proc *, unsigned int);
 voidproc_open(struct privsep *, int, int);
 voidproc_accept(struct privsep *, int, enum privsep_procid,
@@ -147,72 +145,14 @@ proc_exec(struct privsep *ps, struct pri
 }
 
 void
-proc_connectpeer(struct privsep *ps, enum privsep_procid id, int inst,
-struct privsep_pipes *pp)
-{
-   unsigned int i, j;
-   struct privsep_fdpf;
-
-   for (i = 0; i < PROC_MAX; i++) {
-   /* Parent is already connected with everyone. */
-   if (i == PROC_PARENT)
-   continue;
-
-   for (j = 0; j < ps->ps_instances[i]; j++) {
-   /* Don't send socket to child itself. */
-   if (i == (unsigned int)id &&
-   j == (unsigned int)inst)
-   continue;
-   if (pp->pp_pipes[i][j] == -1)
-   continue;
-
-   pf.pf_procid = i;
-   pf.pf_instance = j;
-   proc_compose_imsg(ps, id, inst, IMSG_CTL_PROCFD,
-   -1, pp->pp_pipes[i][j], &pf, sizeof(pf));
-   pp->pp_pipes[i][j] = -1;
-   }
-   }
-}
-
-/* Inter-connect all process except with ourself. */
-void
 proc_connect(struct privsep *ps)
 {
-   unsigned int src, i, j;
-   struct privsep_pipes*pp;
-   struct imsgev   *iev;
-
-   /* Listen on appropriate pipes. */
-   src = privsep_process;
-   pp = &ps->ps_pipes[src][ps->ps_instance];
-
-   for (i = 0; i < PROC_MAX; i++) {
-   /* Don't listen to ourself. */
-   if (i == src)
-   continue;
-
-   for (j = 0; j < ps->ps_instances[i]; j++) {
-   if (pp->pp_pipes[i][j] == -1)
-   continue;
-
-   iev = &ps->ps_ievs[i][j];
-   imsg_init(&iev->ibuf, pp->pp_pipes[i][j]);
-   event_set(&iev->ev, iev->ibuf.fd, iev->events,
-   iev->handler, iev->data);
-   event_add(&iev->ev, NULL);
-   }
-   }
-
-   /* Exchange pipes between process. */
-   for (i = 0; i < PROC_MAX; i++) {
-   /* Parent is already connected with everyone. */
-   if (i == PROC_PARENT)
-   continue;
+   unsigned int src, dst;
 
-   for (j = 0; j < ps->ps_instances[i]; j++)
-   proc_connectpeer(ps, i, j, &ps->ps_pipes[i][j]);
-   }
+   /* Distribute the socketpair()s for everyone. */
+   for (src = 0; src < PROC_MAX; src++)
+   for (dst = src; dst < PROC_MAX; dst++)
+   proc_open(ps, src, dst);
 }
 
 void
@@ -220,17 +160,37 @@ proc_init(struct privsep *ps, struct pri
 int argc, char **argv, enum privsep_procid proc_id)
 {
struct privsep_proc *p = NULL;
+   struct privsep_pipes*pa, *pb;
unsigned int proc;
-   unsigned int src, dst;
+   unsigned int dst;
+   int  fds[2];
 
if (proc_id == PROC_PARENT) {
privsep_process = PROC_PARENT;
proc_setup(ps, procs, nproc);
 
-   /* Open socketpair()s for everyone. */
-   for (src = 0; src < PROC_MAX; src++)
-   for (dst = 0; dst < PROC_MAX; dst++)
-   proc_open(ps, src, dst);
+   /*
+* Create the children sockets so we can use them 
+* to distribute the r

Re: add in6 multicast support to vxlan(4) ; question on mbufs

2016-10-04 Thread Mike Belopuhov
On Tue, Oct 04, 2016 at 01:07 +0200, Vincent Gross wrote:
> On Sat, 24 Sep 2016 10:58:10 +0200
> Vincent Gross  wrote:
> 
> > Hi,
> > 
> [snip]
> > 
> > Aside from the mbuf issue, is this Ok ?
> 
> I will go back on the mbuff stuff later.
> 
> Diff rebased, ok anyone ?
> 
> Index: net/if_vxlan.c
> ===
> RCS file: /cvs/src/sys/net/if_vxlan.c,v
> retrieving revision 1.48
> diff -u -p -r1.48 if_vxlan.c
> --- net/if_vxlan.c30 Sep 2016 10:22:05 -  1.48
> +++ net/if_vxlan.c3 Oct 2016 23:12:42 -
> @@ -638,7 +749,9 @@ vxlan_lookup(struct mbuf *m, struct udph
>   if (m->m_pkthdr.len < skip + sizeof(struct ether_header))
>   return (EINVAL);
>  
> - m_adj(m, skip);
> + m_adj(m, skip - ETHER_ALIGN);
> + m = m_pullup(m, ETHER_HDR_LEN + ETHER_ALIGN);
> + m_adj(m, ETHER_ALIGN);
>   ifp = &sc->sc_ac.ac_if;
>  
>  #if NBRIDGE > 0

I think this chunk is correct.  First you ensure that m->m_data
points to a contiguous and well-aligned ethernet header and then
you trim the alignment so that mtod() points directly at it.

Possibly add a comment to that effect to spare a eyebrow-raising
moment for the next person.



Re: Unexpected behavior in su/doas

2016-10-04 Thread Todd C. Miller
FYI, sudo supports running the command in a new pty, which should
avoid the issue.  Commands are always run in a new pty when logging
input or output, otherwise the use_pty flag needs to be set in
sudoers.

 - todd



Re: Help me testing the netlock

2016-10-04 Thread Martin Pieuchot

On 10/03/16 16:43, Martin Pieuchot wrote:

Diff below introduces a single write lock that will be used to serialize
access to ip_output().

This lock will be then split in multiple readers and writers to allow
multiple forwarding paths to run in parallel of each others but still
serialized with the socket layer.

I'm currently looking for people wanting to run this diff and try to
break it.  In other words, your machine might panic with it and if it
does report the panic to me so the diff can be improved.

I tested NFS v2 and v3 so I'm quite confident, but I might have missed
some obvious stuff.


Updated diff attaced including a fix for syn_cache_timer(), problem
reported by Chris Jackman.

Index: kern/kern_rwlock.c
===
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.27
diff -u -p -r1.27 kern_rwlock.c
--- kern/kern_rwlock.c	14 Mar 2015 07:33:42 -	1.27
+++ kern/kern_rwlock.c	4 Oct 2016 14:40:29 -
@@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl)
 		membar_enter();
 }
 
+#if 1
+#include 
+#include 
+#include 
+#endif
+
 void
 rw_enter_write(struct rwlock *rwl)
 {
@@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl)
 		rw_enter(rwl, RW_WRITE);
 	else
 		membar_enter();
+
+#if 1
+	if ((rwl == &netlock) && (splassert_ctl == 3)) {
+		printf("ENTER::%d::", cpu_number());
+		db_stack_trace_print(
+		(db_expr_t)__builtin_frame_address(1),
+		TRUE, 1, "", printf);
+	}
+#endif
 }
 
 void
@@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl)
 	unsigned long owner = rwl->rwl_owner;
 
 	rw_assert_wrlock(rwl);
+
+#if 1
+	if ((rwl == &netlock) && (splassert_ctl == 3)) {
+		printf("EXIT::%d::", cpu_number());
+		db_stack_trace_print(
+		(db_expr_t)__builtin_frame_address(1),
+		TRUE, 1, "", printf);
+	}
+#endif
 
 	membar_exit();
 	if (__predict_false((owner & RWLOCK_WAIT) ||
Index: kern/sys_socket.c
===
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.21
diff -u -p -r1.21 sys_socket.c
--- kern/sys_socket.c	5 Dec 2015 10:11:53 -	1.21
+++ kern/sys_socket.c	4 Oct 2016 14:40:29 -
@@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st
 {
 	struct socket *so = fp->f_data;
 	int revents = 0;
-	int s = splsoftnet();
+	int s;
 
+	rw_enter_write(&netlock);
+	s = splsoftnet();
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (soreadable(so))
 			revents |= events & (POLLIN | POLLRDNORM);
@@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st
 		}
 	}
 	splx(s);
+	rw_exit_write(&netlock);
 	return (revents);
 }
 
Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.161
diff -u -p -r1.161 uipc_socket.c
--- kern/uipc_socket.c	20 Sep 2016 14:27:43 -	1.161
+++ kern/uipc_socket.c	4 Oct 2016 14:40:29 -
@@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i
 		return (EPROTONOSUPPORT);
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
+	rw_enter_write(&netlock);
 	s = splsoftnet();
 	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
 	TAILQ_INIT(&so->so_q0);
@@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i
 		so->so_state |= SS_NOFDREF;
 		sofree(so);
 		splx(s);
+		rw_exit_write(&netlock);
 		return (error);
 	}
 	splx(s);
+	rw_exit_write(&netlock);
 	*aso = so;
 	return (0);
 }
@@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i
 int
 sobind(struct socket *so, struct mbuf *nam, struct proc *p)
 {
-	int s = splsoftnet();
-	int error;
+	int s, error;
 
+	rw_enter_write(&netlock);
+	s = splsoftnet();
 	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
 	splx(s);
+	rw_exit_write(&netlock);
 	return (error);
 }
 
@@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog)
 	if (isspliced(so) || issplicedback(so))
 		return (EOPNOTSUPP);
 #endif /* SOCKET_SPLICE */
+	rw_enter_write(&netlock);
 	s = splsoftnet();
 	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
 	curproc);
 	if (error) {
 		splx(s);
+		rw_exit_write(&netlock);
 		return (error);
 	}
 	if (TAILQ_FIRST(&so->so_q) == NULL)
@@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog)
 		backlog = sominconn;
 	so->so_qlimit = backlog;
 	splx(s);
+	rw_exit_write(&netlock);
 	return (0);
 }
 
@@ -196,6 +204,7 @@ solisten(struct socket *so, int backlog)
 void
 sofree(struct socket *so)
 {
+	rw_assert_wrlock(&netlock);
 	splsoftassert(IPL_SOFTNET);
 
 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
@@ -234,9 +243,10 @@ int
 soclose(struct socket *so)
 {
 	struct socket *so2;
-	int s = splsoftnet();		/* conservative */
-	int error = 0;
+	int s, error = 0;
 
+	rw_enter_write(&netlock);
+	s = splsoftnet();		/* conservative */
 	if (so->so_options & SO_ACCEPTCONN) {
 		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
 			(void) soqremque(so2, 0);
@@ -260,7 +270,7 @@ soclose(struct soc

Re: cwm: remove mouse resize window

2016-10-04 Thread Okan Demirmen
On Tue 2016.10.04 at 15:29 +0200, Vadim Vygonets wrote:
> Quoth Okan Demirmen on Wed, Sep 28, 2016:
> > We currently print the x/y dimensions only for mouse based actions; we
> > don't for kbd, nor do we do anything with mouse/kbd window moves (such
> > as printing the x/y coordinates, etc). So why have mouse-resize be
> > different?  Thus, the below crudely removes it.
> 
> I use it for xterms, and I'd rather have it in both mouse and
> keyboard based resizes.

Hi Vadim - Not sure if you saw my other reply, or commit, but it's
staying and was expanded a bit. As for displaying on kb requests, that's
harder to do since a KeyRelease event would make it disappear so it'll
be visible for a tiny amount of time; maybe a timer or some such or
other idea, not sure - haven't thought about it...not too important
imho yet.

Thanks,
Okan



Re: cwm: remove mouse resize window

2016-10-04 Thread Vadim Vygonets
Quoth Okan Demirmen on Wed, Sep 28, 2016:
> We currently print the x/y dimensions only for mouse based actions; we
> don't for kbd, nor do we do anything with mouse/kbd window moves (such
> as printing the x/y coordinates, etc). So why have mouse-resize be
> different?  Thus, the below crudely removes it.

I use it for xterms, and I'd rather have it in both mouse and
keyboard based resizes.

Vadik.

-- 
Weiner's Law of Libraries:
There are no answers, only cross references.



Re: disklabel template: percentage of disk optional?

2016-10-04 Thread Claus Assmann
On Tue, Oct 04, 2016, Otto Moerbeek wrote:
> On Tue, Oct 04, 2016 at 04:00:50AM -0700, Claus Assmann wrote:

> > This doesn't seem to resolve the problem that sa->rate is
> > not initialized, so a simple file like this still triggers

> Likely, this is better.

> RCS file: /cvs/src/sbin/disklabel/editor.c,v

>   sa = &(alloc_table[0].table[idx]);
> + memset(sa, 0, sizeof(*sa));

Clearing the entire struct is certainly the easiest approach.
Thanks.



Re: disklabel template: percentage of disk optional?

2016-10-04 Thread Otto Moerbeek
On Tue, Oct 04, 2016 at 04:00:50AM -0700, Claus Assmann wrote:

> On Tue, Oct 04, 2016, Dmitrij D. Czarkoff wrote:
> 
> [please do not Cc me]
> 
> > I shouldn't have started on sending patches at 3AM.  This one should do
> > what I intended it to do.  Sorry for noise.
> 
> > +   else if (t == NULL && sa->minsz != sa->maxsz)
> > +   errx(1, "%s: parse error on line %u", filename, idx);
> 
> This doesn't seem to resolve the problem that sa->rate is
> not initialized, so a simple file like this still triggers
> the error message (at least in my tests):
> 
> / 500M
> swap  1G
> /usr  2G
> /home 500G

Likely, this is better.

-Otto

Index: editor.c
===
RCS file: /cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -p -r1.303 editor.c
--- editor.c2 Sep 2016 10:47:17 -   1.303
+++ editor.c4 Oct 2016 11:25:36 -
@@ -2386,6 +2386,7 @@ parse_autotable(char *filename)
idx + 1, sizeof(*sa))) == NULL)
err(1, NULL);
sa = &(alloc_table[0].table[idx]);
+   memset(sa, 0, sizeof(*sa));
idx++;
 
if ((sa->mp = get_token(&buf, &len)) == NULL ||



Re: disklabel template: percentage of disk optional?

2016-10-04 Thread Claus Assmann
On Tue, Oct 04, 2016, Dmitrij D. Czarkoff wrote:

[please do not Cc me]

> I shouldn't have started on sending patches at 3AM.  This one should do
> what I intended it to do.  Sorry for noise.

> + else if (t == NULL && sa->minsz != sa->maxsz)
> + errx(1, "%s: parse error on line %u", filename, idx);

This doesn't seem to resolve the problem that sa->rate is
not initialized, so a simple file like this still triggers
the error message (at least in my tests):

/   500M
swap1G
/usr2G
/home   500G



Re: 64-bit address for bus_space_map(9) on arm

2016-10-04 Thread Mark Kettenis
> Date: Tue, 4 Oct 2016 10:59:26 +0200
> From: Patrick Wildt 
> 
> Hi,
> 
> continuing where we left off before the hackathon I would like to add
> a diff to the 64-bit bus_addr_t discussion.  This diff does not
> increase the size of bus_addr_t.  Instead it changes the argument
> of the function pointer stored in the bus tag from bus_addr_t to
> uint64_t.  This is an arm only change and will not affect any other
> architecture.
> 
> This is needed to pass virtual addresses retrieved from the device tree
> between the different simple-busses in the tree topology.  In the end
> every single hardware that we want to speak to is in the 32-bit range,
> not outside.  This means the final bus_space_map(9) will work on a
> 32-bit value.
> 
> Opinions? ok?

I think this is an acceptable hack.

ok kettenis@

> diff --git sys/arch/arm/armv7/armv7_space.c sys/arch/arm/armv7/armv7_space.c
> index 4f6c1e0..fbd558a 100644
> --- sys/arch/arm/armv7/armv7_space.c
> +++ sys/arch/arm/armv7/armv7_space.c
> @@ -165,7 +165,7 @@ struct bus_space armv7_bs_tag = {
>  };
>  
>  int
> -armv7_bs_map(void *t, bus_addr_t bpa, bus_size_t size,
> +armv7_bs_map(void *t, uint64_t bpa, bus_size_t size,
> int flags, bus_space_handle_t *bshp)
>  {
>   u_long startpa, endpa, pa;
> diff --git sys/arch/arm/include/bus.h sys/arch/arm/include/bus.h
> index f00c897..c108359 100644
> --- sys/arch/arm/include/bus.h
> +++ sys/arch/arm/include/bus.h
> @@ -93,7 +93,7 @@ struct bus_space {
>   void*bs_cookie;
>  
>   /* mapping/unmapping */
> - int (*bs_map) (void *, bus_addr_t, bus_size_t,
> + int (*bs_map) (void *, uint64_t, bus_size_t,
>   int, bus_space_handle_t *);
>   void(*bs_unmap) (void *, bus_space_handle_t,
>   bus_size_t);
> @@ -373,7 +373,7 @@ struct bus_space {
>   */
>  
>  #define bs_map_proto(f)  
> \
> -int  __bs_c(f,_bs_map) (void *t, bus_addr_t addr,\
> +int  __bs_c(f,_bs_map) (void *t, uint64_t addr,  \
>   bus_size_t size, int flags, bus_space_handle_t *bshp);
>  
>  #define bs_unmap_proto(f)\
> diff --git sys/arch/arm/simplebus/simplebus.c 
> sys/arch/arm/simplebus/simplebus.c
> index d2f5bfe..325e149 100644
> --- sys/arch/arm/simplebus/simplebus.c
> +++ sys/arch/arm/simplebus/simplebus.c
> @@ -30,7 +30,7 @@ int simplebus_match(struct device *, void *, void *);
>  void simplebus_attach(struct device *, struct device *, void *);
>  
>  void simplebus_attach_node(struct device *, int);
> -int simplebus_bs_map(void *, bus_addr_t, bus_size_t, int, bus_space_handle_t 
> *);
> +int simplebus_bs_map(void *, uint64_t, bus_size_t, int, bus_space_handle_t 
> *);
>  
>  struct simplebus_softc {
>   struct devicesc_dev;
> @@ -205,7 +205,7 @@ simplebus_attach_node(struct device *self, int node)
>   * Translate memory address if needed.
>   */
>  int
> -simplebus_bs_map(void *t, bus_addr_t bpa, bus_size_t size,
> +simplebus_bs_map(void *t, uint64_t bpa, bus_size_t size,
>  int flag, bus_space_handle_t *bshp)
>  {
>   struct simplebus_softc *sc = (struct simplebus_softc *)t;
> diff --git sys/arch/armv7/armv7/armv7_machdep.c 
> sys/arch/armv7/armv7/armv7_machdep.c
> index e869c2c..839e45b 100644
> --- sys/arch/armv7/armv7/armv7_machdep.c
> +++ sys/arch/armv7/armv7/armv7_machdep.c
> @@ -199,7 +199,7 @@ int   safepri = 0;
>  /* Prototypes */
>  
>  char bootargs[MAX_BOOT_STRING];
> -int  bootstrap_bs_map(void *, bus_addr_t, bus_size_t, int,
> +int  bootstrap_bs_map(void *, uint64_t, bus_size_t, int,
>  bus_space_handle_t *);
>  void process_kernel_args(char *);
>  void consinit(void);
> @@ -318,7 +318,7 @@ read_ttb(void)
>  static vaddr_t section_free = 0xfd00; /* XXX - huh */
>  
>  int
> -bootstrap_bs_map(void *t, bus_addr_t bpa, bus_size_t size,
> +bootstrap_bs_map(void *t, uint64_t bpa, bus_size_t size,
>  int flags, bus_space_handle_t *bshp)
>  {
>   u_long startpa, pa, endpa;
> @@ -393,7 +393,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
> loadaddr)
>  
>   /* early bus_space_map support */
>   struct bus_space tmp_bs_tag;
> - int (*map_func_save)(void *, bus_addr_t, bus_size_t, int,
> + int (*map_func_save)(void *, uint64_t, bus_size_t, int,
>   bus_space_handle_t *);
>  
>   if (arg0)
> 
> 



Re: clang dev/pv/hyperv.c

2016-10-04 Thread Jonathan Gray
On Sat, Sep 24, 2016 at 07:05:54PM +0200, Mark Kettenis wrote:
> Another obvious mistake caught by clang.
> 
> ok?

ok jsg@

> 
> 
> Index: dev/pv/hyperv.c
> ===
> RCS file: /cvs/src/sys/dev/pv/hyperv.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 hyperv.c
> --- dev/pv/hyperv.c   20 Sep 2016 10:27:14 -  1.16
> +++ dev/pv/hyperv.c   24 Sep 2016 17:04:24 -
> @@ -700,7 +700,7 @@ hv_vmbus_connect(struct hv_softc *sc)
>   sc->sc_revents = (u_long *)((caddr_t)sc->sc_events + (PAGE_SIZE >> 1));
>  
>   sc->sc_monitor[0] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
> - if (sc->sc_monitor == NULL) {
> + if (sc->sc_monitor[0] == NULL) {
>   printf(": failed to allocate monitor page 1\n");
>   goto errout;
>   }
> @@ -710,7 +710,7 @@ hv_vmbus_connect(struct hv_softc *sc)
>   }
>  
>   sc->sc_monitor[1] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
> - if (sc->sc_monitor == NULL) {
> + if (sc->sc_monitor[1] == NULL) {
>   printf(": failed to allocate monitor page 2\n");
>   goto errout;
>   }
> 



64-bit address for bus_space_map(9) on arm

2016-10-04 Thread Patrick Wildt
Hi,

continuing where we left off before the hackathon I would like to add
a diff to the 64-bit bus_addr_t discussion.  This diff does not
increase the size of bus_addr_t.  Instead it changes the argument
of the function pointer stored in the bus tag from bus_addr_t to
uint64_t.  This is an arm only change and will not affect any other
architecture.

This is needed to pass virtual addresses retrieved from the device tree
between the different simple-busses in the tree topology.  In the end
every single hardware that we want to speak to is in the 32-bit range,
not outside.  This means the final bus_space_map(9) will work on a
32-bit value.

Opinions? ok?

Patrick

diff --git sys/arch/arm/armv7/armv7_space.c sys/arch/arm/armv7/armv7_space.c
index 4f6c1e0..fbd558a 100644
--- sys/arch/arm/armv7/armv7_space.c
+++ sys/arch/arm/armv7/armv7_space.c
@@ -165,7 +165,7 @@ struct bus_space armv7_bs_tag = {
 };
 
 int
-armv7_bs_map(void *t, bus_addr_t bpa, bus_size_t size,
+armv7_bs_map(void *t, uint64_t bpa, bus_size_t size,
  int flags, bus_space_handle_t *bshp)
 {
u_long startpa, endpa, pa;
diff --git sys/arch/arm/include/bus.h sys/arch/arm/include/bus.h
index f00c897..c108359 100644
--- sys/arch/arm/include/bus.h
+++ sys/arch/arm/include/bus.h
@@ -93,7 +93,7 @@ struct bus_space {
void*bs_cookie;
 
/* mapping/unmapping */
-   int (*bs_map) (void *, bus_addr_t, bus_size_t,
+   int (*bs_map) (void *, uint64_t, bus_size_t,
int, bus_space_handle_t *);
void(*bs_unmap) (void *, bus_space_handle_t,
bus_size_t);
@@ -373,7 +373,7 @@ struct bus_space {
  */
 
 #define bs_map_proto(f)
\
-int__bs_c(f,_bs_map) (void *t, bus_addr_t addr,\
+int__bs_c(f,_bs_map) (void *t, uint64_t addr,  \
bus_size_t size, int flags, bus_space_handle_t *bshp);
 
 #define bs_unmap_proto(f)  \
diff --git sys/arch/arm/simplebus/simplebus.c sys/arch/arm/simplebus/simplebus.c
index d2f5bfe..325e149 100644
--- sys/arch/arm/simplebus/simplebus.c
+++ sys/arch/arm/simplebus/simplebus.c
@@ -30,7 +30,7 @@ int simplebus_match(struct device *, void *, void *);
 void simplebus_attach(struct device *, struct device *, void *);
 
 void simplebus_attach_node(struct device *, int);
-int simplebus_bs_map(void *, bus_addr_t, bus_size_t, int, bus_space_handle_t 
*);
+int simplebus_bs_map(void *, uint64_t, bus_size_t, int, bus_space_handle_t *);
 
 struct simplebus_softc {
struct devicesc_dev;
@@ -205,7 +205,7 @@ simplebus_attach_node(struct device *self, int node)
  * Translate memory address if needed.
  */
 int
-simplebus_bs_map(void *t, bus_addr_t bpa, bus_size_t size,
+simplebus_bs_map(void *t, uint64_t bpa, bus_size_t size,
 int flag, bus_space_handle_t *bshp)
 {
struct simplebus_softc *sc = (struct simplebus_softc *)t;
diff --git sys/arch/armv7/armv7/armv7_machdep.c 
sys/arch/armv7/armv7/armv7_machdep.c
index e869c2c..839e45b 100644
--- sys/arch/armv7/armv7/armv7_machdep.c
+++ sys/arch/armv7/armv7/armv7_machdep.c
@@ -199,7 +199,7 @@ int   safepri = 0;
 /* Prototypes */
 
 char   bootargs[MAX_BOOT_STRING];
-intbootstrap_bs_map(void *, bus_addr_t, bus_size_t, int,
+intbootstrap_bs_map(void *, uint64_t, bus_size_t, int,
 bus_space_handle_t *);
 void   process_kernel_args(char *);
 void   consinit(void);
@@ -318,7 +318,7 @@ read_ttb(void)
 static vaddr_t section_free = 0xfd00; /* XXX - huh */
 
 int
-bootstrap_bs_map(void *t, bus_addr_t bpa, bus_size_t size,
+bootstrap_bs_map(void *t, uint64_t bpa, bus_size_t size,
 int flags, bus_space_handle_t *bshp)
 {
u_long startpa, pa, endpa;
@@ -393,7 +393,7 @@ initarm(void *arg0, void *arg1, void *arg2, paddr_t 
loadaddr)
 
/* early bus_space_map support */
struct bus_space tmp_bs_tag;
-   int (*map_func_save)(void *, bus_addr_t, bus_size_t, int,
+   int (*map_func_save)(void *, uint64_t, bus_size_t, int,
bus_space_handle_t *);
 
if (arg0)



ahci(4) disk attach fix - testing required

2016-10-04 Thread Patrick Wildt
Hi,

a colleague has been fiddling with ahci(4) wondering why the disk fails
on attach on his hardware.  He came up with the following diff.

Considering this is quite an intrusive though small diff, I would like
to ask for tests on various hardware to make sure this does not break
any hardware we currently support.  Disks should keep being properly
attached.

Patrick

---zip---

Turn off the code which waits for AHCI_PREG_CMD_CR to be set by the
HBA after ahci_default_port_start() sets AHCI_PREG_CMD_ST. The AHCI
spec. rev. 1.3 only requires the inverse to be true, i. e. that a
HBA clears AHCI_PREG_CMD_CR when AHCI_PREG_CMD_ST gets cleared by
software/driver. In fact, some HBAs will not raise AHCI_PREG_CMD_CR
as an immediate consequence of AHCI_PREG_CMD_ST being set. Actually
neither the FreeBSD, Linux nor NetBSD counterpart of ahci(4) has an
analogous check. Disabling that wait fixes "failed to start command
DMA on port N, disabling" bails during attach.

>From Marius Strobl

diff --git sys/dev/ic/ahci.c sys/dev/ic/ahci.c
index d600f3e..da93478 100644
--- sys/dev/ic/ahci.c
+++ sys/dev/ic/ahci.c
@@ -890,10 +890,20 @@ ahci_default_port_start(struct ahci_port *ap, int 
fre_only)
}
 #endif
 
+#if 0
/* Wait for CR to come on */
+   /*
+* Doesn't work: According to section 5.3.16.1, HBAs are required
+* to clear AHCI_PREG_CMD_CR if AHCI_PREG_CMD_ST is cleared to 0.
+* However, nowhere in the AHCI v1.3 specification the inverse is
+* stated, i. e. that an HBA must transit to a state in which the
+* AHCI_PREG_CMD_CR bit must be set as an immediate result of the
+* software/driver setting AHCI_PREG_CMD_ST.
+*/
if (!fre_only &&
ahci_pwait_set(ap, AHCI_PREG_CMD, AHCI_PREG_CMD_CR, 1))
return (1);
+#endif
 
return (0);
 }