Re: [PATCH] [0/1] pf refactoring

2016-08-19 Thread Richard Procter
Hi Mike, 

On Fri, 19 Aug 2016, Mike Belopuhov wrote:

> I've looked through it and couldn't find anything wrong with it.

Thanks. 

> I do however find pacthing of values in pf_translate_icmp_af
> unneccessary since we'll be throwing away the original header
> anyway.

Do you mean e.g. circa line 2602, pf.c:pf_translate_icmp_af() 
(HEAD + complete diff)?

pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI);
pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO);
pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu));
if (ptr >= 0)
pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr));

My understanding is that the ICMP v4 and v6 headers are so similar that pf 
af-to hacks the one into the other; the code above is filling an ICMPv4 
header with v6 values; the updates are not redundant. It muddled me a bit 
when I was converting it.

> 
> OK mikeb for the diff.

best, 
Richard. 



Re: connect_sync

2016-08-19 Thread Ted Unangst
Todd C. Miller wrote:
> On Fri, 19 Aug 2016 19:14:54 -0400, "Ted Unangst" wrote:
> 
> > hmm. so I was trying to avoid the need for two different functions. I think
> > there's a mental overhead to "do this, then maybe that". this loop reads
> > very strangely to me. it's hard to mentally trace the code. it's not really 
> > a
> > loop, just goto spelled with break and continue.
> 
> I suppose it is better to just use connect() and connect_wait() in
> a for() loop.  Whichever way you choose we should put as an example
> in connect(2).

> + for (error = connect(s, res->ai_addr, res->ai_addrlen);
> + error != 0 && errno == EINTR; error = connect_wait(s))
> + continue;
> + if (error != 0) {

This is a pretty cool style.



Re: connect_sync

2016-08-19 Thread Todd C. Miller
On Fri, 19 Aug 2016 19:14:54 -0400, "Ted Unangst" wrote:

> hmm. so I was trying to avoid the need for two different functions. I think
> there's a mental overhead to "do this, then maybe that". this loop reads
> very strangely to me. it's hard to mentally trace the code. it's not really a
> loop, just goto spelled with break and continue.

I suppose it is better to just use connect() and connect_wait() in
a for() loop.  Whichever way you choose we should put as an example
in connect(2).

 - todd

Index: usr.bin/ftp/extern.h
===
RCS file: /cvs/src/usr.bin/ftp/extern.h,v
retrieving revision 1.43
diff -u -p -u -r1.43 extern.h
--- usr.bin/ftp/extern.h18 Aug 2016 16:23:06 -  1.43
+++ usr.bin/ftp/extern.h19 Aug 2016 21:54:32 -
@@ -62,7 +62,6 @@
  */
 
 #include 
-#include 
 
 void   abort_remote(FILE *);
 void   abortpt(int);
@@ -76,7 +75,7 @@ void  cmdabort(int);
 void   cmdscanner(int);
 intcommand(const char *, ...);
 intconfirm(const char *, const char *);
-intconnect_sync(int, const struct sockaddr *, socklen_t);
+intconnect_wait(int);
 FILE   *dataconn(const char *);
 intforegroundproc(void);
 intfileindir(const char *, const char *);
Index: usr.bin/ftp/fetch.c
===
RCS file: /cvs/src/usr.bin/ftp/fetch.c,v
retrieving revision 1.148
diff -u -p -u -r1.148 fetch.c
--- usr.bin/ftp/fetch.c 18 Aug 2016 16:23:06 -  1.148
+++ usr.bin/ftp/fetch.c 19 Aug 2016 23:28:11 -
@@ -557,8 +557,10 @@ noslash:
}
 #endif /* !SMALL */
 
-again:
-   if (connect_sync(s, res->ai_addr, res->ai_addrlen) < 0) {
+   for (error = connect(s, res->ai_addr, res->ai_addrlen);
+   error != 0 && errno == EINTR; error = connect_wait(s))
+   continue;
+   if (error != 0) {
save_errno = errno;
close(s);
errno = save_errno;
Index: usr.bin/ftp/ftp.c
===
RCS file: /cvs/src/usr.bin/ftp/ftp.c,v
retrieving revision 1.98
diff -u -p -u -r1.98 ftp.c
--- usr.bin/ftp/ftp.c   18 Aug 2016 16:23:06 -  1.98
+++ usr.bin/ftp/ftp.c   19 Aug 2016 23:28:22 -
@@ -219,8 +219,10 @@ hookup(char *host, char *port)
}
}
 #endif /* !SMALL */
-   error = connect_sync(s, res->ai_addr, res->ai_addrlen);
-   if (error) {
+   for (error = connect(s, res->ai_addr, res->ai_addrlen);
+   error != 0 && errno == EINTR; error = connect_wait(s))
+   continue;
+   if (error != 0) {
/* this "if" clause is to prevent print warning twice */
if (verbose && res->ai_next) {
if (getnameinfo(res->ai_addr, res->ai_addrlen,
@@ -1514,8 +1516,11 @@ reinit:
} else
goto bad;
 
-   if (connect_sync(data, (struct sockaddr *)&data_addr,
-   data_addr.su_len) < 0) {
+   for (error = connect(data, (struct sockaddr *)&data_addr,
+   data_addr.su_len); error != 0 && errno == EINTR;
+   error = connect_wait(data))
+   continue;
+   if (error != 0) {
if (activefallback) {
(void)close(data);
data = -1;
Index: usr.bin/ftp/util.c
===
RCS file: /cvs/src/usr.bin/ftp/util.c,v
retrieving revision 1.80
diff -u -p -u -r1.80 util.c
--- usr.bin/ftp/util.c  18 Aug 2016 16:23:06 -  1.80
+++ usr.bin/ftp/util.c  19 Aug 2016 22:03:10 -
@@ -67,6 +67,7 @@
  * FTP User Program -- Misc support routines
  */
 #include 
+#include 
 #include 
 #include 
 
@@ -1070,35 +1071,25 @@ controlediting(void)
 #endif /* !SMALL */
 
 /*
- * Wrapper for connect(2) that restarts the syscall when
- * interrupted and operates synchronously.
+ * Wait for an asynchronous connect(2) attempt to finish.
  */
 int
-connect_sync(int s, const struct sockaddr *name, socklen_t namelen)
+connect_wait(int s)
 {
struct pollfd pfd[1];
int error = 0;
socklen_t len = sizeof(error);
 
-   if (connect(s, name, namelen) < 0) {
-   if (errno != EINTR)
-   return -1;
-   }
-
-   /* An interrupted connect(2) continues asyncronously. */
pfd[0].fd = s;
pfd[0].events = POLLOUT;
-   for (;;) {
-   if (poll(pfd, 1, -1) == -1) {
-   if (errno != EINTR)
-   return -1;
-   continue;
-   }
-   if (getsockopt(s, SOL_SOCKET, SO_ERROR, &error, &len) < 0)
- 

Re: connect_sync

2016-08-19 Thread Ted Unangst
Todd C. Miller wrote:
> Here's a rewrite of my connect_sync() changes to use connect_wait()
> instead.  Unlike the version in the connect(2) manual, this one
> returns EINTR when interrupted by a signal which is probably better.

> Index: usr.bin/ftp/fetch.c
> ===
> RCS file: /cvs/src/usr.bin/ftp/fetch.c,v
> retrieving revision 1.148
> diff -u -p -u -r1.148 fetch.c
> --- usr.bin/ftp/fetch.c   18 Aug 2016 16:23:06 -  1.148
> +++ usr.bin/ftp/fetch.c   19 Aug 2016 22:00:26 -
> @@ -557,8 +557,12 @@ noslash:
>   }
>  #endif /* !SMALL */
>  
> -again:
> - if (connect_sync(s, res->ai_addr, res->ai_addrlen) < 0) {
> + error = connect(s, res->ai_addr, res->ai_addrlen);
> + while (error != 0) {
> + if (errno == EINTR) {
> + error = connect_wait(s);
> + continue;
> + }
>   save_errno = errno;
>   close(s);
>   errno = save_errno;

continue;

hmm. so I was trying to avoid the need for two different functions. I think
there's a mental overhead to "do this, then maybe that". this loop reads
very strangely to me. it's hard to mentally trace the code. it's not really a
loop, just goto spelled with break and continue.

there's a bug, too, since the existing continue wasn't replaced. now you need
some break/continue the other loop dance.



Re: connect_sync

2016-08-19 Thread Todd C. Miller
Here's a rewrite of my connect_sync() changes to use connect_wait()
instead.  Unlike the version in the connect(2) manual, this one
returns EINTR when interrupted by a signal which is probably better.

 - todd

Index: usr.bin/ftp/extern.h
===
RCS file: /cvs/src/usr.bin/ftp/extern.h,v
retrieving revision 1.43
diff -u -p -u -r1.43 extern.h
--- usr.bin/ftp/extern.h18 Aug 2016 16:23:06 -  1.43
+++ usr.bin/ftp/extern.h19 Aug 2016 21:54:32 -
@@ -62,7 +62,6 @@
  */
 
 #include 
-#include 
 
 void   abort_remote(FILE *);
 void   abortpt(int);
@@ -76,7 +75,7 @@ void  cmdabort(int);
 void   cmdscanner(int);
 intcommand(const char *, ...);
 intconfirm(const char *, const char *);
-intconnect_sync(int, const struct sockaddr *, socklen_t);
+intconnect_wait(int);
 FILE   *dataconn(const char *);
 intforegroundproc(void);
 intfileindir(const char *, const char *);
Index: usr.bin/ftp/fetch.c
===
RCS file: /cvs/src/usr.bin/ftp/fetch.c,v
retrieving revision 1.148
diff -u -p -u -r1.148 fetch.c
--- usr.bin/ftp/fetch.c 18 Aug 2016 16:23:06 -  1.148
+++ usr.bin/ftp/fetch.c 19 Aug 2016 22:00:26 -
@@ -557,8 +557,12 @@ noslash:
}
 #endif /* !SMALL */
 
-again:
-   if (connect_sync(s, res->ai_addr, res->ai_addrlen) < 0) {
+   error = connect(s, res->ai_addr, res->ai_addrlen);
+   while (error != 0) {
+   if (errno == EINTR) {
+   error = connect_wait(s);
+   continue;
+   }
save_errno = errno;
close(s);
errno = save_errno;
Index: usr.bin/ftp/ftp.c
===
RCS file: /cvs/src/usr.bin/ftp/ftp.c,v
retrieving revision 1.98
diff -u -p -u -r1.98 ftp.c
--- usr.bin/ftp/ftp.c   18 Aug 2016 16:23:06 -  1.98
+++ usr.bin/ftp/ftp.c   19 Aug 2016 22:02:35 -
@@ -219,8 +219,12 @@ hookup(char *host, char *port)
}
}
 #endif /* !SMALL */
-   error = connect_sync(s, res->ai_addr, res->ai_addrlen);
-   if (error) {
+   error = connect(s, res->ai_addr, res->ai_addrlen);
+   while (error != 0) {
+   if (errno == EINTR) {
+   error = connect_wait(s);
+   continue;
+   }
/* this "if" clause is to prevent print warning twice */
if (verbose && res->ai_next) {
if (getnameinfo(res->ai_addr, res->ai_addrlen,
@@ -235,11 +239,12 @@ hookup(char *host, char *port)
close(s);
errno = error;
s = -1;
-   continue;
+   break;
}
 
/* finally we got one */
-   break;
+   if (error == 0)
+   break;
}
if (s < 0) {
warn("%s", cause);
@@ -1514,8 +1519,13 @@ reinit:
} else
goto bad;
 
-   if (connect_sync(data, (struct sockaddr *)&data_addr,
-   data_addr.su_len) < 0) {
+   error = connect(data, (struct sockaddr *)&data_addr,
+   data_addr.su_len);
+   while (error != 0) {
+   if (errno == EINTR) {
+   error = connect_wait(data);
+   continue;
+   }
if (activefallback) {
(void)close(data);
data = -1;
Index: usr.bin/ftp/util.c
===
RCS file: /cvs/src/usr.bin/ftp/util.c,v
retrieving revision 1.80
diff -u -p -u -r1.80 util.c
--- usr.bin/ftp/util.c  18 Aug 2016 16:23:06 -  1.80
+++ usr.bin/ftp/util.c  19 Aug 2016 22:03:10 -
@@ -67,6 +67,7 @@
  * FTP User Program -- Misc support routines
  */
 #include 
+#include 
 #include 
 #include 
 
@@ -1070,35 +1071,25 @@ controlediting(void)
 #endif /* !SMALL */
 
 /*
- * Wrapper for connect(2) that restarts the syscall when
- * interrupted and operates synchronously.
+ * Wait for an asynchronous connect(2) attempt to finish.
  */
 int
-connect_sync(int s, const struct sockaddr *name, socklen_t namelen)
+connect_wait(int s)
 {
struct pollfd pfd[1];
int error = 0;
socklen_t len = sizeof(error);
 
-   if (connect(s, name, namelen) < 0) {
-   if (errno != EINTR)
-   return -1;
-   }
-
-   /* An interrupted connect(2) continues asyncronously. */
pfd[0].fd

Re: connect_sync

2016-08-19 Thread Todd C. Miller
Another option is to use the connect_wait() function I added to
EXAMPLES in connect(2).  You would only call it if connect(2) returns
-1 with errno == EINTR.

 - todd



connect_sync

2016-08-19 Thread Ted Unangst
connect_sync looks like it could be a useful piece of code in other places.
however, putting the loop in the function means that ^C may not work... ftp
has disgusting signal handlers and longjmps, but i don't think that's how we
want other programs to be written.

i've rewritten the function slightly to return an error code. if the error is
EAGAIN, then the caller should call it again. or not, if a signal flag has
been set. thus we don't get stuck in a loop.

the changes required to the callers in ftp are minimal but this is a more
useful "recipe". thoughts?


Index: fetch.c
===
RCS file: /cvs/src/usr.bin/ftp/fetch.c,v
retrieving revision 1.148
diff -u -p -r1.148 fetch.c
--- fetch.c 18 Aug 2016 16:23:06 -  1.148
+++ fetch.c 19 Aug 2016 21:03:01 -
@@ -558,7 +558,10 @@ noslash:
 #endif /* !SMALL */
 
 again:
-   if (connect_sync(s, res->ai_addr, res->ai_addrlen) < 0) {
+   while ((error = connect_sync(s, res->ai_addr, res->ai_addrlen))
+   == EAGAIN)
+   continue;
+   if (error) {
save_errno = errno;
close(s);
errno = save_errno;
Index: ftp.c
===
RCS file: /cvs/src/usr.bin/ftp/ftp.c,v
retrieving revision 1.98
diff -u -p -r1.98 ftp.c
--- ftp.c   18 Aug 2016 16:23:06 -  1.98
+++ ftp.c   19 Aug 2016 21:04:43 -
@@ -219,7 +219,9 @@ hookup(char *host, char *port)
}
}
 #endif /* !SMALL */
-   error = connect_sync(s, res->ai_addr, res->ai_addrlen);
+   while ((error = connect_sync(s, res->ai_addr, res->ai_addrlen))
+   == EAGAIN)
+   continue;
if (error) {
/* this "if" clause is to prevent print warning twice */
if (verbose && res->ai_next) {
@@ -1514,8 +1516,10 @@ reinit:
} else
goto bad;
 
-   if (connect_sync(data, (struct sockaddr *)&data_addr,
-   data_addr.su_len) < 0) {
+   while ((error = connect_sync(data, (struct sockaddr 
*)&data_addr,
+   data_addr.su_len)) == EAGAIN)
+   continue;
+   if (error) {
if (activefallback) {
(void)close(data);
data = -1;
Index: util.c
===
RCS file: /cvs/src/usr.bin/ftp/util.c,v
retrieving revision 1.80
diff -u -p -r1.80 util.c
--- util.c  18 Aug 2016 16:23:06 -  1.80
+++ util.c  19 Aug 2016 21:00:31 -
@@ -1076,29 +1076,29 @@ controlediting(void)
 int
 connect_sync(int s, const struct sockaddr *name, socklen_t namelen)
 {
-   struct pollfd pfd[1];
-   int error = 0;
-   socklen_t len = sizeof(error);
+   int error;
 
-   if (connect(s, name, namelen) < 0) {
-   if (errno != EINTR)
-   return -1;
-   }
+   if (connect(s, name, namelen) == 0)
+   return 0;
+   if (errno == EINTR)
+   return EAGAIN;
+   if (errno == EALREADY) {
+   /* An interrupted connect(2) continues asyncronously. */
+   struct pollfd pfd[1];
+   socklen_t len = sizeof(error);
 
-   /* An interrupted connect(2) continues asyncronously. */
-   pfd[0].fd = s;
-   pfd[0].events = POLLOUT;
-   for (;;) {
+   pfd[0].fd = s;
+   pfd[0].events = POLLOUT;
if (poll(pfd, 1, -1) == -1) {
-   if (errno != EINTR)
-   return -1;
-   continue;
+   if (errno == EINTR)
+   return EAGAIN;
+   return errno;
}
if (getsockopt(s, SOL_SOCKET, SO_ERROR, &error, &len) < 0)
-   return -1;
+   return errno;
if (error != 0)
-   errno = error;
-   break;
+   return error;
}
-   return (error ? -1 : 0);
+
+   return errno;
 }



Re: ld.so initarray support

2016-08-19 Thread Mark Kettenis
> From: Philip Guenther 
> Date: Thu, 18 Aug 2016 21:09:10 -0700
> 
> On Thursday, August 18, 2016, Mark Kettenis  wrote:
> ...
> >
> > > > There is a functional change here.  Our current ld.so doesn't run
> > > > DT_INIT and DT_FINI for the main executable.  The ELF standard is a bit
> > > > ambiguous about this, but Linux does run tose for the main executable.
> > > > And Solaris allegedly does as well.  So my diff changes that.
> > >
> > > ld.so doesn't run them because __start() in csu does!  Note that
> > csu needs > to run them for static executables, and we use the
> > same crt0.o for both > static and dynamic executables.  I think
> > you're double executing them with > this.
> >
> > We're not double executing because we don't create a DT_INIT entry for
> > them.
> 
> Hmm, is that a bug?  Static and dynamic should ideally behave the same for
> all these, no?

Ah, perhaps I wasn't clear.  We don't create DT_INIT for both static
and dynamic executables.

You raise an interesting question though.  Traditional static
executables cannot have DT_INIT since they don't have a .dynamic
section.  But static PIE executables can have DT_INIT.  So should our
self-relocation code attempt to exeute it?



Re: MP-safe L2 "lookup" w/o atomic operation

2016-08-19 Thread Alexander Bluhm
On Mon, Aug 15, 2016 at 01:52:46PM +0200, Martin Pieuchot wrote:
> More tests, comments, ok are welcome.

There is an issue with path mtu discovery that my regression test
found, but i think that can be fixed with this diff in tree.

> +/*
> + * Invalidate the cached route entry of the gateway entry ``rt''.
> + */
> +void
> +rt_putgwroute(struct rtentry *rt)
> +{
> + struct rtentry *nhrt = rt->rt_gwroute;
> +
> + KERNEL_ASSERT_LOCKED();
> +
> + if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL)
> + return;
> +
> + KASSERT(nhrt->rt_cachecnt > 0);

Could you put a KASSERT(ISSET(rt->rt_flags, RTF_CACHED)) before
accessing rt_cachecnt?


> @@ -615,7 +615,27 @@ route_output(struct mbuf *m, ...)
>   }
>   break;
>   case RTM_DELETE:
> - error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid);
...
> + error = rtrequest(RTM_DELETE, &info, prio, NULL, tableid);
>   if (error == 0)
>   goto report;
>   break;

I think you should keep passing &rt instead of NULL, otherwise rt
might be NULL when you goto report.

> +void
> +arpinvalidate(struct rtentry *rt)
> +{
> + struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
> + struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
> +
> + la_hold_total -= ml_purge(&la->la_ml);
> + sdl->sdl_alen = 0;
> + la->la_asked = 0;
> +}

Is it safe to modify the la and sdl while another CPU might use it?
Especially ml_purge() looks dangerous.

> +void
> +nd6_invalidate(struct rtentry *rt)
> +{
> + struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
> +
> + m_freem(ln->ln_hold);
> + ln->ln_hold = NULL;
> + ln->ln_state = ND6_LLINFO_INCOMPLETE;
> + ln->ln_asked = 0;
> +}

Same here.  Is it safe to free ln_hold?

> @@ -1495,18 +1512,13 @@ nd6_resolve(struct ifnet *ifp, struct rt
>   struct sockaddr_dl *sdl;
>   struct rtentry *rt;
>   struct llinfo_nd6 *ln = NULL;
> - int error;
>  
>   if (m->m_flags & M_MCAST) {
>   ETHER_MAP_IPV6_MULTICAST(&satosin6(dst)->sin6_addr, desten);
>   return (0);
>   }
>  
> - error = rt_checkgate(rt0, &rt);
> - if (error) {
> - m_freem(m);
> - return (error);
> - }
> + rt = rt_getll(rt0);

Should we check for RTF_REJECT like it was done in rt_checkgate()
before?

bluhm



XN bit support for armv7

2016-08-19 Thread Mark Kettenis
The diff below makes the armv7 pmap use the XN (eXecure Never) bit.
This bit makes pages non-executable, making OpenBSD/armv7 catch up
with many of our architectures and provide this important security
measure.  The biggest change made by the diff is that pmap_protect()
now also deals with setting the XN bit if PROT_EXEC permission is
removed from a page.

As far as I can tell, this also gives us full kernel W^X.

ok?


Index: arch/arm/arm/pmap7.c
===
RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
retrieving revision 1.40
diff -u -p -r1.40 pmap7.c
--- arch/arm/arm/pmap7.c19 Aug 2016 13:56:08 -  1.40
+++ arch/arm/arm/pmap7.c19 Aug 2016 14:55:16 -
@@ -1553,41 +1553,27 @@ void
 pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
 {
struct l2_bucket *l2b;
-   pt_entry_t *ptep, opte;
+   pt_entry_t *ptep, opte, npte;
vaddr_t next_bucket;
-   u_int flags;
int flush;
 
NPDEBUG(PDB_PROTECT,
printf("pmap_protect: pm %p sva 0x%lx eva 0x%lx prot 0x%x",
pm, sva, eva, prot));
 
-   if ((prot & PROT_READ) == 0) {
-NPDEBUG(PDB_PROTECT, printf("\n"));
-   pmap_remove(pm, sva, eva);
+   if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
return;
-   }
 
-   if (prot & PROT_WRITE) {
-   /*
-* If this is a read->write transition, just ignore it and let
-* uvm_fault() take care of it later.
-*/
-NPDEBUG(PDB_PROTECT, printf("\n"));
-/* XXX WHAT IF RWX -> RW ??? */
+   if (prot == PROT_NONE) {
+   pmap_remove(pm, sva, eva);
return;
}
-
-   /*
-* OK, at this point, we know we're doing write-protect operation.
-*/
-
+   
/* XXX is that threshold of 4 the best choice for v7? */
if (pmap_is_current(pm))
flush = ((eva - sva) > (PAGE_SIZE * 4)) ? -1 : 0;
else
flush = -1;
-   flags = 0;
 
while (sva < eva) {
next_bucket = L2_NEXT_BUCKET(sva);
@@ -1603,27 +1589,27 @@ NPDEBUG(PDB_PROTECT, printf("\n"));
ptep = &l2b->l2b_kva[l2pte_index(sva)];
 
while (sva < next_bucket) {
-   opte = *ptep;
-   if (opte != L2_TYPE_INV && l2pte_is_writeable(opte, 
pm)) {
+   npte = opte = *ptep;
+   if (opte != L2_TYPE_INV) {
struct vm_page *pg;
-   u_int f;
 
-   pg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
-   *ptep = opte | L2_V7_AP(0x4);
+   if ((prot & PROT_WRITE) == 0)
+   npte |= L2_V7_AP(0x4);
+   if ((prot & PROT_EXEC) == 0)
+   npte |= L2_V7_S_XN;
+   *ptep = npte;
PTE_SYNC(ptep);
 
-   if (pg != NULL) {
-   f = pmap_modify_pv(pg, pm, sva,
+   pg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
+   if (pg != NULL && (prot & PROT_WRITE) == 0)
+   pmap_modify_pv(pg, pm, sva,
PVF_WRITE, 0);
-   } else
-   f = PVF_EXEC;
 
if (flush >= 0) {
flush++;
if (opte & L2_V7_AF)
cpu_tlb_flushID_SE(sva);
-   } else
-   flags |= f;
+   }
}
 
sva += PAGE_SIZE;
@@ -1634,7 +1620,7 @@ NPDEBUG(PDB_PROTECT, printf("\n"));
if (flush < 0)
pmap_tlb_flushID(pm);
 
-NPDEBUG(PDB_PROTECT, printf("\n"));
+   NPDEBUG(PDB_PROTECT, printf("\n"));
 }
 
 void
@@ -1752,6 +1738,9 @@ pmap_fault_fixup(pmap_t pm, vaddr_t va, 
if (pte == L2_TYPE_INV)
goto out;
 
+   if ((ftype & PROT_EXEC) && (pte & L2_V7_S_XN))
+   goto out;
+
/* only if vectors are low ?? */
/*
 * Catch a userland access to the vector page mapped at 0x0
@@ -1763,15 +1752,6 @@ pmap_fault_fixup(pmap_t pm, vaddr_t va, 
 
pa = l2pte_pa(pte);
 
-   if ((ftype & PROT_EXEC) && (pte & L2_V7_S_XN)) {
-printf("%s: va %08lx ftype %x %c pte %08x\n", __func__, va, ftype, user ? 'u' 
: 's', pte);
-printf("fault on exec\n");
-#ifdef DDB
-Debugger();
-#endif
-   /* XXX FIX THIS */
-   goto out;
-   }
if ((ftype & PR

Re: [PATCH] [1/1] pf refactoring

2016-08-19 Thread Mike Belopuhov
On 19 August 2016 at 11:49, Richard Procter  wrote:
>
> The final patch in the pf series. Will commit when I do the previous one
> in around 24 hours unless there are objections.
>
>
> - pushes the 'field changed' guards into the 'change field' functions.
> This lets us normalise many of the existing guards and eliminate lots of
> code from pf_translate().
>
> - while here, optimise pf_patch_32() a little. Relatively clean and gains
> 25% speedup for a function that can be called four times per TCP segment.
> I don't know if this matters in the big picture.
>
> - simplify pf_match_addr(). Compiler achieves the same optimisation;
> I saw no difference in the assembly.
>
>
> The 'working' for these appears at http://203.79.107.124/ under the
> phase two heading.
>
> best,
> Richard.
>

I've looked through this diff and I believe it's correct.
OK mikeb



Re: [PATCH] [0/1] pf refactoring

2016-08-19 Thread Mike Belopuhov
On 19 August 2016 at 11:33, Richard Procter  wrote:
> Hi,
>
> I've reduced the pf refactor (phase two) to two patches, which I'll be
> committing in 24 hours or so unless there are any objections.
>
> I'm confident it won't, but supposing post-commit these have in
> fact blown up, my first suspect would be the afto paths.
>

I've looked through it and couldn't find anything wrong with it.
I do however find pacthing of values in pf_translate_icmp_af
unneccessary since we'll be throwing away the original header
anyway.

OK mikeb for the diff.

>
> This patch removes pf_translate_ap().
> The 'working' appears at http://203.79.107.124/ in the 012*.diff files.
>
> best,
> Richard.
>



Re: netinet6 free() diff

2016-08-19 Thread David Hill
Hello -

Regenerated diff against -current. 

Index: netinet6/frag6.c
===
RCS file: /cvs/src/sys/netinet6/frag6.c,v
retrieving revision 1.67
diff -u -p -r1.67 frag6.c
--- netinet6/frag6.c7 Mar 2016 18:44:00 -   1.67
+++ netinet6/frag6.c19 Aug 2016 12:46:40 -
@@ -303,7 +303,7 @@ frag6_input(struct mbuf **mp, int *offp,
 
/* dequeue the fragment. */
LIST_REMOVE(af6, ip6af_list);
-   free(af6, M_FTABLE, 0);
+   free(af6, M_FTABLE, sizeof(*af6));
 
/* adjust pointer. */
ip6err = mtod(merr, struct ip6_hdr *);
@@ -348,14 +348,14 @@ frag6_input(struct mbuf **mp, int *offp,
ecn0 = (ntohl(af6->ip6af_flow) >> 20) & IPTOS_ECN_MASK;
if (ecn == IPTOS_ECN_CE) {
if (ecn0 == IPTOS_ECN_NOTECT) {
-   free(ip6af, M_FTABLE, 0);
+   free(ip6af, M_FTABLE, sizeof(*ip6af));
goto dropfrag;
}
if (ecn0 != IPTOS_ECN_CE)
af6->ip6af_flow |= htonl(IPTOS_ECN_CE << 20);
}
if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
-   free(ip6af, M_FTABLE, 0);
+   free(ip6af, M_FTABLE, sizeof(*ip6af));
goto dropfrag;
}
 
@@ -384,7 +384,7 @@ frag6_input(struct mbuf **mp, int *offp,
i,
inet_ntop(AF_INET6, &q6->ip6q_src, ip, sizeof(ip)));
 #endif
-   free(ip6af, M_FTABLE, 0);
+   free(ip6af, M_FTABLE, sizeof(*ip6af));
goto flushfrags;
}
}
@@ -398,7 +398,7 @@ frag6_input(struct mbuf **mp, int *offp,
i,
inet_ntop(AF_INET6, &q6->ip6q_src, ip, sizeof(ip)));
 #endif
-   free(ip6af, M_FTABLE, 0);
+   free(ip6af, M_FTABLE, sizeof(*ip6af));
goto flushfrags;
}
}
@@ -449,12 +449,12 @@ frag6_input(struct mbuf **mp, int *offp,
t = t->m_next;
t->m_next = IP6_REASS_MBUF(af6);
m_adj(t->m_next, af6->ip6af_offset);
-   free(af6, M_FTABLE, 0);
+   free(af6, M_FTABLE, sizeof(*af6));
}
 
/* adjust offset to point where the original next header starts */
offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
-   free(ip6af, M_FTABLE, 0);
+   free(ip6af, M_FTABLE, sizeof(*ip6af));
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
ip6->ip6_src = q6->ip6q_src;
@@ -465,7 +465,7 @@ frag6_input(struct mbuf **mp, int *offp,
if (frag6_deletefraghdr(m, offset) != 0) {
TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
frag6_nfrags -= q6->ip6q_nfrag;
-   free(q6, M_FTABLE, 0);
+   free(q6, M_FTABLE, sizeof(*q6));
frag6_nfragpackets--;
goto dropfrag;
}
@@ -480,7 +480,7 @@ frag6_input(struct mbuf **mp, int *offp,
 
TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
frag6_nfrags -= q6->ip6q_nfrag;
-   free(q6, M_FTABLE, 0);
+   free(q6, M_FTABLE, sizeof(*q6));
frag6_nfragpackets--;
 
if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
@@ -506,12 +506,12 @@ frag6_input(struct mbuf **mp, int *offp,
while ((af6 = LIST_FIRST(&q6->ip6q_asfrag)) != NULL) {
LIST_REMOVE(af6, ip6af_list);
m_freem(IP6_REASS_MBUF(af6));
-   free(af6, M_FTABLE, 0);
+   free(af6, M_FTABLE, sizeof(*af6));
}
ip6stat.ip6s_fragdropped += q6->ip6q_nfrag;
TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
frag6_nfrags -= q6->ip6q_nfrag;
-   free(q6, M_FTABLE, 0);
+   free(q6, M_FTABLE, sizeof(*q6));
frag6_nfragpackets--;
 
  dropfrag:
@@ -579,11 +579,11 @@ frag6_freef(struct ip6q *q6)
ICMP6_TIME_EXCEED_REASSEMBLY, 0);
} else
m_freem(m);
-   free(af6, M_FTABLE, 0);
+   free(af6, M_FTABLE, sizeof(*af6));
}
TAILQ_REMOVE(&frag6_queue, q6, ip6q_queue);
frag6_nfrags -= q6->ip6q_nfrag;
-   free(q6, M_FTABLE, 0);
+   free(q6, M_FTABLE, sizeof(*q6));
frag6_nfragpackets--;
 }
 
Index: netinet6/in6.c
===
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.190
diff -u -p -r1.190 in6.c
--- netinet6/in6.c  8 Aug 2016 13:09:36 -   1.190
+++ netinet6/in6.c  19 Aug 2016 12:46:41 -
@@ -1917,5 +1917,5 @@ in6_domifdetach

com@fdt simplification

2016-08-19 Thread Mark Kettenis
Figuring out the clock frequency that is used for a com(4) device on
armv7 is hard.  If you're lucky, the device tree contains a
"clock-frequency" property that gives you the answer.  But if that
property isn't there you're supposed to traverse the clock tree to
find out the base frequency and apply the proper clock scaling.  In
many cases that requires reading hardware registers of a clock control
unit.  And during the early boot stages the driver for such a clock
control unit isn't there yet.

The clock frequency is necessary to set up the proper baud rate.
However, on armv7 u-boot will have configured the console serial port.
So we can simply skip configuring the baud rate and such for the early
console and deal with it later, when the com(4) driver properly
attaches.  Reading and writing single characters from the serial port
will work just fine, and that is all the serial console does.

I think this makes it much more likely to have a working early boot
console (which will help bringing up new boards) and gets rid of the
duplicated code to guess the right clock frequency that we currently
have.

The diff also installs its own cngetc and cnputc functions.  For now,
these are copies of comcngetc() and comcnputc().  But a future patch
will replace them with versions that account for the fact that
register access is on 32-bit boundaries such that we can get rid of
the armv7_a4x_bs_tag hack.

ok?


Index: arch/armv7/dev/com_fdt.c
===
RCS file: /cvs/src/sys/arch/armv7/dev/com_fdt.c,v
retrieving revision 1.6
diff -u -p -r1.6 com_fdt.c
--- arch/armv7/dev/com_fdt.c17 Aug 2016 13:44:48 -  1.6
+++ arch/armv7/dev/com_fdt.c19 Aug 2016 12:16:58 -
@@ -26,6 +26,7 @@
 
 #include 
 #include 
+#include 
 
 /* pick up armv7_a4x_bs_tag */
 #include 
@@ -55,12 +56,20 @@ struct cfattach com_fdt_ca = {
sizeof (struct com_fdt_softc), com_fdt_match, com_fdt_attach
 };
 
+int com_fdt_cngetc(dev_t);
+void com_fdt_cnputc(dev_t, int);
+void com_fdt_cnpollc(dev_t, int);
+
+struct consdev com_fdt_cons = {
+   NULL, NULL, com_fdt_cngetc, com_fdt_cnputc, com_fdt_cnpollc, NULL,
+   NODEV, CN_LOWPRI
+};
+
 void
 com_fdt_init_cons(void)
 {
struct fdt_reg reg;
void *node;
-   int freq = 4800;
 
if ((node = fdt_find_cons("ti,omap3-uart")) == NULL &&
(node = fdt_find_cons("ti,omap4-uart")) == NULL &&
@@ -69,16 +78,21 @@ com_fdt_init_cons(void)
if (fdt_get_reg(node, 0, ®))
return;
 
-   if ((node = fdt_find_node("/")) != NULL &&
-   (fdt_is_compatible(node, "allwinner,sun4i-a10") ||
-   fdt_is_compatible(node, "allwinner,sun5i-a10s") ||
-   fdt_is_compatible(node, "allwinner,sun5i-r8") ||
-   fdt_is_compatible(node, "allwinner,sun7i-a20")))
-   freq = 2400;
-
-   comcnattach(&armv7_a4x_bs_tag, reg.addr, comcnspeed, freq,
-   comcnmode);
-   comdefaultrate = comcnspeed;
+   /*
+* Figuring out the clock frequency is rather complicated as
+* om many SoCs this requires traversing a fair amount of the
+* clock tree.  Instead we rely on the firmware to set up the
+* console for us and bypass the cominit() call that
+* comcnattach() does by doing the minimal setup here.
+*/
+
+   comconsiot = &armv7_a4x_bs_tag;
+   if (bus_space_map(comconsiot, reg.addr, reg.size, 0, &comconsioh))
+   return;
+
+   comconsrate = comcnspeed;
+   comconscflag = comcnmode;
+   cn_tab = &com_fdt_cons;
 }
 
 int
@@ -130,6 +144,7 @@ com_fdt_attach(struct device *parent, st
if (stdout_node == faa->fa_node) {
SET(sc->sc.sc_hwflags, COM_HW_CONSOLE);
SET(sc->sc.sc_swflags, COM_SW_SOFTCAR);
+   comconsfreq = sc->sc.sc_frequency;
}
 
if (bus_space_map(sc->sc.sc_iot, sc->sc.sc_iobase,
@@ -154,4 +169,21 @@ com_fdt_intr_designware(void *cookie)
bus_space_read_1(sc->sc_iot, sc->sc_ioh, com_usr);
 
return comintr(sc);
+}
+
+int
+com_fdt_cngetc(dev_t dev)
+{
+   return com_common_getc(comconsiot, comconsioh);
+}
+
+void
+com_fdt_cnputc(dev_t dev, int c)
+{
+   com_common_putc(comconsiot, comconsioh, c);
+}
+
+void
+com_fdt_cnpollc(dev_t dev, int on)
+{
 }



Re: man update after login.conf uses auto rounds

2016-08-19 Thread Jason McIntyre
On Fri, Aug 19, 2016 at 11:12:02AM +0100, Stuart Henderson wrote:
> On 2016/08/19 12:04, Daniel Jakots wrote:
> > Hi,
> > 
> > In June tedu@ committed [0] a diff to move login.conf to use auto
> > rounds for bcrypt on amd64, sparc64, i386 and macppc.
> > 
> > Here's a diff to update the man pages. Currently the man pages are
> > outdated on these four architectures but I guess it's still right for
> > other (old) architectures. After this diff, man page on these four
> > architectures will be up to date, but on others it will be wrong (as
> > they're still using 8 rounds). Is it possible to have different man
> > pages for different architectures or am I missing something?
> > 
> > 
> > [0]: https://marc.info/?l=openbsd-cvs&m=146697318611223&w=2
> > 
> > Cheers,
> > Daniel
> > 
> > Index: passwd.1
> > ===
> > RCS file: /cvs/src/usr.bin/passwd/passwd.1,v
> > retrieving revision 1.44
> > diff -u -p -r1.44 passwd.1
> > --- passwd.126 Nov 2015 19:01:47 -  1.44
> > +++ passwd.119 Aug 2016 09:25:41 -
> > @@ -70,7 +70,8 @@ Password encryption parameters depend on
> >  .Dq localcipher
> >  capability in
> >  .Xr login.conf 5 .
> > -If none is specified, then blowfish with 8 rounds is used.
> > +If none is specified, then blowfish with an automatically selected
> > +number of rounds, based on system performance is used.
> 
> How about:
> 
> If none is specified, then blowfish is used, with the number of
> rounds selected based on system performance.
> 

i was going to suggest something like this, but my solution had more
text and was less elegant - i really like this wording.

i would take that first comma out though and it will read better.

jmc



Re: man update after login.conf uses auto rounds

2016-08-19 Thread Stuart Henderson
On 2016/08/19 12:04, Daniel Jakots wrote:
> Hi,
> 
> In June tedu@ committed [0] a diff to move login.conf to use auto
> rounds for bcrypt on amd64, sparc64, i386 and macppc.
> 
> Here's a diff to update the man pages. Currently the man pages are
> outdated on these four architectures but I guess it's still right for
> other (old) architectures. After this diff, man page on these four
> architectures will be up to date, but on others it will be wrong (as
> they're still using 8 rounds). Is it possible to have different man
> pages for different architectures or am I missing something?
> 
> 
> [0]: https://marc.info/?l=openbsd-cvs&m=146697318611223&w=2
> 
> Cheers,
> Daniel
> 
> Index: passwd.1
> ===
> RCS file: /cvs/src/usr.bin/passwd/passwd.1,v
> retrieving revision 1.44
> diff -u -p -r1.44 passwd.1
> --- passwd.1  26 Nov 2015 19:01:47 -  1.44
> +++ passwd.1  19 Aug 2016 09:25:41 -
> @@ -70,7 +70,8 @@ Password encryption parameters depend on
>  .Dq localcipher
>  capability in
>  .Xr login.conf 5 .
> -If none is specified, then blowfish with 8 rounds is used.
> +If none is specified, then blowfish with an automatically selected
> +number of rounds, based on system performance is used.

How about:

If none is specified, then blowfish is used, with the number of
rounds selected based on system performance.

>  .Sh FILES
>  .Bl -tag -width /etc/master.passwd -compact
>  .It Pa /etc/login.conf
> Index: login.conf.5
> ===
> RCS file: /cvs/src/share/man/man5/login.conf.5,v
> retrieving revision 1.62
> diff -u -p -r1.62 login.conf.5
> --- login.conf.5  30 Mar 2016 06:58:06 -  1.62
> +++ login.conf.5  19 Aug 2016 09:26:23 -
> @@ -159,7 +159,7 @@ See
>  .Xr login 1 .
>  .\"
>  .Pp
> -.It localcipher Ta string Ta blowfish,8 Ta
> +.It localcipher Ta string Ta blowfish,a Ta
>  The cipher to use for encrypting passwords.
>  Refer to
>  .Xr crypt_newhash 3
> 



man update after login.conf uses auto rounds

2016-08-19 Thread Daniel Jakots
Hi,

In June tedu@ committed [0] a diff to move login.conf to use auto
rounds for bcrypt on amd64, sparc64, i386 and macppc.

Here's a diff to update the man pages. Currently the man pages are
outdated on these four architectures but I guess it's still right for
other (old) architectures. After this diff, man page on these four
architectures will be up to date, but on others it will be wrong (as
they're still using 8 rounds). Is it possible to have different man
pages for different architectures or am I missing something?


[0]: https://marc.info/?l=openbsd-cvs&m=146697318611223&w=2

Cheers,
Daniel

Index: passwd.1
===
RCS file: /cvs/src/usr.bin/passwd/passwd.1,v
retrieving revision 1.44
diff -u -p -r1.44 passwd.1
--- passwd.126 Nov 2015 19:01:47 -  1.44
+++ passwd.119 Aug 2016 09:25:41 -
@@ -70,7 +70,8 @@ Password encryption parameters depend on
 .Dq localcipher
 capability in
 .Xr login.conf 5 .
-If none is specified, then blowfish with 8 rounds is used.
+If none is specified, then blowfish with an automatically selected
+number of rounds, based on system performance is used.
 .Sh FILES
 .Bl -tag -width /etc/master.passwd -compact
 .It Pa /etc/login.conf
Index: login.conf.5
===
RCS file: /cvs/src/share/man/man5/login.conf.5,v
retrieving revision 1.62
diff -u -p -r1.62 login.conf.5
--- login.conf.530 Mar 2016 06:58:06 -  1.62
+++ login.conf.519 Aug 2016 09:26:23 -
@@ -159,7 +159,7 @@ See
 .Xr login 1 .
 .\"
 .Pp
-.It localcipher Ta string Ta blowfish,8 Ta
+.It localcipher Ta string Ta blowfish,a Ta
 The cipher to use for encrypting passwords.
 Refer to
 .Xr crypt_newhash 3



[PATCH] [1/1] pf refactoring

2016-08-19 Thread Richard Procter

The final patch in the pf series. Will commit when I do the previous one 
in around 24 hours unless there are objections.


- pushes the 'field changed' guards into the 'change field' functions. 
This lets us normalise many of the existing guards and eliminate lots of 
code from pf_translate().

- while here, optimise pf_patch_32() a little. Relatively clean and gains 
25% speedup for a function that can be called four times per TCP segment. 
I don't know if this matters in the big picture.

- simplify pf_match_addr(). Compiler achieves the same optimisation;
I saw no difference in the assembly.


The 'working' for these appears at http://203.79.107.124/ under the 
phase two heading. 

best, 
Richard. 

Index: net/pf.c
===
--- net.orig/pf.c
+++ net/pf.c
@@ -160,15 +160,15 @@ intpf_modulate_sack(struct 
pf_pdesc
struct pf_state_peer *);
 int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
u_int16_t *, u_int16_t *);
 int pf_change_icmp_af(struct mbuf *, int,
struct pf_pdesc *, struct pf_pdesc *,
struct pf_addr *, struct pf_addr *, sa_family_t,
sa_family_t);
-voidpf_translate_a(struct pf_pdesc *, struct pf_addr *,
+int pf_translate_a(struct pf_pdesc *, struct pf_addr *,
struct pf_addr *);
 voidpf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
u_int16_t *, struct pf_addr *, struct pf_addr *,
u_int16_t);
 int pf_translate_icmp_af(struct pf_pdesc*, int, void *);
 voidpf_send_tcp(const struct pf_rule *, sa_family_t,
const struct pf_addr *, const struct pf_addr *,
@@ -1859,73 +1859,103 @@ pf_cksum_fixup_a(u_int16_t *cksum, const
return;
if (udp && x == 0x)
x = 0x;
 
*cksum = (u_int16_t)(x);
 }
 
-void
+int
 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
 {
-   u_int16_t new = htons(hi ? ( v << 8) :  v);
-   u_int16_t old = htons(hi ? (*f << 8) : *f);
+   int rewrite = 0;
 
-   pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
-   *f = v;
+   if (*f != v) {
+   u_int16_t old = htons(hi ? (*f << 8) : *f);
+   u_int16_t new = htons(hi ? ( v << 8) :  v);
+
+   pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
+   *f = v;
+   rewrite = 1;
+   }
+
+   return (rewrite);
 }
 
 /* pre: *f is 16-bit aligned within its packet */
-void
+int
 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
 {
-   pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
-   *f = v;
+   int rewrite = 0;
+
+   if (*f != v) {
+   pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
+   *f = v;
+   rewrite = 1;
+   }
+
+   return (rewrite);
 }
 
-void
+int
 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
 {
-   u_int8_t *fb = (u_int8_t*)f;
-   u_int8_t *vb = (u_int8_t*)&v;
+   int rewrite = 0;
+   u_int8_t   *fb = (u_int8_t*)f;
+   u_int8_t   *vb = (u_int8_t*)&v;
 
if (hi && ALIGNED_POINTER(f, u_int16_t)) {
-   pf_patch_16(pd, f, v); /* optimise */
-   return;
+   return (pf_patch_16(pd, f, v)); /* optimise */
}
 
-   pf_patch_8(pd, fb++, *vb++, hi);
-   pf_patch_8(pd, fb++, *vb++,!hi);
+   rewrite += pf_patch_8(pd, fb++, *vb++, hi);
+   rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
+
+   return (rewrite);
 }
 
 /* pre: *f is 16-bit aligned within its packet */
-void
+/* pre: pd->proto != IPPROTO_UDP */
+int
 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
 {
-   u_int16_t *pc = pd->pcksum;
+   int rewrite = 0;
+   u_int16_t  *pc = pd->pcksum;
+   u_int8_tproto = pd->proto;
+
+   /* optimise: inline udp fixup code is unused; let compiler scrub it */
+   if (proto == IPPROTO_UDP)
+   panic("pf_patch_32: udp");
+
+   /* optimise: skip *f != v guard; true for all use-cases */
+   pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
+   pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);
 
-   pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), pd->proto);
-   pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), pd->proto);
*f = v;
+   rewrite = 1;
+
+   return (rewrite);
 }
 
-void
+int
 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
 {
-   u_int8_t *fb = (u_int8_t*)f;
-   u_int8_t *vb = (u_int8_t*)&v;
+   int rewrite = 0;
+   u_int8_t   *fb = (

[PATCH] [0/1] pf refactoring

2016-08-19 Thread Richard Procter
Hi, 

I've reduced the pf refactor (phase two) to two patches, which I'll be 
committing in 24 hours or so unless there are any objections.

I'm confident it won't, but supposing post-commit these have in 
fact blown up, my first suspect would be the afto paths.


This patch removes pf_translate_ap(). 
The 'working' appears at http://203.79.107.124/ in the 012*.diff files.

best, 
Richard. 

Index: net/pf.c
===
--- net.orig/pf.c
+++ net/pf.c
@@ -154,8 +154,6 @@ int  pf_check_tcp_cksum(struct mbuf *,
sa_family_t);
 static __inline voidpf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
u_int8_t);
-voidpf_translate_ap(struct pf_pdesc *, struct pf_addr *,
-   u_int16_t *, struct pf_addr *, u_int16_t);
 voidpf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
const struct pf_addr *, sa_family_t, u_int8_t);
 int pf_modulate_sack(struct pf_pdesc *,
@@ -1910,16 +1908,6 @@ pf_patch_32(struct pf_pdesc *pd, u_int32
 }
 
 void
-pf_translate_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
-struct pf_addr *an, u_int16_t pn)
-{
-   if (pd->af == pd->naf)
-   pf_translate_a(pd, a, an);
-   if (p != NULL)
-   pf_patch_16(pd, p, pn);
-}
-
-void
 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
 {
u_int8_t *fb = (u_int8_t*)f;
@@ -4009,12 +3997,16 @@ pf_translate(struct pf_pdesc *pd, struct
case IPPROTO_TCP:
if (afto || PF_ANEQ(saddr, pd->src, pd->af) ||
*pd->sport != sport) {
-   pf_translate_ap(pd, pd->src, pd->sport, saddr, sport);
+   if (pd->af == pd->naf)
+   pf_translate_a(pd, pd->src, saddr);
+   pf_patch_16(pd, pd->sport, sport);
rewrite = 1;
}
if (afto || PF_ANEQ(daddr, pd->dst, pd->af) ||
*pd->dport != dport) {
-   pf_translate_ap(pd, pd->dst, pd->dport, daddr, dport);
+   if (pd->af == pd->naf)
+   pf_translate_a(pd, pd->dst, daddr);
+   pf_patch_16(pd, pd->dport, dport);
rewrite = 1;
}
break;
@@ -4022,12 +4014,16 @@ pf_translate(struct pf_pdesc *pd, struct
case IPPROTO_UDP:
if (afto || PF_ANEQ(saddr, pd->src, pd->af) ||
*pd->sport != sport) {
-   pf_translate_ap(pd, pd->src, pd->sport, saddr, sport);
+   if (pd->af == pd->naf)
+   pf_translate_a(pd, pd->src, saddr);
+   pf_patch_16(pd, pd->sport, sport);
rewrite = 1;
}
if (afto || PF_ANEQ(daddr, pd->dst, pd->af) ||
*pd->dport != dport) {
-   pf_translate_ap(pd, pd->dst, pd->dport, daddr, dport);
+   if (pd->af == pd->naf)
+   pf_translate_a(pd, pd->dst, daddr);
+   pf_patch_16(pd, pd->dport, dport);
rewrite = 1;
}
break;
@@ -4078,11 +4074,11 @@ pf_translate(struct pf_pdesc *pd, struct
rewrite = 1;
} else {
if (PF_ANEQ(saddr, pd->src, pd->af)) {
-   pf_translate_ap(pd, pd->src, NULL, saddr, 0);
+   pf_translate_a(pd, pd->src, saddr);
rewrite = 1;
}
if (PF_ANEQ(daddr, pd->dst, pd->af)) {
-   pf_translate_ap(pd, pd->dst, NULL, daddr, 0);
+   pf_translate_a(pd, pd->dst, daddr);
rewrite = 1;
}
}
@@ -4113,11 +4109,11 @@ pf_translate(struct pf_pdesc *pd, struct
 #ifdef INET6
case AF_INET6:
if (!afto && PF_ANEQ(saddr, pd->src, pd->af)) {
-   pf_translate_ap(pd, pd->src, NULL, saddr, 0);
+   pf_translate_a(pd, pd->src, saddr);
rewrite = 1;
}
if (!afto && PF_ANEQ(daddr, pd->dst, pd->af)) {
-   pf_translate_ap(pd, pd->dst, NULL, daddr, 0);
+   pf_translate_a(pd, pd->dst, daddr);
rewrite = 1;
}
break;
@@ -4738,18 +4734,24 @@ pf_test_state(struct pf_pdesc *pd, struc
 #endif /* INET6 */
 
if (afto || PF_ANEQ(pd->src, &n

Re: ksh.1: $() quirk note

2016-08-19 Thread Martin Natano
On Thu, Aug 18, 2016 at 02:38:28PM +0200, Dmitrij D. Czarkoff wrote:
> Christian Weisgerber  wrote:
> 
> >I guess.  There _are_ problems in this area, though.
> >
> >$ case x in x);; *);; esac
> >$ echo $(case x in x);; *);; esac)
> >ksh: syntax error: `;;' unexpected
> 
> This should probably be in BUGS section, not as a note in the middle of the
> manual.  Eg.:

I agree, make the ksh manual less special. Ok.


> 
> Index: ksh.1
> ===
> RCS file: /var/cvs/src/bin/ksh/ksh.1,v
> retrieving revision 1.179
> diff -u -p -r1.179 ksh.1
> --- ksh.1 27 Apr 2016 12:46:23 -  1.179
> +++ ksh.1 18 Aug 2016 12:33:49 -
> @@ -1007,12 +1007,6 @@ has the same effect as
>  .Ic $(cat foo) ,
>  but it is carried out more efficiently because no process is started.
>  .Pp
> -.Sy Note :
> -.Pf $( Ar command )
> -expressions are currently parsed by finding the matching parenthesis,
> -regardless of quoting.
> -This should be fixed soon.
> -.Pp
>  Arithmetic substitutions are replaced by the value of the specified 
> expression.
>  For example, the command
>  .Ic echo $((2+3*4))
> @@ -5561,3 +,14 @@ The
>  .Pa CONTRIBUTORS
>  file in the source distribution contains a more complete list of people and
>  their part in the shell's development.
> +.Sh BUGS
> +.Pf $( Ar command )
> +expressions are currently parsed by finding the closest matching (unquoted)
> +parenthesis.
> +Thus some constructs inside
> +.Pf $( Ar command )
> +may produce an error.
> +For example, parenthesis in
> +.Ql x);;
> +is interpreted as closing parenthesis in
> +.Ql $(case x in x);; *);; esac) .
> 



Re: bpf(4), csignal() and selwakup()

2016-08-19 Thread Martin Pieuchot
On 16/08/16(Tue) 20:01, Philip Guenther wrote:
> On Mon, 15 Aug 2016, Martin Pieuchot wrote:
> > I'd like to make sure that bpf_tap(9) does not grab the KERNEL_LOCK().
> > The reason is to reduce the potential lock ordering problems within PF.
> > 
> > I'm currently using a mutex to serialize buffer changes, but since
> > bpf_wakeup() will still need the KERNEL_LOCK(), I'm using a task for
> > that.
> 
> Yes, this is a good direction to unblock the network stack work.  
> However, I suspect there's a bug below.
> 
> 
> > @@ -515,12 +519,28 @@ bpfread(dev_t dev, struct uio *uio, int 
> >  void
> >  bpf_wakeup(struct bpf_d *d)
> >  {
> > -   wakeup((caddr_t)d);
> > +   /*
> > +* As long as csignal() and selwakeup() need to be protected
> > +* by the KERNEL_LOCK() we have to delay the wakeup to
> > +* another context to keep the hot path KERNEL_LOCK()-free.
> > +*/
> > +   bpf_get(d);
> > +   task_add(systq, &d->bd_wake_task);
> 
> We increment the reference count here and decrement it in bpf_wakeup_cb().
> However, if the task is already queued here in bpf_wakeup() then won't it
> get bumped again and only released once?  I suspect this should be:
>   bpf_get(d);
>   if (!task_add(systq, &d->bd_wake_task))
>   bpf_put(d);

Good catch, I'll got with this then.



Re: MP-safe L2 "lookup" w/o atomic operation

2016-08-19 Thread Martin Pieuchot
On 15/08/16(Mon) 13:52, Martin Pieuchot wrote:
> On 11/08/16(Thu) 16:04, Martin Pieuchot wrote:
> > One of the remaining SMP issue with our routing table usage is to
> > guarantee that the L2 entry referenced by a RTF_GATEWAY route via
> > the ``rt_gwroute'' pointer wont be replaced/invalidated by another
> > CPU while we are filling the address field of an Ethernet frame.
> > 
> > The most efficient way, performance wise, to do that is to make the
> > ``rt_gwroute'' pointer immutable during the lifetime of a RTF_GATEWAY
> > route.  If we know that this pointer won't change and that the memory
> > it points to won't be freed as long as a CPU has reference to one of
> > the RTF_GATEWAY routes, we don't need any special protection inside
> > arp_resolve() and nd6_resolve().
> > 
> > Diff below achieve that by always resolving the next-hop entry before
> > inserting the corresponding RTF_GATEWAY route in the tree.  In other
> > words rt_setgwroute() is no longer called in the sending path.
> > 
> > To guarantee that a cached route won't be deleted while it is still
> > referenced a new flag, RTF_CACHED, is added to the route.  arp(8) and
> > ndp(8) treat entry with this flag like RTF_LOCAL.
> > 
> > Removing rt_setgwroute() from the sending path means that inserting a
> > RTF_GATEWAY route will now fail if the next-hop cannot be resolve.  This
> > might introduce regression for some setups but I see that has an
> > improvement since the kernel no longer let you add a route that cannot
> > be used.
> > It also mean that stale routes need to be fixed whenever a new address
> > is configured.  The diff does that and also plug an ifa leak that was
> > happening when the same address is configured twice on an ifa.
> > 
> > Note that even with this diff there are *still* some MP-safeness issue
> > due to stale routes, they will be address in a later diff.
> 
> Updated diff that:
> 
>  - Fix a panic reported by Hrvoje Popovski 
> 
>  - Properly invalidate ARP/NDP entry when issuing a RTM_DELETE from
>userland, as discussed with bluhm@.  For that I introduced
>RTM_INVALIDATE and use it instead of deleting L2 entries.  With
>this we no longer generate deletion/insertion for L2 entries, of
>course that include RTF_CACHED.
> 
>  - This has been tested with a pppoe(4) setup to make sure that
>inserting a route with a magic 0.0.0.1 gateway works, pointed out
>by claudio@.
> 
> More tests, comments, ok are welcome.

Guys I'd rather move forward with that before g2k16, so comments are
welcome such that I can split the diff and commit it.

If people really have good arguments for using locks/SRPs in the hot
in order to keep the existing behavior I'd like to hear them.

> Index: net/route.c
> ===
> RCS file: /cvs/src/sys/net/route.c,v
> retrieving revision 1.313
> diff -u -p -r1.313 route.c
> --- net/route.c   22 Jul 2016 11:03:30 -  1.313
> +++ net/route.c   15 Aug 2016 08:58:39 -
> @@ -153,7 +153,9 @@ struct pool   rtentry_pool;   /* pool for r
>  struct pool  rttimer_pool;   /* pool for rttimer structures */
>  
>  void rt_timer_init(void);
> -void rt_setgwroute(struct rtentry *, u_int);
> +int  rt_setgwroute(struct rtentry *, u_int);
> +void rt_putgwroute(struct rtentry *);
> +int  rt_fixgwroute(struct rtentry *, void *, unsigned int);
>  int  rtflushclone1(struct rtentry *, void *, u_int);
>  void rtflushclone(unsigned int, struct rtentry *);
>  int  rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
> @@ -204,21 +206,20 @@ rtisvalid(struct rtentry *rt)
>   if (rt == NULL)
>   return (0);
>  
> -#ifdef DIAGNOSTIC
> - if (ISSET(rt->rt_flags, RTF_GATEWAY) && (rt->rt_gwroute != NULL) &&
> - ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY))
> - panic("next hop must be directly reachable");
> -#endif
> -
> - if ((rt->rt_flags & RTF_UP) == 0)
> + if (!ISSET(rt->rt_flags, RTF_UP))
>   return (0);
>  
>   /* Routes attached to stale ifas should be freed. */
>   if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
>   return (0);
>  
> - if (ISSET(rt->rt_flags, RTF_GATEWAY) && !rtisvalid(rt->rt_gwroute))
> - return (0);
> +#ifdef DIAGNOSTIC
> + if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
> + KASSERT(rt->rt_gwroute != NULL);
> + KASSERT(ISSET(rt->rt_gwroute->rt_flags, RTF_UP));
> + KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
> + }
> +#endif /* DIAGNOSTIC */
>  
>   return (1);
>  }
> @@ -267,8 +268,6 @@ rt_match(struct sockaddr *dst, uint32_t 
>   return (rt);
>  }
>  
> -struct rtentry *_rtalloc(struct sockaddr *, uint32_t *, int, unsigned int);
> -
>  #ifndef SMALL_KERNEL
>  /*
>   * Originated from bridge_hash() in if_bridge.c
> @@ -349,16 +348,10 @@ rt_hash(struct rtentry *rt, struct socka
>  struct rtentry *
>  rtalloc_mpath(struct sockaddr *ds