Re: rebound quantum entanglement

2016-10-03 Thread Theo de Raadt
+   return sysctl_int(oldp, oldlenp, newp, newlen, &dnsjacking);

+int dnsjacking = 0;

+   sin.sin_port = htons(dnsjacking);

How about using u_int16_t or in_port_t; and maybe error out if high
bits are set rather than ignoring them but still displaying them.



Re: rebound quantum entanglement

2016-10-03 Thread Ted Unangst
Ted Unangst wrote:
> Ted Unangst wrote:
> > So the plan is for rebound to be the 'system' resolver, with libc talking to
> > rbeound and rebound talking to the cloud. The main wrinkle is how does 
> > rebound
> > find the cloud? rebound.conf, but dhclient doesn't know anything about
> > rebound.conf, preferring to edit resolv.conf. But if rebound reads
> > resolv.conf, what does libc read? This has been a bit of a tangle until now,
> > especially in scenarios like upgrades where rebound may not even be running.
> 
> Move the hijacking into the kernel. rebound sets a sysctl, and then the kernel
> gives it all the dns connections. libc knows nothing. if rebound dies, dns
> dies. still listening on :53 because it has to listen somewhere.

configurable jack port, rebound uses 54.


Index: sys/kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.311
diff -u -p -r1.311 kern_sysctl.c
--- sys/kern/kern_sysctl.c  21 Sep 2016 14:06:50 -  1.311
+++ sys/kern/kern_sysctl.c  21 Sep 2016 16:06:16 -
@@ -598,6 +598,10 @@ kern_sysctl(int *name, u_int namelen, vo
return sysctl_int(oldp, oldlenp, newp, newlen, &global_ptrace);
}
 #endif
+   case KERN_DNSJACKING: {
+   extern int dnsjacking;
+   return sysctl_int(oldp, oldlenp, newp, newlen, &dnsjacking);
+   }
default:
return (EOPNOTSUPP);
}
Index: sys/kern/uipc_syscalls.c
===
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.133
diff -u -p -r1.133 uipc_syscalls.c
--- sys/kern/uipc_syscalls.c9 Aug 2016 02:25:35 -   1.133
+++ sys/kern/uipc_syscalls.c15 Sep 2016 22:32:55 -
@@ -67,6 +67,8 @@ externstruct fileops socketops;
 intcopyaddrout(struct proc *, struct mbuf *, struct sockaddr *, socklen_t,
socklen_t *);
 
+int dnsjacking = 0;
+
 int
 sys_socket(struct proc *p, void *v, register_t *retval)
 {
@@ -395,6 +397,16 @@ sys_connect(struct proc *p, void *v, reg
FRELE(fp, p);
m_freem(nam);
return (error);
+   }
+   if (dnsjacking) {
+   struct sockaddr_in sin;
+   memset(&sin, 0, sizeof(sin));
+   sin.sin_len = sizeof(sin);
+   sin.sin_family = AF_INET;
+   sin.sin_port = htons(dnsjacking);
+   sin.sin_addr.s_addr = INADDR_LOOPBACK;
+   memcpy(mtod(nam, void *), &sin, sizeof(sin));
+   nam->m_len = sizeof(sin);
}
}
 
Index: sys/sys/sysctl.h
===
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.166
diff -u -p -r1.166 sysctl.h
--- sys/sys/sysctl.h21 Sep 2016 14:06:50 -  1.166
+++ sys/sys/sysctl.h21 Sep 2016 16:06:16 -
@@ -113,7 +113,7 @@ struct ctlname {
 #defineKERN_HOSTNAME   10  /* string: hostname */
 #defineKERN_HOSTID 11  /* int: host identifier */
 #defineKERN_CLOCKRATE  12  /* struct: struct clockinfo */
-/* was KERN_VNODE  13  */
+#defineKERN_DNSJACKING 13  /* hijack dns sockets */
 /* was KERN_PROC   14  */
 /* was KERN_FILE   15  */
 #defineKERN_PROF   16  /* node: kernel profiling info 
*/
@@ -200,7 +200,7 @@ struct ctlname {
{ "hostname", CTLTYPE_STRING }, \
{ "hostid", CTLTYPE_INT }, \
{ "clockrate", CTLTYPE_STRUCT }, \
-   { "gap", 0 }, \
+   { "dnsjacking", CTLTYPE_INT }, \
{ "gap", 0 }, \
{ "gap", 0 }, \
{ "profiling", CTLTYPE_NODE }, \
Index: usr.sbin/rebound/rebound.8
===
RCS file: /cvs/src/usr.sbin/rebound/rebound.8,v
retrieving revision 1.4
diff -u -p -r1.4 rebound.8
--- usr.sbin/rebound/rebound.8  4 Dec 2015 04:50:43 -   1.4
+++ usr.sbin/rebound/rebound.8  29 Sep 2016 01:20:02 -
@@ -33,9 +33,7 @@ The options are as follows:
 .Bl -tag -width Ds
 .It Fl c Ar config
 Specify an alternative configuration file, instead of the default
-.Pa /etc/rebound.conf .
-At present, the config file consists of a single line containing the next
-hop DNS server.
+.Pa /etc/resolv.conf .
 .Nm
 will reload the configuration file when sent a SIGHUP signal.
 .It Fl d
@@ -46,8 +44,8 @@ does not
 into the background.
 .El
 .Sh FILES
-.Bl -tag -width "/etc/rebound.confXX" -compact
-.It Pa /etc/rebound.conf
+.Bl -tag -width "/etc/resolv.confXX" -compact
+.It Pa /etc/resolv.conf
 Default
 .Nm
 configuration file.
Index: usr.sbin/rebound/rebound.c
===

Re: disklabel template: percentage of disk optional?

2016-10-03 Thread Dmitrij D. Czarkoff
"Dmitrij D. Czarkoff"  wrote:

>"Dmitrij D. Czarkoff"  wrote:
>
>>"Dmitrij D. Czarkoff"  wrote:
>>
>>>Claus Assmann  wrote:
>>>
AFAICT the "percentage of disk" is optional
>>>
>>>disklabel(8) says:
>>>
>>>| A template for the automatic allocation can be passed \
>>>| to disklabel using
>>>| the -T option.  The template consists of one line per partition, with
>>>| each line giving mountpoint, min-max size range, and percentage \
>>>| of disk,
>>>| space-separated.  Max can be unlimited by specifying '*'.  If only
>>>| mountpoint and min size are given, the partition is created with that
>>>| exact size.
>>>
>>>Which means that percentages are optional unless sizes are specified as
>>>min-max, which makes a lot of sense:  you either specify the amount of
>>>space you want to allocate for particular partition, or you specify the
>>>percentage of disk you want it to cover and provide minimum and maximum
>>>limits.
>>>
Index: editor.c
===
RCS file: cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -r1.303 editor.c
--- editor.c   2 Sep 2016 10:47:17 -   1.303
+++ editor.c   3 Oct 2016 22:56:55 -
@@ -2388,6 +2388,7 @@
   sa = &(alloc_table[0].table[idx]);
   idx++;

+  sa->rate = 0;
>>>
>>>So basically you set the partition size to 0% of disk, \
>>>forcing disklabel
>>>to pick the lower limit.  Why would you want to specify the upper limit
>>>then?
>>>
>>>It would probably make more sense to do something like this instead:
>>
>>Sorry, I somehow managed to remove part of the line in \
>>a diff.  Here is what it
>>is was supposed to be:
>
>Well, I should have checked parse_sizerange() first...

I shouldn't have started on sending patches at 3AM.  This one should do
what I intended it to do.  Sorry for noise.

Index: sbin/disklabel/editor.c
===
RCS file: /var/cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -p -r1.303 editor.c
--- sbin/disklabel/editor.c 2 Sep 2016 10:47:17 -   1.303
+++ sbin/disklabel/editor.c 4 Oct 2016 01:28:06 -
@@ -2397,6 +2397,8 @@ parse_autotable(char *filename)
if ((t = get_token(&buf, &len)) != NULL &&
parse_pct(t, &sa->rate) == -1)
errx(1, "%s: parse error on line %u", filename, idx);
+   else if (t == NULL && sa->minsz != sa->maxsz)
+   errx(1, "%s: parse error on line %u", filename, idx);
if (sa->minsz > sa->maxsz)
errx(1, "%s: min size > max size on line %u", filename,
idx);



Re: disklabel template: percentage of disk optional?

2016-10-03 Thread Dmitrij D. Czarkoff
"Dmitrij D. Czarkoff"  wrote:

>"Dmitrij D. Czarkoff"  wrote:
>
>>Claus Assmann  wrote:
>>
>>>AFAICT the "percentage of disk" is optional
>>
>>disklabel(8) says:
>>
>>| A template for the automatic allocation can be passed \
>>| to disklabel using
>>| the -T option.  The template consists of one line per partition, with
>>| each line giving mountpoint, min-max size range, and percentage \
>>| of disk,
>>| space-separated.  Max can be unlimited by specifying '*'.  If only
>>| mountpoint and min size are given, the partition is created with that
>>| exact size.
>>
>>Which means that percentages are optional unless sizes are specified as
>>min-max, which makes a lot of sense:  you either specify the amount of
>>space you want to allocate for particular partition, or you specify the
>>percentage of disk you want it to cover and provide minimum and maximum
>>limits.
>>
>>>Index: editor.c
>>>===
>>>RCS file: cvs/src/sbin/disklabel/editor.c,v
>>>retrieving revision 1.303
>>>diff -u -r1.303 editor.c
>>>--- editor.c   2 Sep 2016 10:47:17 -   1.303
>>>+++ editor.c   3 Oct 2016 22:56:55 -
>>>@@ -2388,6 +2388,7 @@
>>>   sa = &(alloc_table[0].table[idx]);
>>>   idx++;
>>>
>>>+  sa->rate = 0;
>>
>>So basically you set the partition size to 0% of disk, forcing disklabel
>>to pick the lower limit.  Why would you want to specify the upper limit
>>then?
>>
>>It would probably make more sense to do something like this instead:
>
>Sorry, I somehow managed to remove part of the line in a diff.  Here is what it
>is was supposed to be:

Well, I should have checked parse_sizerange() first...

Index: sbin/disklabel/editor.c
===
RCS file: /var/cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -p -r1.303 editor.c
--- sbin/disklabel/editor.c 2 Sep 2016 10:47:17 -   1.303
+++ sbin/disklabel/editor.c 4 Oct 2016 01:23:14 -
@@ -2397,6 +2397,8 @@ parse_autotable(char *filename)
if ((t = get_token(&buf, &len)) != NULL &&
parse_pct(t, &sa->rate) == -1)
errx(1, "%s: parse error on line %u", filename, idx);
+   else if (t == NULL && sa->minsz == sa->maxsz)
+   errx(1, "%s: parse error on line %u", filename, idx);
if (sa->minsz > sa->maxsz)
errx(1, "%s: min size > max size on line %u", filename,
idx);



Re: disklabel template: percentage of disk optional?

2016-10-03 Thread Dmitrij D. Czarkoff
"Dmitrij D. Czarkoff"  wrote:

>Claus Assmann  wrote:
>
>>AFAICT the "percentage of disk" is optional
>
>disklabel(8) says:
>
>| A template for the automatic allocation can be passed \
>| to disklabel using
>| the -T option.  The template consists of one line per partition, with
>| each line giving mountpoint, min-max size range, and percentage \
>| of disk,
>| space-separated.  Max can be unlimited by specifying '*'.  If only
>| mountpoint and min size are given, the partition is created with that
>| exact size.
>
>Which means that percentages are optional unless sizes are specified as
>min-max, which makes a lot of sense:  you either specify the amount of
>space you want to allocate for particular partition, or you specify the
>percentage of disk you want it to cover and provide minimum and maximum
>limits.
>
>>Index: editor.c
>>===
>>RCS file: cvs/src/sbin/disklabel/editor.c,v
>>retrieving revision 1.303
>>diff -u -r1.303 editor.c
>>--- editor.c   2 Sep 2016 10:47:17 -   1.303
>>+++ editor.c   3 Oct 2016 22:56:55 -
>>@@ -2388,6 +2388,7 @@
>>   sa = &(alloc_table[0].table[idx]);
>>   idx++;
>>
>>+  sa->rate = 0;
>
>So basically you set the partition size to 0% of disk, forcing disklabel
>to pick the lower limit.  Why would you want to specify the upper limit
>then?
>
>It would probably make more sense to do something like this instead:

Sorry, I somehow managed to remove part of the line in a diff.  Here is what it
is was supposed to be:

Index: sbin/disklabel/editor.c
===
RCS file: /var/cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -p -r1.303 editor.c
--- sbin/disklabel/editor.c 2 Sep 2016 10:47:17 -   1.303
+++ sbin/disklabel/editor.c 4 Oct 2016 01:18:35 -
@@ -2397,6 +2397,8 @@ parse_autotable(char *filename)
if ((t = get_token(&buf, &len)) != NULL &&
parse_pct(t, &sa->rate) == -1)
errx(1, "%s: parse error on line %u", filename, idx);
+   else if (t == NULL && sa->maxsz)
+   errx(1, "%s: parse error on line %u", filename, idx);
if (sa->minsz > sa->maxsz)
errx(1, "%s: min size > max size on line %u", filename,
idx);



Re: disklabel template: percentage of disk optional?

2016-10-03 Thread Dmitrij D. Czarkoff
Claus Assmann  wrote:

>AFAICT the "percentage of disk" is optional

disklabel(8) says:

| A template for the automatic allocation can be passed to disklabel using
| the -T option.  The template consists of one line per partition, with
| each line giving mountpoint, min-max size range, and percentage of disk,
| space-separated.  Max can be unlimited by specifying '*'.  If only
| mountpoint and min size are given, the partition is created with that
| exact size.

Which means that percentages are optional unless sizes are specified as
min-max, which makes a lot of sense:  you either specify the amount of
space you want to allocate for particular partition, or you specify the
percentage of disk you want it to cover and provide minimum and maximum
limits.

>Index: editor.c
>===
>RCS file: cvs/src/sbin/disklabel/editor.c,v
>retrieving revision 1.303
>diff -u -r1.303 editor.c
>--- editor.c   2 Sep 2016 10:47:17 -   1.303
>+++ editor.c   3 Oct 2016 22:56:55 -
>@@ -2388,6 +2388,7 @@
>   sa = &(alloc_table[0].table[idx]);
>   idx++;
>
>+  sa->rate = 0;

So basically you set the partition size to 0% of disk, forcing disklabel
to pick the lower limit.  Why would you want to specify the upper limit
then?

It would probably make more sense to do something like this instead:

Index: sbin/disklabel//editor.c
===
RCS file: /var/cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -p -r1.303 editor.c
--- sbin/disklabel//editor.c2 Sep 2016 10:47:17 -   1.303
+++ sbin/disklabel//editor.c4 Oct 2016 01:06:39 -
@@ -2397,6 +2397,8 @@ parse_autotable(char *filename)
if ((t = get_token(&buf, &len)) != NULL &&
parse_pct(t, &sa->rate) == -1)
errx(1, "%s: parse error on line %u", filename, idx);
+   else if (&sa->maxsz)
+   errx(1, "%s: parse error on line %u", filename, idx);
if (sa->minsz > sa->maxsz)
errx(1, "%s: min size > max size on line %u", filename,
idx);



Re: Explicitly cast the return variable in tls_load_file()

2016-10-03 Thread kinichiro inoguchi
I could have an answer that this compilation error was a bug of compiler,
and that bug will be tracked.
https://software.intel.com/en-us/forums/intel-c-compiler/topic/698109

I saw the type of buf was changed in cvs,
then I can avoid this compilation problem.

Thanks.

Kinichiro


disklabel template: percentage of disk optional?

2016-10-03 Thread Claus Assmann
While playing around with the autoinstaller and autodisklayout I
ran into several problems, some of which I worked around and for
one I created a possible patch, but I'm not sure if that's the right
thing to do.

I have a disklayout template like this:

/   500M
swap1G
/tmp1G-2G
/var10G-16G
/usr2G-4G
/usr/X11R6  1G-1G
/usr/local  200G-250G
/usr/obj2G-4G
/usr/src2G-4G
/home   500G-*

but disklabel failed with
"sum of extra space allocation > 100%"
AFAICT the "percentage of disk" is optional, and if so, maybe the
following patch should be applied (to initialize the variable as
it may contain a bogus value thus triggering the error)?

Index: editor.c
===
RCS file: cvs/src/sbin/disklabel/editor.c,v
retrieving revision 1.303
diff -u -r1.303 editor.c
--- editor.c2 Sep 2016 10:47:17 -   1.303
+++ editor.c3 Oct 2016 22:56:55 -
@@ -2388,6 +2388,7 @@
sa = &(alloc_table[0].table[idx]);
idx++;
 
+   sa->rate = 0;
if ((sa->mp = get_token(&buf, &len)) == NULL ||
(sa->mp[0] != '/' && strcmp(sa->mp, "swap")))
errx(1, "%s: parse error on line %u", filename, idx);



Re: add in6 multicast support to vxlan(4) ; question on mbufs

2016-10-03 Thread Vincent Gross
On Sat, 24 Sep 2016 10:58:10 +0200
Vincent Gross  wrote:

> Hi,
> 
[snip]
> 
> Aside from the mbuf issue, is this Ok ?

I will go back on the mbuff stuff later.

Diff rebased, ok anyone ?

Index: net/if_vxlan.c
===
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.48
diff -u -p -r1.48 if_vxlan.c
--- net/if_vxlan.c  30 Sep 2016 10:22:05 -  1.48
+++ net/if_vxlan.c  3 Oct 2016 23:12:42 -
@@ -47,6 +47,8 @@
 #include 
 #include 
 
+#include 
+
 #if NPF > 0
 #include 
 #endif
@@ -61,7 +63,12 @@ struct vxlan_softc {
struct arpcomsc_ac;
struct ifmedia   sc_media;
 
-   struct ip_moptions   sc_imo;
+   union {
+   struct ip_moptions   u_imo;
+   struct ip6_moptions  u_imo6;
+   } sc_imu;
+#define sc_imo sc_imu.u_imo
+#define sc_im6osc_imu.u_imo6
void*sc_ahcookie;
void*sc_lhcookie;
void*sc_dhcookie;
@@ -129,10 +136,6 @@ vxlan_clone_create(struct if_clone *ifc,
M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
return (ENOMEM);
 
-   sc->sc_imo.imo_membership = malloc(
-   (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS),
M_IPMOPTS,
-   M_WAITOK|M_ZERO);
-   sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
sc->sc_dstport = htons(VXLAN_PORT);
sc->sc_vnetid = VXLAN_VNI_UNSET;
 
@@ -190,7 +193,6 @@ vxlan_clone_destroy(struct ifnet *ifp)
ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
ether_ifdetach(ifp);
if_detach(ifp);
-   free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
free(sc, M_DEVBUF, sizeof(*sc));
 
return (0);
@@ -199,11 +201,33 @@ vxlan_clone_destroy(struct ifnet *ifp)
 void
 vxlan_multicast_cleanup(struct ifnet *ifp)
 {
-   struct vxlan_softc  *sc = (struct vxlan_softc
*)ifp->if_softc;
-   struct ip_moptions  *imo = &sc->sc_imo;
-   struct ifnet*mifp;
+   struct vxlan_softc   *sc = (struct vxlan_softc
*)ifp->if_softc;
+   struct ip_moptions   *imo;
+   struct in_multi **imm;
+   struct ip6_moptions  *im6o;
+   struct in6_multi_mship   *im6m, *im6m_next;
+   struct ifnet *mifp = NULL;
+
+   switch (sc->sc_dst.ss_family) {
+   case AF_INET:
+   imo = &sc->sc_imo;
+   mifp = if_get(imo->imo_ifidx);
+   imm = imo->imo_membership;
+   while (imo->imo_num_memberships > 0)
+   in_delmulti(imm[--imo->imo_num_memberships]);
+   free(imm, M_IPMOPTS,
+   sizeof(struct in_multi *) *
imo->imo_num_memberships);
+   break;
+   case AF_INET6:
+   im6o = &sc->sc_im6o;
+   mifp = if_get(im6o->im6o_ifidx);
+   LIST_FOREACH_SAFE(im6m, &im6o->im6o_memberships,
i6mm_chain,
+   im6m_next)
+   in6_leavegroup(im6m);
+   break;
+   }
+   bzero(&sc->sc_imu, sizeof(sc->sc_imu));
 
-   mifp = if_get(imo->imo_ifidx);
if (mifp != NULL) {
if (sc->sc_ahcookie != NULL) {
hook_disestablish(mifp->if_addrhooks,
sc->sc_ahcookie); @@ -219,14 +243,9 @@ vxlan_multicast_cleanup(struct
ifnet *if sc->sc_dhcookie);
sc->sc_dhcookie = NULL;
}
-
-   if_put(mifp);
}
 
-   if (imo->imo_num_memberships > 0) {
-
in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
-   imo->imo_ifidx = 0;
-   }
+   if_put(mifp);
 }
 
 int
@@ -234,47 +253,136 @@ vxlan_multicast_join(struct ifnet *ifp, 
 struct sockaddr *dst)
 {
struct vxlan_softc  *sc = ifp->if_softc;
-   struct ip_moptions  *imo = &sc->sc_imo;
+   struct ip_moptions  *imo;
+   struct ip6_moptions *im6o;
+   struct in6_multi_mship  *imm;
struct sockaddr_in  *src4, *dst4;
-   struct sockaddr_in6 *dst6;
+   struct sockaddr_in6 *src6, *dst6;
struct ifaddr   *ifa;
-   struct ifnet*mifp;
+   struct ifnet*mifp = NULL;
+   struct rtentry  *rt;
+   int  error;
 
-   if (dst->sa_family == AF_INET) {
+   switch (dst->sa_family) {
+   case AF_INET:
dst4 = satosin(dst);
+   src4 = satosin(src);
if (!IN_MULTICAST(dst4->sin_addr.s_addr))
return (0);
-   } else if (dst->sa_family == AF_INET6) {
+   if (src4->sin_addr.s_addr == INADDR_ANY ||
+   IN_MULTICAST(src4->sin_addr.s_addr))
+   return (EINVAL);
+   if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) ==
NULL ||
+   (mifp = ifa->ifa_ifp) == NUL

Re: pending timeouts in trpt

2016-10-03 Thread Philip Guenther
On Sun, Oct 2, 2016 at 1:55 AM, David Gwynne  wrote:
> i think the change to move tcp timers to timeouts got this bit wrong.
>
> we do want to print the timer if it is pending, it doesnt make sense
> otherwise.
...
> --- trpt.c  27 Aug 2016 01:50:07 -  1.33
> +++ trpt.c  2 Oct 2016 08:51:21 -
> @@ -401,7 +401,7 @@ tcp_trace(short act, short ostate, struc
> int i;
>
> for (i = 0; i < TCPT_NTIMERS; i++) {
> -   if (timeout_pending(&tp->t_timer[i]))
> +   if (!timeout_pending(&tp->t_timer[i]))

Reading 14 year old diffs again, eh?

-   if (tp->t_timer[i] == 0)
+   if (timeout_pending(&tp->t_timer[i]))

Yeah, that test looks backwards.  ok guenther@

That reminds me: I believe this code in trpt.c is dead:
if (req == PRU_SLOWTIMO || req == PRU_FASTTIMO)
printf("<%s>", tcptimers[timer]);

PRU_{SLOW,FAST}TIMO are no longer used in the kernel, so they'll never
appear in trace records.


Philip Guenther



Re: Unexpected behavior in su/doas

2016-10-03 Thread Theo de Raadt
> > I stumbled upon unexpected behavior on OpenBSD 6.0 (all patches)
> > which seems to allow running commands as the original user when
> > using su and doas interactively because the controlling terminal
> > is the same.
> 
> > Is this behavior expected and if so, how do I run commands from
> > root as an untrusted user? It's not mentioned in the man page
> > that using su/doas as root might allow other users to run code as
> > root.
> 
> Oh, interesting. The main design of doas is to escalate privileges, allowing
> users to run commands as root. But it certainly looks appealing to use it to
> drop privileges as well.
> 
> It's easy to add an option to disassociate from the controlling tty. I'm not
> sure if this solves every problem, but it certainly blocks direct tty
> injection. (You also pick up some privileges from being in a session or
> process group, but those privileges are less powerful.)
> 
> Certain commands require a controlling tty, but that seems to be mostly
> shells. Even vi and mg work ok.

People (and programs) are not used to operating without tty
associating; furthermore study of failure condition becomes really
difficult.

I don't see a way that -D can be used correctly.  I suspect it will
not be used when it is needed; and it will be used when it causes more
harm than good.



Re: Unexpected behavior in su/doas

2016-10-03 Thread Ted Unangst
Simon Ruderich wrote:
> Hello,
> 
> I stumbled upon unexpected behavior on OpenBSD 6.0 (all patches)
> which seems to allow running commands as the original user when
> using su and doas interactively because the controlling terminal
> is the same.

> Is this behavior expected and if so, how do I run commands from
> root as an untrusted user? It's not mentioned in the man page
> that using su/doas as root might allow other users to run code as
> root.

Oh, interesting. The main design of doas is to escalate privileges, allowing
users to run commands as root. But it certainly looks appealing to use it to
drop privileges as well.

It's easy to add an option to disassociate from the controlling tty. I'm not
sure if this solves every problem, but it certainly blocks direct tty
injection. (You also pick up some privileges from being in a session or
process group, but those privileges are less powerful.)

Certain commands require a controlling tty, but that seems to be mostly
shells. Even vi and mg work ok.

Index: doas.1
===
RCS file: /cvs/src/usr.bin/doas/doas.1,v
retrieving revision 1.19
diff -u -p -r1.19 doas.1
--- doas.1  4 Sep 2016 15:20:37 -   1.19
+++ doas.1  3 Oct 2016 16:16:07 -
@@ -21,7 +21,7 @@
 .Nd execute commands as another user
 .Sh SYNOPSIS
 .Nm doas
-.Op Fl Lns
+.Op Fl DLns
 .Op Fl a Ar style
 .Op Fl C Ar config
 .Op Fl u Ar user
@@ -68,6 +68,9 @@ or
 will be printed on standard output, depending on command
 matching results.
 No command is executed.
+.It Fl D
+Detach from controlling terminal.
+This can be useful when attempting to deescalate privileges.
 .It Fl L
 Clear any persisted authorizations from previous invocations,
 then immediately exit.
Index: doas.c
===
RCS file: /cvs/src/usr.bin/doas/doas.c,v
retrieving revision 1.64
diff -u -p -r1.64 doas.c
--- doas.c  3 Sep 2016 11:03:18 -   1.64
+++ doas.c  3 Oct 2016 16:24:08 -
@@ -271,6 +271,7 @@ main(int argc, char **argv)
int i, ch;
int sflag = 0;
int nflag = 0;
+   int droptty = 0;
char cwdpath[PATH_MAX];
const char *cwd;
char *login_style = NULL;
@@ -282,7 +283,7 @@ main(int argc, char **argv)
 
uid = getuid();
 
-   while ((ch = getopt(argc, argv, "a:C:Lnsu:")) != -1) {
+   while ((ch = getopt(argc, argv, "a:C:DLnsu:")) != -1) {
switch (ch) {
case 'a':
login_style = optarg;
@@ -290,6 +291,9 @@ main(int argc, char **argv)
case 'C':
confpath = optarg;
break;
+   case 'D':
+   droptty = 1;
+   break;
case 'L':
i = open("/dev/tty", O_RDWR);
if (i != -1)
@@ -371,6 +375,15 @@ main(int argc, char **argv)
errx(1, "Authorization required");
 
authuser(myname, login_style, rule->options & PERSIST);
+   }
+
+   if (droptty) {
+   i = open("/dev/tty", O_RDWR);
+   if (i == -1)
+   err(1, "can't open tty");
+   if (ioctl(i, TIOCNOTTY) != 0)
+   err(1, "can't drop controlling tty");
+   close(i);
}
 
if (pledge("stdio rpath getpw exec id", NULL) == -1)



Re: First usages of timeout_set_proc(9)

2016-10-03 Thread Alexander Bluhm
On Mon, Oct 03, 2016 at 02:57:34PM +0200, Martin Pieuchot wrote:
> Here are some timeouts that require a process context in order to call
> ip_output().
> 
> The reason is that rtalloc(9) might end up inserting a RTF_CLONING route
> and that requires holding a write lock.  This isn't going to change
> because we're also going to use write locks to protect pf(4) internals. 
> 
> So to not introduce new sleeping points in the socket layer we're going
> to serialize all code paths that might end up in ip_output().  For that
> we're going to use a write lock, for which we need a process context.
> 
> Now that dlg@ fixed the deadlocks pointed by Haesbaert and guenther@,
> let's convert the first timeouts.
> 
> ok?

OK bluhm@

> 
> Index: net/if_pflow.c
> ===
> RCS file: /cvs/src/sys/net/if_pflow.c,v
> retrieving revision 1.61
> diff -u -p -r1.61 if_pflow.c
> --- net/if_pflow.c29 Apr 2016 08:55:03 -  1.61
> +++ net/if_pflow.c3 Oct 2016 12:47:11 -
> @@ -548,15 +548,16 @@ pflow_init_timeouts(struct pflow_softc *
>   if (timeout_initialized(&sc->sc_tmo_tmpl))
>   timeout_del(&sc->sc_tmo_tmpl);
>   if (!timeout_initialized(&sc->sc_tmo))
> - timeout_set(&sc->sc_tmo, pflow_timeout, sc);
> + timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
>   break;
>   case PFLOW_PROTO_10:
>   if (!timeout_initialized(&sc->sc_tmo_tmpl))
> - timeout_set(&sc->sc_tmo_tmpl, pflow_timeout_tmpl, sc);
> + timeout_set_proc(&sc->sc_tmo_tmpl, pflow_timeout_tmpl,
> + sc);
>   if (!timeout_initialized(&sc->sc_tmo))
> - timeout_set(&sc->sc_tmo, pflow_timeout, sc);
> + timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
>   if (!timeout_initialized(&sc->sc_tmo6))
> - timeout_set(&sc->sc_tmo6, pflow_timeout6, sc);
> + timeout_set_proc(&sc->sc_tmo6, pflow_timeout6, sc);
>  
>   timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT);
>   break;
> Index: net/if_pfsync.c
> ===
> RCS file: /cvs/src/sys/net/if_pfsync.c,v
> retrieving revision 1.234
> diff -u -p -r1.234 if_pfsync.c
> --- net/if_pfsync.c   27 Sep 2016 04:57:17 -  1.234
> +++ net/if_pfsync.c   3 Oct 2016 12:47:11 -
> @@ -328,9 +328,9 @@ pfsync_clone_create(struct if_clone *ifc
>   IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
>   ifp->if_hdrlen = sizeof(struct pfsync_header);
>   ifp->if_mtu = ETHERMTU;
> - timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
> - timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
> - timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
> + timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
> + timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
> + timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
>  
>   if_attach(ifp);
>   if_alloc_sadl(ifp);
> @@ -1720,7 +1720,7 @@ pfsync_defer(struct pf_state *st, struct
>   sc->sc_deferred++;
>   TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
>  
> - timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
> + timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
>   timeout_add_msec(&pd->pd_tmo, 20);
>  
>   schednetisr(NETISR_PFSYNC);
> Index: netinet/ip_carp.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.293
> diff -u -p -r1.293 ip_carp.c
> --- netinet/ip_carp.c 25 Jul 2016 16:44:04 -  1.293
> +++ netinet/ip_carp.c 3 Oct 2016 12:47:11 -
> @@ -831,9 +831,9 @@ carp_new_vhost(struct carp_softc *sc, in
>   vhe->vhid = vhid;
>   vhe->advskew = advskew;
>   vhe->state = INIT;
> - timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
> - timeout_set(&vhe->md_tmo, carp_master_down, vhe);
> - timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
> + timeout_set_proc(&vhe->ad_tmo, carp_send_ad, vhe);
> + timeout_set_proc(&vhe->md_tmo, carp_master_down, vhe);
> + timeout_set_proc(&vhe->md6_tmo, carp_master_down, vhe);
>  
>   KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
>  
> Index: netinet/tcp_timer.h
> ===
> RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 tcp_timer.h
> --- netinet/tcp_timer.h   6 Jul 2011 23:44:20 -   1.13
> +++ netinet/tcp_timer.h   3 Oct 2016 12:47:11 -
> @@ -116,7 +116,7 @@ const char *tcptimers[] =
>   * Init, arm, disarm, and test TCP timers.
>   */
>  #define  TCP_TIMER_INIT(tp, timer)   
> \
> - timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
>

Help me testing the netlock

2016-10-03 Thread Martin Pieuchot
Diff below introduces a single write lock that will be used to serialize
access to ip_output().

This lock will be then split in multiple readers and writers to allow
multiple forwarding paths to run in parallel of each others but still
serialized with the socket layer.

I'm currently looking for people wanting to run this diff and try to
break it.  In other words, your machine might panic with it and if it
does report the panic to me so the diff can be improved.

I tested NFS v2 and v3 so I'm quite confident, but I might have missed
some obvious stuff.

PS: This diff includes the timeout_set_proc() diff I just sent.

Index: kern/kern_rwlock.c
===
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.27
diff -u -p -r1.27 kern_rwlock.c
--- kern/kern_rwlock.c  14 Mar 2015 07:33:42 -  1.27
+++ kern/kern_rwlock.c  3 Oct 2016 12:59:16 -
@@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl)
membar_enter();
 }
 
+#if 1
+#include 
+#include 
+#include 
+#endif
+
 void
 rw_enter_write(struct rwlock *rwl)
 {
@@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl)
rw_enter(rwl, RW_WRITE);
else
membar_enter();
+
+#if 1
+   if ((rwl == &netlock) && (splassert_ctl == 3)) {
+   printf("ENTER::%d::", cpu_number());
+   db_stack_trace_print(
+   (db_expr_t)__builtin_frame_address(1),
+   TRUE, 1, "", printf);
+   }
+#endif
 }
 
 void
@@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl)
unsigned long owner = rwl->rwl_owner;
 
rw_assert_wrlock(rwl);
+
+#if 1
+   if ((rwl == &netlock) && (splassert_ctl == 3)) {
+   printf("EXIT::%d::", cpu_number());
+   db_stack_trace_print(
+   (db_expr_t)__builtin_frame_address(1),
+   TRUE, 1, "", printf);
+   }
+#endif
 
membar_exit();
if (__predict_false((owner & RWLOCK_WAIT) ||
Index: kern/sys_socket.c
===
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.21
diff -u -p -r1.21 sys_socket.c
--- kern/sys_socket.c   5 Dec 2015 10:11:53 -   1.21
+++ kern/sys_socket.c   3 Oct 2016 12:59:16 -
@@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st
 {
struct socket *so = fp->f_data;
int revents = 0;
-   int s = splsoftnet();
+   int s;
 
+   rw_enter_write(&netlock);
+   s = splsoftnet();
if (events & (POLLIN | POLLRDNORM)) {
if (soreadable(so))
revents |= events & (POLLIN | POLLRDNORM);
@@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st
}
}
splx(s);
+   rw_exit_write(&netlock);
return (revents);
 }
 
Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.161
diff -u -p -r1.161 uipc_socket.c
--- kern/uipc_socket.c  20 Sep 2016 14:27:43 -  1.161
+++ kern/uipc_socket.c  3 Oct 2016 12:59:16 -
@@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i
return (EPROTONOSUPPORT);
if (prp->pr_type != type)
return (EPROTOTYPE);
+   rw_enter_write(&netlock);
s = splsoftnet();
so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
TAILQ_INIT(&so->so_q0);
@@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i
so->so_state |= SS_NOFDREF;
sofree(so);
splx(s);
+   rw_exit_write(&netlock);
return (error);
}
splx(s);
+   rw_exit_write(&netlock);
*aso = so;
return (0);
 }
@@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i
 int
 sobind(struct socket *so, struct mbuf *nam, struct proc *p)
 {
-   int s = splsoftnet();
-   int error;
+   int s, error;
 
+   rw_enter_write(&netlock);
+   s = splsoftnet();
error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
splx(s);
+   rw_exit_write(&netlock);
return (error);
 }
 
@@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog)
if (isspliced(so) || issplicedback(so))
return (EOPNOTSUPP);
 #endif /* SOCKET_SPLICE */
+   rw_enter_write(&netlock);
s = splsoftnet();
error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
curproc);
if (error) {
splx(s);
+   rw_exit_write(&netlock);
return (error);
}
if (TAILQ_FIRST(&so->so_q) == NULL)
@@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog)
backlog = sominconn;
so->so_qlimit = backlog;
splx(s);
+   rw_exit_write(&netlock);
return (0);
 }
 
@@ -

Re: First usages of timeout_set_proc(9)

2016-10-03 Thread Mark Kettenis
> Date: Mon, 3 Oct 2016 14:57:34 +0200
> From: Martin Pieuchot 
> 
> Here are some timeouts that require a process context in order to call
> ip_output().
> 
> The reason is that rtalloc(9) might end up inserting a RTF_CLONING route
> and that requires holding a write lock.  This isn't going to change
> because we're also going to use write locks to protect pf(4) internals. 
> 
> So to not introduce new sleeping points in the socket layer we're going
> to serialize all code paths that might end up in ip_output().  For that
> we're going to use a write lock, for which we need a process context.
> 
> Now that dlg@ fixed the deadlocks pointed by Haesbaert and guenther@,
> let's convert the first timeouts.
> 
> ok?

Let's see if this sticks.

ok kettenis@

> Index: net/if_pflow.c
> ===
> RCS file: /cvs/src/sys/net/if_pflow.c,v
> retrieving revision 1.61
> diff -u -p -r1.61 if_pflow.c
> --- net/if_pflow.c29 Apr 2016 08:55:03 -  1.61
> +++ net/if_pflow.c3 Oct 2016 12:47:11 -
> @@ -548,15 +548,16 @@ pflow_init_timeouts(struct pflow_softc *
>   if (timeout_initialized(&sc->sc_tmo_tmpl))
>   timeout_del(&sc->sc_tmo_tmpl);
>   if (!timeout_initialized(&sc->sc_tmo))
> - timeout_set(&sc->sc_tmo, pflow_timeout, sc);
> + timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
>   break;
>   case PFLOW_PROTO_10:
>   if (!timeout_initialized(&sc->sc_tmo_tmpl))
> - timeout_set(&sc->sc_tmo_tmpl, pflow_timeout_tmpl, sc);
> + timeout_set_proc(&sc->sc_tmo_tmpl, pflow_timeout_tmpl,
> + sc);
>   if (!timeout_initialized(&sc->sc_tmo))
> - timeout_set(&sc->sc_tmo, pflow_timeout, sc);
> + timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
>   if (!timeout_initialized(&sc->sc_tmo6))
> - timeout_set(&sc->sc_tmo6, pflow_timeout6, sc);
> + timeout_set_proc(&sc->sc_tmo6, pflow_timeout6, sc);
>  
>   timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT);
>   break;
> Index: net/if_pfsync.c
> ===
> RCS file: /cvs/src/sys/net/if_pfsync.c,v
> retrieving revision 1.234
> diff -u -p -r1.234 if_pfsync.c
> --- net/if_pfsync.c   27 Sep 2016 04:57:17 -  1.234
> +++ net/if_pfsync.c   3 Oct 2016 12:47:11 -
> @@ -328,9 +328,9 @@ pfsync_clone_create(struct if_clone *ifc
>   IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
>   ifp->if_hdrlen = sizeof(struct pfsync_header);
>   ifp->if_mtu = ETHERMTU;
> - timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
> - timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
> - timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
> + timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
> + timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
> + timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
>  
>   if_attach(ifp);
>   if_alloc_sadl(ifp);
> @@ -1720,7 +1720,7 @@ pfsync_defer(struct pf_state *st, struct
>   sc->sc_deferred++;
>   TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
>  
> - timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
> + timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
>   timeout_add_msec(&pd->pd_tmo, 20);
>  
>   schednetisr(NETISR_PFSYNC);
> Index: netinet/ip_carp.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.293
> diff -u -p -r1.293 ip_carp.c
> --- netinet/ip_carp.c 25 Jul 2016 16:44:04 -  1.293
> +++ netinet/ip_carp.c 3 Oct 2016 12:47:11 -
> @@ -831,9 +831,9 @@ carp_new_vhost(struct carp_softc *sc, in
>   vhe->vhid = vhid;
>   vhe->advskew = advskew;
>   vhe->state = INIT;
> - timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
> - timeout_set(&vhe->md_tmo, carp_master_down, vhe);
> - timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
> + timeout_set_proc(&vhe->ad_tmo, carp_send_ad, vhe);
> + timeout_set_proc(&vhe->md_tmo, carp_master_down, vhe);
> + timeout_set_proc(&vhe->md6_tmo, carp_master_down, vhe);
>  
>   KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
>  
> Index: netinet/tcp_timer.h
> ===
> RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 tcp_timer.h
> --- netinet/tcp_timer.h   6 Jul 2011 23:44:20 -   1.13
> +++ netinet/tcp_timer.h   3 Oct 2016 12:47:11 -
> @@ -116,7 +116,7 @@ const char *tcptimers[] =
>   * Init, arm, disarm, and test TCP timers.
>   */
>  #define  TCP_TIMER_INIT(tp, timer)   
> \
> - timeout_set(&(tp)->t_timer[(timer)], tc

Re: bridge(4): fix span interface removal

2016-10-03 Thread Martin Pieuchot
On 03/10/16(Mon) 15:07, Rafael Zalamena wrote:
> While doing the "notify bridge of interface removal with hook" I noticed
> that the span ports suffer from not having something to remove them. To
> reproduce this problem, do the following steps:
> 
> # ifconfig vether0 up
> # ifconfig bridge0 up
> # ifconfig bridge0 addspan vether0
> # ifconfig vether0 destroy
> # ifconfig bridge0 # vether0 is still there!
> 
> The diff below fixes this problem by adding a hook for span ports as well
> and we get some fewer lines of duplicated code.

I wonder if span ports are still useful today and if we should keep
them.

Anyway as long as we keep them, let's fix the code, ok mpi@


> Index: net/if_bridge.c
> ===
> RCS file: /home/obsdcvs/src/sys/net/if_bridge.c,v
> retrieving revision 1.286
> diff -u -p -r1.286 if_bridge.c
> --- net/if_bridge.c   3 Oct 2016 12:26:13 -   1.286
> +++ net/if_bridge.c   3 Oct 2016 12:53:07 -
> @@ -107,6 +107,7 @@
>  void bridgeattach(int);
>  int  bridge_ioctl(struct ifnet *, u_long, caddr_t);
>  void bridge_ifdetach(void *);
> +void bridge_spandetach(void *);
>  int  bridge_input(struct ifnet *, struct mbuf *, void *);
>  void bridge_process(struct ifnet *, struct mbuf *);
>  void bridgeintr_frame(struct bridge_softc *, struct ifnet *, struct mbuf *);
> @@ -215,10 +216,8 @@ bridge_clone_destroy(struct ifnet *ifp)
>   bridge_rtflush(sc, IFBF_FLUSHALL);
>   while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL)
>   bridge_delete(sc, bif);
> - while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
> - TAILQ_REMOVE(&sc->sc_spanlist, bif, next);
> - free(bif, M_DEVBUF, sizeof *bif);
> - }
> + while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
> + bridge_spandetach(bif);
>  
>   bstp_destroy(sc->sc_stp);
>  
> @@ -408,6 +407,9 @@ bridge_ioctl(struct ifnet *ifp, u_long c
>   }
>   p->ifp = ifs;
>   p->bif_flags = IFBIF_SPAN;
> + p->bridge_sc = sc;
> + p->bif_dhcookie = hook_establish(ifs->if_detachhooks, 0,
> + bridge_spandetach, p);
>   SIMPLEQ_INIT(&p->bif_brlin);
>   SIMPLEQ_INIT(&p->bif_brlout);
>   TAILQ_INSERT_TAIL(&sc->sc_spanlist, p, next);
> @@ -418,8 +420,7 @@ bridge_ioctl(struct ifnet *ifp, u_long c
>   TAILQ_FOREACH(p, &sc->sc_spanlist, next) {
>   if (strncmp(p->ifp->if_xname, req->ifbr_ifsname,
>   sizeof(p->ifp->if_xname)) == 0) {
> - TAILQ_REMOVE(&sc->sc_spanlist, p, next);
> - free(p, M_DEVBUF, sizeof *p);
> + bridge_spandetach(p);
>   break;
>   }
>   }
> @@ -581,6 +582,17 @@ bridge_ifdetach(void *arg)
>   sc = bif->bridge_sc;
>  
>   bridge_delete(sc, bif);
> +}
> +
> +void
> +bridge_spandetach(void *arg)
> +{
> + struct bridge_iflist *p = (struct bridge_iflist *)arg;
> + struct bridge_softc *sc = p->bridge_sc;
> +
> + hook_disestablish(p->ifp->if_detachhooks, p->bif_dhcookie);
> + TAILQ_REMOVE(&sc->sc_spanlist, p, next);
> + free(p, M_DEVBUF, sizeof(*p));
>  }
>  
>  int
> 



bridge(4): fix span interface removal

2016-10-03 Thread Rafael Zalamena
While doing the "notify bridge of interface removal with hook" I noticed
that the span ports suffer from not having something to remove them. To
reproduce this problem, do the following steps:

# ifconfig vether0 up
# ifconfig bridge0 up
# ifconfig bridge0 addspan vether0
# ifconfig vether0 destroy
# ifconfig bridge0 # vether0 is still there!

The diff below fixes this problem by adding a hook for span ports as well
and we get some fewer lines of duplicated code.


ok?

Index: net/if_bridge.c
===
RCS file: /home/obsdcvs/src/sys/net/if_bridge.c,v
retrieving revision 1.286
diff -u -p -r1.286 if_bridge.c
--- net/if_bridge.c 3 Oct 2016 12:26:13 -   1.286
+++ net/if_bridge.c 3 Oct 2016 12:53:07 -
@@ -107,6 +107,7 @@
 void   bridgeattach(int);
 intbridge_ioctl(struct ifnet *, u_long, caddr_t);
 void   bridge_ifdetach(void *);
+void   bridge_spandetach(void *);
 intbridge_input(struct ifnet *, struct mbuf *, void *);
 void   bridge_process(struct ifnet *, struct mbuf *);
 void   bridgeintr_frame(struct bridge_softc *, struct ifnet *, struct mbuf *);
@@ -215,10 +216,8 @@ bridge_clone_destroy(struct ifnet *ifp)
bridge_rtflush(sc, IFBF_FLUSHALL);
while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL)
bridge_delete(sc, bif);
-   while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
-   TAILQ_REMOVE(&sc->sc_spanlist, bif, next);
-   free(bif, M_DEVBUF, sizeof *bif);
-   }
+   while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
+   bridge_spandetach(bif);
 
bstp_destroy(sc->sc_stp);
 
@@ -408,6 +407,9 @@ bridge_ioctl(struct ifnet *ifp, u_long c
}
p->ifp = ifs;
p->bif_flags = IFBIF_SPAN;
+   p->bridge_sc = sc;
+   p->bif_dhcookie = hook_establish(ifs->if_detachhooks, 0,
+   bridge_spandetach, p);
SIMPLEQ_INIT(&p->bif_brlin);
SIMPLEQ_INIT(&p->bif_brlout);
TAILQ_INSERT_TAIL(&sc->sc_spanlist, p, next);
@@ -418,8 +420,7 @@ bridge_ioctl(struct ifnet *ifp, u_long c
TAILQ_FOREACH(p, &sc->sc_spanlist, next) {
if (strncmp(p->ifp->if_xname, req->ifbr_ifsname,
sizeof(p->ifp->if_xname)) == 0) {
-   TAILQ_REMOVE(&sc->sc_spanlist, p, next);
-   free(p, M_DEVBUF, sizeof *p);
+   bridge_spandetach(p);
break;
}
}
@@ -581,6 +582,17 @@ bridge_ifdetach(void *arg)
sc = bif->bridge_sc;
 
bridge_delete(sc, bif);
+}
+
+void
+bridge_spandetach(void *arg)
+{
+   struct bridge_iflist *p = (struct bridge_iflist *)arg;
+   struct bridge_softc *sc = p->bridge_sc;
+
+   hook_disestablish(p->ifp->if_detachhooks, p->bif_dhcookie);
+   TAILQ_REMOVE(&sc->sc_spanlist, p, next);
+   free(p, M_DEVBUF, sizeof(*p));
 }
 
 int



First usages of timeout_set_proc(9)

2016-10-03 Thread Martin Pieuchot
Here are some timeouts that require a process context in order to call
ip_output().

The reason is that rtalloc(9) might end up inserting a RTF_CLONING route
and that requires holding a write lock.  This isn't going to change
because we're also going to use write locks to protect pf(4) internals. 

So to not introduce new sleeping points in the socket layer we're going
to serialize all code paths that might end up in ip_output().  For that
we're going to use a write lock, for which we need a process context.

Now that dlg@ fixed the deadlocks pointed by Haesbaert and guenther@,
let's convert the first timeouts.

ok?

Index: net/if_pflow.c
===
RCS file: /cvs/src/sys/net/if_pflow.c,v
retrieving revision 1.61
diff -u -p -r1.61 if_pflow.c
--- net/if_pflow.c  29 Apr 2016 08:55:03 -  1.61
+++ net/if_pflow.c  3 Oct 2016 12:47:11 -
@@ -548,15 +548,16 @@ pflow_init_timeouts(struct pflow_softc *
if (timeout_initialized(&sc->sc_tmo_tmpl))
timeout_del(&sc->sc_tmo_tmpl);
if (!timeout_initialized(&sc->sc_tmo))
-   timeout_set(&sc->sc_tmo, pflow_timeout, sc);
+   timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
break;
case PFLOW_PROTO_10:
if (!timeout_initialized(&sc->sc_tmo_tmpl))
-   timeout_set(&sc->sc_tmo_tmpl, pflow_timeout_tmpl, sc);
+   timeout_set_proc(&sc->sc_tmo_tmpl, pflow_timeout_tmpl,
+   sc);
if (!timeout_initialized(&sc->sc_tmo))
-   timeout_set(&sc->sc_tmo, pflow_timeout, sc);
+   timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
if (!timeout_initialized(&sc->sc_tmo6))
-   timeout_set(&sc->sc_tmo6, pflow_timeout6, sc);
+   timeout_set_proc(&sc->sc_tmo6, pflow_timeout6, sc);
 
timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT);
break;
Index: net/if_pfsync.c
===
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.234
diff -u -p -r1.234 if_pfsync.c
--- net/if_pfsync.c 27 Sep 2016 04:57:17 -  1.234
+++ net/if_pfsync.c 3 Oct 2016 12:47:11 -
@@ -328,9 +328,9 @@ pfsync_clone_create(struct if_clone *ifc
IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
-   timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
-   timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
-   timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
+   timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
+   timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
+   timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
 
if_attach(ifp);
if_alloc_sadl(ifp);
@@ -1720,7 +1720,7 @@ pfsync_defer(struct pf_state *st, struct
sc->sc_deferred++;
TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
 
-   timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
+   timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
timeout_add_msec(&pd->pd_tmo, 20);
 
schednetisr(NETISR_PFSYNC);
Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.293
diff -u -p -r1.293 ip_carp.c
--- netinet/ip_carp.c   25 Jul 2016 16:44:04 -  1.293
+++ netinet/ip_carp.c   3 Oct 2016 12:47:11 -
@@ -831,9 +831,9 @@ carp_new_vhost(struct carp_softc *sc, in
vhe->vhid = vhid;
vhe->advskew = advskew;
vhe->state = INIT;
-   timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
-   timeout_set(&vhe->md_tmo, carp_master_down, vhe);
-   timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
+   timeout_set_proc(&vhe->ad_tmo, carp_send_ad, vhe);
+   timeout_set_proc(&vhe->md_tmo, carp_master_down, vhe);
+   timeout_set_proc(&vhe->md6_tmo, carp_master_down, vhe);
 
KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 
Index: netinet/tcp_timer.h
===
RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.13
diff -u -p -r1.13 tcp_timer.h
--- netinet/tcp_timer.h 6 Jul 2011 23:44:20 -   1.13
+++ netinet/tcp_timer.h 3 Oct 2016 12:47:11 -
@@ -116,7 +116,7 @@ const char *tcptimers[] =
  * Init, arm, disarm, and test TCP timers.
  */
 #defineTCP_TIMER_INIT(tp, timer)   
\
-   timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
+   timeout_set_proc(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
 
 #defineTCP_TIMER_ARM(tp, timer, nticks)
\
timeout_add(&(tp)->t_

Re: stricter sys_mount() flag handling

2016-10-03 Thread Alexander Bluhm
On Mon, Oct 03, 2016 at 12:16:49PM +0200, Martin Natano wrote:
> > /*
> >  * Flags set by internal operations.
> >  */
> > #define MNT_LOCAL   0x1000  /* filesystem is stored locally */
> > #define MNT_QUOTA   0x2000  /* quotas are enabled on filesystem 
> > */
> > #define MNT_ROOTFS  0x4000  /* identifies the root filesystem */
> 
> Yes, done. I also removed MNT_DOOMED and MNT_WANTRDWR from the flag
> mask, as those flags are internal to the kernel and shouldn't be passed
> from userland.

I have just checked the mountd/mountd.c code.  It does a statfs(path,
&sfb) and then mount(sfb.f_fstypename, sfb.f_mntonname, sfb.f_flags
| MNT_UPDATE, &args).

So all flags that are set by the kernel can be passed back.  But
the kernel ignores some flags.  The current diff breaks that
semantics.

The optnames in sbin/mount/mount.c contains a good list of flags
that sould be ignored.  The o_optname column is "".

I still want the list of valid flags at the beginning of the system
call and not hidden in all the file system specific implementations.

So perhaps we can combine the your orignal diff with all flags and
the clearing the internal flags like it is done in sys_unmount.

If the bits are not defined anywhere, return EINVAL.  Then clear
all bits that should not be set by mount or unmount system call.

> Now, that MNT_FLAGMASK doesn't contain all flags anymore, I replaced the
> magic constant with the flag names to make it clearer which flags are
> included.

I am not sure wether we should define MNT_FLAGMASK in the header
or only in the system call implementation.

> +#define MNT_FLAGMASK (MNT_RDONLY|MNT_SYNCHRONOUS|MNT_NOEXEC|MNT_NOSUID|\
> +  MNT_NODEV|MNT_NOPERM|MNT_ASYNC|MNT_WXALLOWED|\
> +  MNT_EXRDONLY|MNT_EXPORTED|MNT_DEFEXPORTED|\
> +  MNT_EXPORTANON|MNT_NOATIME|MNT_UPDATE|MNT_DELEXPORT|\
> +  MNT_RELOAD|MNT_FORCE|MNT_SOFTDEP)

I think the nfs export flags can only be set in the ufs_args
export_info.ex_flags field.

bluhm



Re: Unexpected behavior in su/doas

2016-10-03 Thread Simon Ruderich
On Sat, Oct 01, 2016 at 03:54:40PM -0600, Theo de Raadt wrote:
> De-escalation using these "sudo" or "doas" like tools on a tty is
> somewhat unsafe - it has always been unsafe - because tty's have
> capabilities.

Until looking into this issue I was totally unaware of the
possible implications (even though they are obvious when you
start thinking about it) and I guess I'm not alone. I think we
should document the fact that using those tools for de-escalation
is not safe in the su/doas/sudo man pages.

> If you wish to be safer, do these operations without retaining access
> to a tty.

Are there tools available for this task?

I could use SSH, but that only works for unlocked accounts with a
password/ssh-key.

> Escalation on the other hand (user -> root) is different, because then
> it is clear you want to do more / everything.  But de-escalation is a
> joke.
>
> This is just one mechanism on tty, there are others.  On other
> descriptors there are other abilities.

sudo provides the use_pty option (sadly not by default) which
spawns a new session with a new controlling TTY and then forwards
the input to the original tty (similar to what SSH does - just
without the network). This way the unprivileged process never has
any access to the privileged terminal which prevents this attack.

Would it be useful to add a similar feature (per default) to
su/doas or are there downsides or other possible attacks with
this approach?

Regards
Simon
-- 
+ privacy is necessary
+ using gnupg http://gnupg.org
+ public key id: 0x92FEFDB7E44C32F9


signature.asc
Description: PGP signature


Re: v6 time_uptime vs time_second

2016-10-03 Thread Stuart Henderson
On 2016/10/02 19:37, Martin Pieuchot wrote:
> I'm trying to figure out where the regression in IPv6/NDP is and here's
> what I found so far.
> 
> When the route expiration time got converted from time_second to
> time_uptime we forgot to do the same for values inside RAs.  I'm not
> sure what's the real impact of this, but it is clearly wrong.  Diff
> below should fix that.
> 
> ok?

I haven't been able to trigger the problem again even without the
diff, but looking in ntpd log there have been a few time offsets
recently on that machine so it sounds like a good candidate.

No problems seen with the diff yet, and makes sense and reads well
to me, so OK sthen@.



> Index: netinet6/in6.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6.c,v
> retrieving revision 1.192
> diff -u -p -r1.192 in6.c
> --- netinet6/in6.c4 Sep 2016 10:32:01 -   1.192
> +++ netinet6/in6.c2 Oct 2016 17:18:24 -
> @@ -353,7 +353,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru
>   case SIOCGIFALIFETIME_IN6:
>   ifr->ifr_ifru.ifru_lifetime = ia6->ia6_lifetime;
>   if (ia6->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
> - time_t maxexpire;
> + time_t expire, maxexpire;
>   struct in6_addrlifetime *retlt =
>   &ifr->ifr_ifru.ifru_lifetime;
>  
> @@ -365,8 +365,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru
>   (time_t)~(1ULL << ((sizeof(maxexpire) * 8) - 1));
>   if (ia6->ia6_lifetime.ia6t_vltime <
>   maxexpire - ia6->ia6_updatetime) {
> - retlt->ia6t_expire = ia6->ia6_updatetime +
> + expire = ia6->ia6_updatetime +
>   ia6->ia6_lifetime.ia6t_vltime;
> + if (expire != 0) {
> + expire -= time_uptime;
> + expire += time_second;
> + }
> + retlt->ia6t_expire = expire;
>   } else
>   retlt->ia6t_expire = maxexpire;
>   }
> @@ -604,7 +609,7 @@ in6_update_ifa(struct ifnet *ifp, struct
>   ia6->ia_ifa.ifa_addr = sin6tosa(&ia6->ia_addr);
>   ia6->ia_addr.sin6_family = AF_INET6;
>   ia6->ia_addr.sin6_len = sizeof(ia6->ia_addr);
> - ia6->ia6_createtime = ia6->ia6_updatetime = time_second;
> + ia6->ia6_createtime = ia6->ia6_updatetime = time_uptime;
>   if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
>   /*
>* XXX: some functions expect that ifa_dstaddr is not
> @@ -667,12 +672,12 @@ in6_update_ifa(struct ifnet *ifp, struct
>   ia6->ia6_lifetime = ifra->ifra_lifetime;
>   if (ia6->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
>   ia6->ia6_lifetime.ia6t_expire =
> - time_second + ia6->ia6_lifetime.ia6t_vltime;
> + time_uptime + ia6->ia6_lifetime.ia6t_vltime;
>   } else
>   ia6->ia6_lifetime.ia6t_expire = 0;
>   if (ia6->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
>   ia6->ia6_lifetime.ia6t_preferred =
> - time_second + ia6->ia6_lifetime.ia6t_pltime;
> + time_uptime + ia6->ia6_lifetime.ia6t_pltime;
>   } else
>   ia6->ia6_lifetime.ia6t_preferred = 0;
>  
> Index: netinet6/in6.h
> ===
> RCS file: /cvs/src/sys/netinet6/in6.h,v
> retrieving revision 1.90
> diff -u -p -r1.90 in6.h
> --- netinet6/in6.h27 Jun 2016 16:33:48 -  1.90
> +++ netinet6/in6.h2 Oct 2016 17:18:24 -
> @@ -280,11 +280,11 @@ struct route_in6 {
>  
>  #define IFA6_IS_DEPRECATED(a) \
>   ((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
> -  (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
> +  (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
>(a)->ia6_lifetime.ia6t_pltime)
>  #define IFA6_IS_INVALID(a) \
>   ((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \
> -  (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
> +  (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
>(a)->ia6_lifetime.ia6t_vltime)
>  
>  #endif /* _KERNEL */
> Index: netinet6/ip6_forward.c
> ===
> RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v
> retrieving revision 1.92
> diff -u -p -r1.92 ip6_forward.c
> --- netinet6/ip6_forward.c24 Aug 2016 09:41:12 -  1.92
> +++ netinet6/ip6_forward.c2 Oct 2016 17:18:24 -
> @@ -104,8 +104,8 @@ ip6_forward(struct mbuf *m, struct rtent
>   IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
>   IN6_IS_ADDR_UNSPECI

Re: stricter sys_mount() flag handling

2016-10-03 Thread Martin Natano
> > And I want this check also for sys_unmount().
> > 
> 
> Good idea, sys_mount() is easy, because the only flag allowed there is
> MNT_FORCE.

s/sys_mount/sys_unmount/



Re: stricter sys_mount() flag handling

2016-10-03 Thread Martin Natano
Thank you for the feedback.

On Sun, Oct 02, 2016 at 07:53:04PM +0200, Alexander Bluhm wrote:
> 
> I think we once had a simmilar problem, when someone tried to unmount
> with MNT_DOOMED.  So I like to check all flags at the beginning of
> the system call.
> 
> But I think you should remove these from the mask:
> 
> /*
>  * Flags set by internal operations.
>  */
> #define MNT_LOCAL   0x1000  /* filesystem is stored locally */
> #define MNT_QUOTA   0x2000  /* quotas are enabled on filesystem */
> #define MNT_ROOTFS  0x4000  /* identifies the root filesystem */

Yes, done. I also removed MNT_DOOMED and MNT_WANTRDWR from the flag
mask, as those flags are internal to the kernel and shouldn't be passed
from userland.

Now, that MNT_FLAGMASK doesn't contain all flags anymore, I replaced the
magic constant with the flag names to make it clearer which flags are
included.


> And I want this check also for sys_unmount().
> 

Good idea, sys_mount() is easy, because the only flag allowed there is
MNT_FORCE.

Ok?

natano


Index: sys/mount.h
===
RCS file: /cvs/src/sys/sys/mount.h,v
retrieving revision 1.127
diff -u -p -r1.127 mount.h
--- sys/mount.h 10 Sep 2016 16:53:30 -  1.127
+++ sys/mount.h 3 Oct 2016 09:44:51 -
@@ -414,6 +414,15 @@ struct mount {
 #define MNT_DOOMED 0x0800  /* device behind filesystem is gone */
 
 /*
+ * Flags allowed to be passed to the mount syscall.
+ */
+#define MNT_FLAGMASK   (MNT_RDONLY|MNT_SYNCHRONOUS|MNT_NOEXEC|MNT_NOSUID|\
+MNT_NODEV|MNT_NOPERM|MNT_ASYNC|MNT_WXALLOWED|\
+MNT_EXRDONLY|MNT_EXPORTED|MNT_DEFEXPORTED|\
+MNT_EXPORTANON|MNT_NOATIME|MNT_UPDATE|MNT_DELEXPORT|\
+MNT_RELOAD|MNT_FORCE|MNT_SOFTDEP)
+
+/*
  * Flags for various system call interfaces.
  *
  * waitfor flags to vfs_sync() and getfsstat()
Index: kern/vfs_syscalls.c
===
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.265
diff -u -p -r1.265 vfs_syscalls.c
--- kern/vfs_syscalls.c 10 Sep 2016 16:53:30 -  1.265
+++ kern/vfs_syscalls.c 3 Oct 2016 09:44:52 -
@@ -117,6 +117,9 @@ sys_mount(struct proc *p, void *v, regis
if ((error = suser(p, 0)))
return (error);
 
+   if (flags & ~MNT_FLAGMASK)
+   return (EINVAL);
+
/*
 * Mount points must fit in MNAMELEN, not MAXPATHLEN.
 */
@@ -334,10 +337,14 @@ sys_unmount(struct proc *p, void *v, reg
struct mount *mp;
int error;
struct nameidata nd;
+   int flags = SCARG(uap, flags);
 
if ((error = suser(p, 0)) != 0)
return (error);
 
+   if (flags & ~MNT_FORCE)
+   return (EINVAL);
+
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
@@ -365,7 +372,7 @@ sys_unmount(struct proc *p, void *v, reg
if (vfs_busy(mp, VB_WRITE|VB_WAIT))
return (EBUSY);
 
-   return (dounmount(mp, SCARG(uap, flags) & MNT_FORCE, p, vp));
+   return (dounmount(mp, flags, p, vp));
 }
 
 /*