Re: Help with the NET_LOCK()

2017-01-25 Thread Hrvoje Popovski
On 25.1.2017. 7:32, Martin Pieuchot wrote:
> I just enabled the NET_LOCK() again and I'm looking for test reports.
> Please go build a kernel from sources or wait for the next snapshot,
> run it and report back.
> 
> If you're looking for some small coding tasks related to the NET_LOCK()
> just do:
> 
>   # sysctl kern.splassert=2
>   # sysctl kern.pool_debug=2
>   
> Then watch for the traces on your console.

Hi,

i'm sending traces from firewall updated few minutes:

on that firewall i have:

carp
pf
pfsync
isakmpd -K4
sasyncd
dhcpd
dhcpd sync
tcpdump -lnqttti pflog0
pflow ipfix


carp2: state transition: MASTER -> BACKUP
splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
pool_get() at pool_get+0x1ca
srp_v_gc_start() at srp_v_gc_start+0x55
rtable_mpath_reprio() at rtable_mpath_reprio+0x14c
rt_if_linkstate_change() at rt_if_linkstate_change+0x10d
rtable_walk_helper() at rtable_walk_helper+0x5e
art_walk_apply() at art_walk_apply+0x40
art_table_walk() at art_table_walk+0x117
art_table_walk() at art_table_walk+0x145
art_table_walk() at art_table_walk+0x145
art_table_walk() at art_table_walk+0x145
art_table_walk() at art_table_walk+0x145
art_table_walk() at art_table_walk+0x145
art_table_walk() at art_table_walk+0x145
art_walk() at art_walk+0xe4
rtable_walk() at rtable_walk+0x62
rt_if_track() at rt_if_track+0x6f
if_linkstate() at if_linkstate+0x67
if_linkstate_task() at if_linkstate_task+0x3d
taskq_thread() at taskq_thread+0x6c
end trace frame: 0x0, count: 237
End of stack trace.


splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
pool_get() at pool_get+0x1ca
m_get() at m_get+0x28
ip_ctloutput() at ip_ctloutput+0x4bf
sogetopt() at sogetopt+0x7e
sys_getsockopt() at sys_getsockopt+0xbf
syscall() at syscall+0x27b
--- syscall (number 118) ---
end of kernel
end trace frame: 0x3, count: 250
0x1b14ee7fed7a:
End of stack trace.


splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
ifmedia_ioctl() at ifmedia_ioctl+0x178
bnx_ioctl() at bnx_ioctl+0x144
ifioctl() at ifioctl+0x3e2
soo_ioctl() at soo_ioctl+0x22d
sys_ioctl() at sys_ioctl+0x1b1
syscall() at syscall+0x27b
--- syscall (number 54) ---
end of kernel
end trace frame: 0x4eea5d6f100, count: 249
0x4ee3fdabd7a:
End of stack trace.



splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
if_attach_common() at if_attach_common+0x15e
if_attach() at if_attach+0x2f
carp_clone_create() at carp_clone_create+0x14d
if_clone_create() at if_clone_create+0xab
ifioctl() at ifioctl+0x33c
soo_ioctl() at soo_ioctl+0x22d
sys_ioctl() at sys_ioctl+0x1b1
syscall() at syscall+0x27b
--- syscall (number 54) ---
end of kernel
end trace frame: 0x7f7fb5af, count: 247
0x9f0a5b1b05a:
End of stack trace.


splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
counters_read() at counters_read+0x61
ip_sysctl_ipstat() at ip_sysctl_ipstat+0x43
net_sysctl() at net_sysctl+0xf2
sys_sysctl() at sys_sysctl+0x213
syscall() at syscall+0x27b
--- syscall (number 202) ---
end of kernel
end trace frame: 0x7f7d9930, count: 250
0x334a33927fa:
End of stack trace.


splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
esp_init() at esp_init+0x200
pfkeyv2_send() at pfkeyv2_send+0x170a
pfkey_output() at pfkey_output+0x87
raw_usrreq() at raw_usrreq+0x232
sosend() at sosend+0x2ec
dofilewritev() at dofilewritev+0x205
sys_write() at sys_write+0x89
syscall() at syscall+0x27b
--- syscall (number 4) ---
end of kernel
end trace frame: 0x1b0, count: 247
0x11850f016b1a:


splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
import_identity() at import_identity+0x30
import_identities() at import_identities+0xd7
pfkeyv2_send() at pfkeyv2_send+0x1074
pfkey_output() at pfkey_output+0x87
raw_usrreq() at raw_usrreq+0x232
sosend() at sosend+0x2ec
dofilewritev() at dofilewritev+0x205
sys_write() at sys_write+0x89
syscall() at syscall+0x27b
--- syscall (number 4) ---
end of kernel
end trace frame: 0xe8, count: 246
0x11850f016b1a:
End of stack trace.



Re: 11n support for athn(4)

2017-01-25 Thread Peter Kay
On 17 January 2017 at 12:25, Stefan Sperling  wrote:
> On Tue, Jan 17, 2017 at 11:56:09AM +0100, Stefan Sperling wrote:
>> Without more details, all I can do is guess.
>> I made one guess already (antennas) and sadly I guessed wrong.
>
> Another thing where your setup differs from mine is that you are
> using a device with 3 antennas, whereas my devices only have 2.
>
> I have already ordered a 3 antenna device two days ago so I should
> have one to test with soon. Perhaps I will see a problem then.
>
> Is anyone else reading this list using a 3 antenna device?
> Please let us know whether it works.
I'd actually recompiled the kernel using January 16th sources and it
was working fine, until tonight. Brought up the glass console to see a
panic

The following was photographed, then OCRd. I've given it a quick look
to check it seems ok, but not a fine toothcomb.

sfer overflow

ddb> trace Debugger(d09e973d,13734d68,d09cf94e,f3734db8,b200) at
Debugger+0x7 panic(d09cf9de,f3734d8c,d03c88c9,d1605008,13734df8) at
panic.0x71
ieee80211_mira_update_stats(d169ab58,d1364030,4169a000,d03b9634,d0c6c980)
at ieee80211 mira_update_siats+0x3bf
 ieee80211_mira_choose(d169ab58,d1364030,d169a000,0,d13be040) at
ieee80.2.11_mira_choose+0x4e
ar5008_tx_process (d1364009 ,0 , f3734edc,760f4a ,8354c175) at
arS008_tx_process+0x1f2
 ar5008_tx intr (d1364000,c0,f3734f0c, d03ba33f ,f3734ef4) at ar5008tx_intr+0x7c
ar5008_intr (d1364000 , d135d380) at ar5008_intr+021b
 Xintrlegacy3() at Xintr_legacy3+0x81
--- interrupt ---
 cpu_idle_cycle(d0c6c960) at cpu_idlecycle+0xf

I also have the ps lists, but I ran out of time to OCR them. Let me
know if needed and I can forward the photos



Re: pfctl: Kill states within a rdomain

2017-01-25 Thread Florian Obser

OK florian@

On Wed, Jan 25, 2017 at 07:12:14PM -0500, Bertrand Provost wrote:
> Hi,
> 
> Based on feedback from jmc and florian here a new version of the patch
> - Add -V in usage() && __dead usage()
> - Change man
> 
> (I hope this time my mail client is well configure)
> 
> Regards,
> 
> -- 
> Bertrand Provost
> 
> Index: pfctl.8
> ===
> RCS file: /cvs/src/sbin/pfctl/pfctl.8,v
> retrieving revision 1.165
> diff -u -p -r1.165 pfctl.8
> --- pfctl.8   15 Jun 2015 08:48:23 -  1.165
> +++ pfctl.8   25 Jan 2017 23:55:10 -
> @@ -47,6 +47,7 @@
>  .Op Fl S Ar statefile
>  .Op Fl s Ar modifier Op Fl R Ar id
>  .Op Fl t Ar table Fl T Ar command Op Ar address ...
> +.Op Fl V Ar rdomain
>  .Op Fl x Ar level
>  .Ek
>  .Sh DESCRIPTION
> @@ -644,6 +645,10 @@ This flag is set when per-address counte
>  .El
>  .It Fl t Ar table
>  Specify the name of the table.
> +.It Fl V Ar rdomain
> +Select the routing domain to be used for kill states by host or by label.
> +The rdomain of a state is displayed in parentheses before the host by
> +.Fl s Cm states .
>  .It Fl v
>  Produce more verbose output.
>  A second use of
> Index: pfctl.c
> ===
> RCS file: /cvs/src/sbin/pfctl/pfctl.c,v
> retrieving revision 1.334
> diff -u -p -r1.334 pfctl.c
> --- pfctl.c   14 Jan 2016 12:05:51 -  1.334
> +++ pfctl.c   25 Jan 2017 23:55:10 -
> @@ -69,9 +69,9 @@ int  pfctl_clear_src_nodes(int, int);
>  int   pfctl_clear_states(int, const char *, int);
>  void  pfctl_addrprefix(char *, struct pf_addr *);
>  int   pfctl_kill_src_nodes(int, const char *, int);
> -int   pfctl_net_kill_states(int, const char *, int);
> -int   pfctl_label_kill_states(int, const char *, int);
> -int   pfctl_id_kill_states(int, const char *, int);
> +int   pfctl_net_kill_states(int, const char *, int, int);
> +int   pfctl_label_kill_states(int, const char *, int, int);
> +int   pfctl_id_kill_states(int, int);
>  void  pfctl_init_options(struct pfctl *);
>  int   pfctl_load_options(struct pfctl *);
>  int   pfctl_load_limit(struct pfctl *, unsigned int, unsigned int);
> @@ -231,7 +231,7 @@ struct pf_qihead qspecs = TAILQ_HEAD_INI
>  struct pf_qihead rootqs = TAILQ_HEAD_INITIALIZER(rootqs);
>  
>  
> -void
> +__dead void
>  usage(void)
>  {
>   extern char *__progname;
> @@ -243,7 +243,7 @@ usage(void)
>   fprintf(stderr, "[-L statefile] [-o level] [-p device]\n");
>   fprintf(stderr, "\t[-S statefile] [-s modifier [-R id]] ");
>   fprintf(stderr, "[-t table -T command [address ...]]\n");
> - fprintf(stderr, "\t[-x level]\n");
> + fprintf(stderr, "\t[-V rdomain] [-x level]\n");
>   exit(1);
>  }
>  
> @@ -512,7 +512,7 @@ pfctl_kill_src_nodes(int dev, const char
>  }
>  
>  int
> -pfctl_net_kill_states(int dev, const char *iface, int opts)
> +pfctl_net_kill_states(int dev, const char *iface, int opts, int rdomain)
>  {
>   struct pfioc_state_kill psk;
>   struct addrinfo *res[2], *resp[2];
> @@ -531,6 +531,8 @@ pfctl_net_kill_states(int dev, const cha
>   sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname))
>   errx(1, "invalid interface: %s", iface);
>  
> + psk.psk_rdomain = rdomain;
> +
>   pfctl_addrprefix(state_kill[0], _src.addr.v.a.mask);
>  
>   if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, [0]))) {
> @@ -618,7 +620,7 @@ pfctl_net_kill_states(int dev, const cha
>  }
>  
>  int
> -pfctl_label_kill_states(int dev, const char *iface, int opts)
> +pfctl_label_kill_states(int dev, const char *iface, int opts, int rdomain)
>  {
>   struct pfioc_state_kill psk;
>  
> @@ -635,6 +637,8 @@ pfctl_label_kill_states(int dev, const c
>   sizeof(psk.psk_label))
>   errx(1, "label too long: %s", state_kill[1]);
>  
> + psk.psk_rdomain = rdomain;
> +
>   if (ioctl(dev, DIOCKILLSTATES, ))
>   err(1, "DIOCKILLSTATES");
>  
> @@ -645,7 +649,7 @@ pfctl_label_kill_states(int dev, const c
>  }
>  
>  int
> -pfctl_id_kill_states(int dev, const char *iface, int opts)
> +pfctl_id_kill_states(int dev, int opts)
>  {
>   struct pfioc_state_kill psk;
>  
> @@ -2098,6 +2102,7 @@ main(int argc, char *argv[])
>   int  opts = 0;
>   int  optimize = PF_OPTIMIZE_BASIC;
>   int  level;
> + int  rdomain = 0;
>   char anchorname[PATH_MAX];
>   int  anchor_wildcard = 0;
>   char*path;
> @@ -2109,7 +2114,7 @@ main(int argc, char *argv[])
>   usage();
>  
>   while ((ch = getopt(argc, argv,
> - "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vx:z")) != -1) {
> + "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vV:x:z")) != -1) {
>   switch (ch) {
>   case 'a':
>   anchoropt = optarg;
> @@ -2215,6 +2220,13 @@ main(int argc, char *argv[])
>   opts |= PF_OPT_VERBOSE2;
>   opts |= 

Re: iwn: Centrino Ultimate-N 6300 scans, doesn't dhcp?

2017-01-25 Thread Austin Bentley
Interesting. It seems that on OpenBSD the reception is quite weak.
I've connected, HOWEVER, I have to be in the same room as my router!
On Linux I can be anywhere in my house.

I tried disabling powersave (-powersave), and still I have to be very
close to my router. My phone can connect to it no problem anywhere in
the house. Does anyone have any clue what's going on? Is it possible
that the driver is permanently set on a power save mode?

On Wed, Jan 25, 2017 at 7:37 PM, Austin Bentley  wrote:

> I've got a ThinkPad x220 with a Centrino Ultimate-N 6300, and I'm
> trying to get it to connect to a WPA1 WiFi network. Network has a
> password.
>
> Steps taken thus far:
> Tested WiFi on Linux
>
> Update WiFi drivers with fw_update via ethernet
>
> Verified /etc/firmware/iwn-6000 exists (it does)
>
> dmesg:
>  dmesg | grep iwn
>  iwn0 at pci2 dev 0 function 0 "Intel Centrino Ultimate-N 6300"
>  rev 0x3e: msi, MIM0 3T3R, MoW, address: xx:xx:xx:xx:xx:xx
>
> Scanning networks works:
>  ifconfig iwn0 scan
>  iwn0: flags=8843 mtu 1500
>lladdr xx:xx:xx:xx:xx:xx
>index 2 priority 4 llprio 3
>groups: wlan
>media: IEEE802.11 autoselect (DS1 mode 11g)
>status: no network
>ieee80211: nwid MYSSID chan 6 bssid yy:yy:yy:yy:yy:yy -58dBm
>  wpakey 0x wpaprotos wpa1,wpa2 wpaakms psk wpacipchers
>  tkip,ccmp wpagroupcipher tkip
>  nwid MYSSID chan 6 bssid yy:yy:yy:yy:yy:yy -58dBm HT-MCS15
>  privacy,short_slottime,wpa1
>  ... #other networks
>  ...
>
> Of course, I attempted `dhclient iwn0` with both /etc/hostname.iwn0,
> and `ifconfig iwn0 nwid MYSSID wpa wpakey  up`, and both
> achieved this:
>  dhclient iwn0
>  iwn0: no link ... sleeping
>
> The WiFi light on my laptop is constantly blinking too.
> Does this indicate anything?
>
> Thanks,
> Austin Bentley
>


iwn: Centrino Ultimate-N 6300 scans, doesn't dhcp?

2017-01-25 Thread Austin Bentley
I've got a ThinkPad x220 with a Centrino Ultimate-N 6300, and I'm
trying to get it to connect to a WPA1 WiFi network. Network has a
password.

Steps taken thus far:
Tested WiFi on Linux

Update WiFi drivers with fw_update via ethernet

Verified /etc/firmware/iwn-6000 exists (it does)

dmesg:
 dmesg | grep iwn
 iwn0 at pci2 dev 0 function 0 "Intel Centrino Ultimate-N 6300"
 rev 0x3e: msi, MIM0 3T3R, MoW, address: xx:xx:xx:xx:xx:xx

Scanning networks works:
 ifconfig iwn0 scan
 iwn0: flags=8843 mtu 1500
   lladdr xx:xx:xx:xx:xx:xx
   index 2 priority 4 llprio 3
   groups: wlan
   media: IEEE802.11 autoselect (DS1 mode 11g)
   status: no network
   ieee80211: nwid MYSSID chan 6 bssid yy:yy:yy:yy:yy:yy -58dBm
 wpakey 0x wpaprotos wpa1,wpa2 wpaakms psk wpacipchers
 tkip,ccmp wpagroupcipher tkip
 nwid MYSSID chan 6 bssid yy:yy:yy:yy:yy:yy -58dBm HT-MCS15
 privacy,short_slottime,wpa1
 ... #other networks
 ...

Of course, I attempted `dhclient iwn0` with both /etc/hostname.iwn0,
and `ifconfig iwn0 nwid MYSSID wpa wpakey  up`, and both
achieved this:
 dhclient iwn0
 iwn0: no link ... sleeping

The WiFi light on my laptop is constantly blinking too.
Does this indicate anything?

Thanks,
Austin Bentley


Re: Help with the NET_LOCK()

2017-01-25 Thread Martin Pieuchot
On 25/01/17(Wed) 20:12, David Hill wrote:
> > splassert: yield: want 0 have 1
> > Starting stack trace...
> > yield() at yield+0xac
> > malloc() at malloc+0x406
> > ip6_setmoptions() at ip6_setmoptions+0x65
> > ip6_ctloutput() at ip6_ctloutput+0x6d9
> > sosetopt() at sosetopt+0x8e
> > sys_setsockopt() at sys_setsockopt+0x12d
> > syscall() at syscall+0x27b
> > --- syscall (number 105) ---
> > end of kernel
> > end trace frame: 0x1b1987be0028, count: 250
> > 0x1b197d24b80a:
> > End of stack trace.
> 
> Switch to NOWAIT?  Check is already there...

This is a tricky one.

Instead of allocating the mbuf in every *ctloutput() we can do like
doaccept().  Allocate the mbuf in sys_setsockopt() before grabbing 
the lock and let the various function fill it.

That needs a bit more works since all the *ctloutput() have to be
fixed in one go.

> Index: netinet6/ip6_output.c
> ===
> RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
> retrieving revision 1.221
> diff -u -p -r1.221 ip6_output.c
> --- netinet6/ip6_output.c 19 Jan 2017 14:49:19 -  1.221
> +++ netinet6/ip6_output.c 26 Jan 2017 01:10:58 -
> @@ -1902,7 +1902,7 @@ ip6_setmoptions(int optname, struct ip6_
>* allocate one and initialize to default values.
>*/
>   im6o = (struct ip6_moptions *)
> - malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
> + malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
>  
>   if (im6o == NULL)
>   return (ENOBUFS);
> 



Re: Help with the NET_LOCK()

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 11:11:03AM -0500, David Hill wrote:
> On Wed, Jan 25, 2017 at 04:32:25PM +1000, Martin Pieuchot wrote:
> > I just enabled the NET_LOCK() again and I'm looking for test reports.
> > Please go build a kernel from sources or wait for the next snapshot,
> > run it and report back.
> > 
> > If you're looking for some small coding tasks related to the NET_LOCK()
> > just do:
> > 
> > # sysctl kern.splassert=2
> > # sysctl kern.pool_debug=2
> > 
> > Then watch for the traces on your console.
> > 
> > You'll see something like:
> > 
> > Starting stack trace...
> > yield(0,1,d09dac52,f5549dbc,d94e9378) at yield+0xa4
> > yield(d0bc8f40,1,f5549e18,80,14) at yield+0xa4
> > pool_get(d0bc8f40,1,f5549ec8,d03ecbfb,d97815f4) at pool_get+0x1ba
> > m_get(1,3,f5549ec0,d03a9362,d0bc22e0) at m_get+0x30
> > doaccept(d977e6c4,3,cf7ee4f8,cf7ee4ec,2000) at doaccept+0x193
> > sys_accept(d977e6c4,f5549f5c,f5549f7c,0,f5549fa8) at sys_accept+0x37
> > syscall() at syscall+0x250
> > 
> > This means accept(2) is doing a memory allocation that can sleep, here
> > with m_get(9), while holding the NET_LOCK().  Even if these should be
> > ok, it is easy to avoid them.  In the case of doaccept() a mbuf could
> > be allocated beforehand or simply use the stack for that.
> > 
> > Cheers,
> > Martin
> >
> 
> splassert: yield: want 0 have 1
> Starting stack trace...
> yield() at yield+0xac
> malloc() at malloc+0x406
> ip6_setmoptions() at ip6_setmoptions+0x65
> ip6_ctloutput() at ip6_ctloutput+0x6d9
> sosetopt() at sosetopt+0x8e
> sys_setsockopt() at sys_setsockopt+0x12d
> syscall() at syscall+0x27b
> --- syscall (number 105) ---
> end of kernel
> end trace frame: 0x1b1987be0028, count: 250
> 0x1b197d24b80a:
> End of stack trace.
>  
>

Switch to NOWAIT?  Check is already there...
 
Index: netinet6/ip6_output.c
===
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.221
diff -u -p -r1.221 ip6_output.c
--- netinet6/ip6_output.c   19 Jan 2017 14:49:19 -  1.221
+++ netinet6/ip6_output.c   26 Jan 2017 01:10:58 -
@@ -1902,7 +1902,7 @@ ip6_setmoptions(int optname, struct ip6_
 * allocate one and initialize to default values.
 */
im6o = (struct ip6_moptions *)
-   malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
+   malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
 
if (im6o == NULL)
return (ENOBUFS);



Re: Help with the NET_LOCK()

2017-01-25 Thread Martin Pieuchot
On 25/01/17(Wed) 20:04, David Hill wrote:
> [...] 
> Allocate the mbuf beforehand.  Also, move the setting of nflag closer
> to where its value is used.

ok mpi@

> Index: uipc_syscalls.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.147
> diff -u -p -r1.147 uipc_syscalls.c
> --- uipc_syscalls.c   25 Jan 2017 07:35:31 -  1.147
> +++ uipc_syscalls.c   26 Jan 2017 01:03:13 -
> @@ -265,7 +265,7 @@ doaccept(struct proc *p, int sock, struc
>  {
>   struct filedesc *fdp = p->p_fd;
>   struct file *fp, *headfp;
> - struct mbuf *nam = NULL;
> + struct mbuf *nam;
>   socklen_t namelen;
>   int error, s, tmpfd;
>   struct socket *head, *so;
> @@ -288,7 +288,8 @@ doaccept(struct proc *p, int sock, struc
>   return (error);
>   }
>  
> -redo:
> + nam = m_get(M_WAIT, MT_SONAME);
> + 
>   NET_LOCK(s);
>   head = headfp->f_data;
>   if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
> @@ -318,30 +319,16 @@ redo:
>   goto out;
>   }
>  
> - /* Figure out whether the new socket should be non-blocking. */
> - nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
> - : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
> -
> - nam = m_get(M_WAIT, MT_SONAME);
> -
> - /*
> -  * Check whether the queue emptied while we slept: m_get() may have
> -  * blocked, allowing the connection to be reset or another thread or
> -  * process to accept it.  If so, start over.
> -  */
> - if (head->so_qlen == 0) {
> - NET_UNLOCK(s);
> - m_freem(nam);
> - nam = NULL;
> - goto redo;
> - }
> -
>   /*
>* Do not sleep after we have taken the socket out of the queue.
>*/
>   so = TAILQ_FIRST(>so_q);
>   if (soqremque(so, 1) == 0)
>   panic("accept");
> +
> + /* Figure out whether the new socket should be non-blocking. */
> + nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
> + : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
>  
>   /* connection has been removed from the listen queue */
>   KNOTE(>so_rcv.sb_sel.si_note, 0);
> 



Re: Help with the NET_LOCK()

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 04:32:25PM +1000, Martin Pieuchot wrote:
> I just enabled the NET_LOCK() again and I'm looking for test reports.
> Please go build a kernel from sources or wait for the next snapshot,
> run it and report back.
> 
> If you're looking for some small coding tasks related to the NET_LOCK()
> just do:
> 
>   # sysctl kern.splassert=2
>   # sysctl kern.pool_debug=2
>   
> Then watch for the traces on your console.
> 
> You'll see something like:
> 
>   Starting stack trace...
>   yield(0,1,d09dac52,f5549dbc,d94e9378) at yield+0xa4
>   yield(d0bc8f40,1,f5549e18,80,14) at yield+0xa4
>   pool_get(d0bc8f40,1,f5549ec8,d03ecbfb,d97815f4) at pool_get+0x1ba
>   m_get(1,3,f5549ec0,d03a9362,d0bc22e0) at m_get+0x30
>   doaccept(d977e6c4,3,cf7ee4f8,cf7ee4ec,2000) at doaccept+0x193
>   sys_accept(d977e6c4,f5549f5c,f5549f7c,0,f5549fa8) at sys_accept+0x37
>   syscall() at syscall+0x250
>   
> This means accept(2) is doing a memory allocation that can sleep, here
> with m_get(9), while holding the NET_LOCK().  Even if these should be
> ok, it is easy to avoid them.  In the case of doaccept() a mbuf could
> be allocated beforehand or simply use the stack for that.
> 
> Cheers,
> Martin
>

Allocate the mbuf beforehand.  Also, move the setting of nflag closer
to where its value is used.

Index: uipc_syscalls.c
===
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.147
diff -u -p -r1.147 uipc_syscalls.c
--- uipc_syscalls.c 25 Jan 2017 07:35:31 -  1.147
+++ uipc_syscalls.c 26 Jan 2017 01:03:13 -
@@ -265,7 +265,7 @@ doaccept(struct proc *p, int sock, struc
 {
struct filedesc *fdp = p->p_fd;
struct file *fp, *headfp;
-   struct mbuf *nam = NULL;
+   struct mbuf *nam;
socklen_t namelen;
int error, s, tmpfd;
struct socket *head, *so;
@@ -288,7 +288,8 @@ doaccept(struct proc *p, int sock, struc
return (error);
}
 
-redo:
+   nam = m_get(M_WAIT, MT_SONAME);
+   
NET_LOCK(s);
head = headfp->f_data;
if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
@@ -318,30 +319,16 @@ redo:
goto out;
}
 
-   /* Figure out whether the new socket should be non-blocking. */
-   nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
-   : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
-
-   nam = m_get(M_WAIT, MT_SONAME);
-
-   /*
-* Check whether the queue emptied while we slept: m_get() may have
-* blocked, allowing the connection to be reset or another thread or
-* process to accept it.  If so, start over.
-*/
-   if (head->so_qlen == 0) {
-   NET_UNLOCK(s);
-   m_freem(nam);
-   nam = NULL;
-   goto redo;
-   }
-
/*
 * Do not sleep after we have taken the socket out of the queue.
 */
so = TAILQ_FIRST(>so_q);
if (soqremque(so, 1) == 0)
panic("accept");
+
+   /* Figure out whether the new socket should be non-blocking. */
+   nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
+   : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
 
/* connection has been removed from the listen queue */
KNOTE(>so_rcv.sb_sel.si_note, 0);



Re: pfctl: Kill states within a rdomain

2017-01-25 Thread Bertrand Provost
Hi,

Based on feedback from jmc and florian here a new version of the patch
- Add -V in usage() && __dead usage()
- Change man

(I hope this time my mail client is well configure)

Regards,

-- 
Bertrand Provost

Index: pfctl.8
===
RCS file: /cvs/src/sbin/pfctl/pfctl.8,v
retrieving revision 1.165
diff -u -p -r1.165 pfctl.8
--- pfctl.8 15 Jun 2015 08:48:23 -  1.165
+++ pfctl.8 25 Jan 2017 23:55:10 -
@@ -47,6 +47,7 @@
 .Op Fl S Ar statefile
 .Op Fl s Ar modifier Op Fl R Ar id
 .Op Fl t Ar table Fl T Ar command Op Ar address ...
+.Op Fl V Ar rdomain
 .Op Fl x Ar level
 .Ek
 .Sh DESCRIPTION
@@ -644,6 +645,10 @@ This flag is set when per-address counte
 .El
 .It Fl t Ar table
 Specify the name of the table.
+.It Fl V Ar rdomain
+Select the routing domain to be used for kill states by host or by label.
+The rdomain of a state is displayed in parentheses before the host by
+.Fl s Cm states .
 .It Fl v
 Produce more verbose output.
 A second use of
Index: pfctl.c
===
RCS file: /cvs/src/sbin/pfctl/pfctl.c,v
retrieving revision 1.334
diff -u -p -r1.334 pfctl.c
--- pfctl.c 14 Jan 2016 12:05:51 -  1.334
+++ pfctl.c 25 Jan 2017 23:55:10 -
@@ -69,9 +69,9 @@ intpfctl_clear_src_nodes(int, int);
 int pfctl_clear_states(int, const char *, int);
 voidpfctl_addrprefix(char *, struct pf_addr *);
 int pfctl_kill_src_nodes(int, const char *, int);
-int pfctl_net_kill_states(int, const char *, int);
-int pfctl_label_kill_states(int, const char *, int);
-int pfctl_id_kill_states(int, const char *, int);
+int pfctl_net_kill_states(int, const char *, int, int);
+int pfctl_label_kill_states(int, const char *, int, int);
+int pfctl_id_kill_states(int, int);
 voidpfctl_init_options(struct pfctl *);
 int pfctl_load_options(struct pfctl *);
 int pfctl_load_limit(struct pfctl *, unsigned int, unsigned int);
@@ -231,7 +231,7 @@ struct pf_qihead qspecs = TAILQ_HEAD_INI
 struct pf_qihead rootqs = TAILQ_HEAD_INITIALIZER(rootqs);
 
 
-void
+__dead void
 usage(void)
 {
extern char *__progname;
@@ -243,7 +243,7 @@ usage(void)
fprintf(stderr, "[-L statefile] [-o level] [-p device]\n");
fprintf(stderr, "\t[-S statefile] [-s modifier [-R id]] ");
fprintf(stderr, "[-t table -T command [address ...]]\n");
-   fprintf(stderr, "\t[-x level]\n");
+   fprintf(stderr, "\t[-V rdomain] [-x level]\n");
exit(1);
 }
 
@@ -512,7 +512,7 @@ pfctl_kill_src_nodes(int dev, const char
 }
 
 int
-pfctl_net_kill_states(int dev, const char *iface, int opts)
+pfctl_net_kill_states(int dev, const char *iface, int opts, int rdomain)
 {
struct pfioc_state_kill psk;
struct addrinfo *res[2], *resp[2];
@@ -531,6 +531,8 @@ pfctl_net_kill_states(int dev, const cha
sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname))
errx(1, "invalid interface: %s", iface);
 
+   psk.psk_rdomain = rdomain;
+
pfctl_addrprefix(state_kill[0], _src.addr.v.a.mask);
 
if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, [0]))) {
@@ -618,7 +620,7 @@ pfctl_net_kill_states(int dev, const cha
 }
 
 int
-pfctl_label_kill_states(int dev, const char *iface, int opts)
+pfctl_label_kill_states(int dev, const char *iface, int opts, int rdomain)
 {
struct pfioc_state_kill psk;
 
@@ -635,6 +637,8 @@ pfctl_label_kill_states(int dev, const c
sizeof(psk.psk_label))
errx(1, "label too long: %s", state_kill[1]);
 
+   psk.psk_rdomain = rdomain;
+
if (ioctl(dev, DIOCKILLSTATES, ))
err(1, "DIOCKILLSTATES");
 
@@ -645,7 +649,7 @@ pfctl_label_kill_states(int dev, const c
 }
 
 int
-pfctl_id_kill_states(int dev, const char *iface, int opts)
+pfctl_id_kill_states(int dev, int opts)
 {
struct pfioc_state_kill psk;
 
@@ -2098,6 +2102,7 @@ main(int argc, char *argv[])
int  opts = 0;
int  optimize = PF_OPTIMIZE_BASIC;
int  level;
+   int  rdomain = 0;
char anchorname[PATH_MAX];
int  anchor_wildcard = 0;
char*path;
@@ -2109,7 +2114,7 @@ main(int argc, char *argv[])
usage();
 
while ((ch = getopt(argc, argv,
-   "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vx:z")) != -1) {
+   "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vV:x:z")) != -1) {
switch (ch) {
case 'a':
anchoropt = optarg;
@@ -2215,6 +2220,13 @@ main(int argc, char *argv[])
opts |= PF_OPT_VERBOSE2;
opts |= PF_OPT_VERBOSE;
break;
+   case 'V':
+   rdomain = strtonum(optarg, 0, RT_TABLEID_MAX, );
+   if (errstr) {
+   warnx("Invalid rdomain: %s", errstr);
+

Re: athn0: device timeout (AR9271 USB 2.0 Wifi-key as hostap)

2017-01-25 Thread Martin Pieuchot
[moving to tech@]

On 26/01/17(Thu) 00:15, Adam Wolk wrote:
> One of the diff from testing had this guard in place:
[...]
> Index: if_athn_usb.c
> ===
> RCS file: /cvs/src/sys/dev/usb/if_athn_usb.c,v
> retrieving revision 1.42
> diff -u -p -r1.42 if_athn_usb.c
> --- if_athn_usb.c   11 Dec 2015 16:07:02 -  1.42
> +++ if_athn_usb.c   4 Sep 2016 18:48:14 -
> @@ -2098,13 +2098,17 @@ void
>  athn_usb_watchdog(struct ifnet *ifp)
>  {
> struct athn_softc *sc = ifp->if_softc;
> +   struct ieee80211com *ic = >sc_ic;
> 
> ifp->if_timer = 0;
> 
> if (sc->sc_tx_timer > 0) {
> if (--sc->sc_tx_timer == 0) {
> printf("%s: device timeout\n", sc->sc_dev.dv_xname);
> -   /* athn_usb_init(ifp); XXX needs a process context! */
> +   if (ic->ic_bss == NULL)
> +   return;
> +   athn_usb_stop(ifp);
> +   athn_usb_init(ifp);
> ifp->if_oerrors++;
> return;
> }
> 
> 
> the ic->ic_bss being null doing stop resulted in further crashing. Though it 
> was

This diff cannot be better than the other one.  Let me explain what the
problem is.

When an interface is being detached, the detaching thread waits in
if_detach() until all if_get(9) references are release.  The problem
is that wireless driver call:

ieee80211_ifdetach(ifp);
if_detach(ifp);

So when the detaching thread is waiting for if_get(9) references it is
no longer safe to dereference wireless data structures. 

That's why you added the (ic->ic_bss == NULL) check.  But to be correct
the driver should do such check every time it comes back from sleeping.
This is not possible since you'd have to go deep in the USB layer.

What I suggested at Cambridge is to move part of the if_detach() code
into an if_barrier() function and call it like the diff below does.

Now this needs conversion and testing on multiple interfaces to make
sure it is safe and we can convert all the tree.  But as long as we
call if_barrier() first in an interface detach routing it will be safe
to free any memory afterwards.

Index: dev/ic/rtwn.c
===
RCS file: /cvs/src/sys/dev/ic/rtwn.c,v
retrieving revision 1.11
diff -u -p -r1.11 rtwn.c
--- dev/ic/rtwn.c   8 Jan 2017 05:48:27 -   1.11
+++ dev/ic/rtwn.c   25 Jan 2017 23:50:58 -
@@ -262,8 +262,9 @@ rtwn_detach(struct rtwn_softc *sc, int f
task_del(systq, >init_task);
 
if (ifp->if_softc != NULL) {
+   if_barrier(ifp);
ieee80211_ifdetach(ifp);
-   if_detach(ifp);
+   if_detach2(ifp);
}
 
splx(s);
Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.482
diff -u -p -r1.482 if.c
--- net/if.c25 Jan 2017 21:59:41 -  1.482
+++ net/if.c25 Jan 2017 23:57:13 -
@@ -999,26 +999,18 @@ if_deactivate(struct ifnet *ifp)
splx(s);
 }
 
-/*
- * Detach an interface from everything in the kernel.  Also deallocate
- * private resources.
- */
 void
-if_detach(struct ifnet *ifp)
+if_barrier(struct ifnet *ifp)
 {
-   struct ifaddr *ifa;
-   struct ifg_list *ifg;
-   struct domain *dp;
-   int i, s, s2;
+   int s;
+
+   NET_LOCK(s);
 
/* Undo pseudo-driver changes. */
if_deactivate(ifp);
 
ifq_clr_oactive(>if_snd);
 
-   NET_LOCK(s);
-   s2 = splnet();
-   /* Other CPUs must not have a reference before we start destroying. */
if_idxmap_remove(ifp);
 
ifp->if_qstart = if_detached_qstart;
@@ -1033,9 +1025,36 @@ if_detach(struct ifnet *ifp)
timeout_del(ifp->if_slowtimo);
task_del(systq, ifp->if_watchdogtask);
 
-   /* Remove the link state task */
task_del(systq, ifp->if_linkstatetask);
 
+   NET_UNLOCK(s);
+}
+
+/*
+ * Detach an interface from everything in the kernel.  Also deallocate
+ * private resources.
+ */
+void
+if_detach(struct ifnet *ifp)
+{
+   int s;
+
+   s = splnet();
+   if_barrier(ifp);
+   if_detach2(ifp);
+   splx(s);
+}
+
+void
+if_detach2(struct ifnet *ifp)
+{
+   struct ifaddr *ifa;
+   struct ifg_list *ifg;
+   struct domain *dp;
+   int i, s;
+
+   NET_LOCK(s);
+
 #if NBPFILTER > 0
bpfdetach(ifp);
 #endif
@@ -1092,7 +,6 @@ if_detach(struct ifnet *ifp)
 
/* Announce that the interface is gone. */
rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
-   splx(s2);
NET_UNLOCK(s);
 
for (i = 0; i < ifp->if_nifqs; i++)
Index: net/if.h
===
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.186
diff -u -p -r1.186 

pr_ctlinput void

2017-01-25 Thread Alexander Bluhm
Hi,

This is the next step in merging protosw and ip6protosw.

The IPv4 pr_ctlinput functions return a void pointer that is always
NULL and never used.  Make all functions void like in the IPv6 case.

ok?

bluhm

Index: netinet/ip_icmp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.160
diff -u -p -r1.160 ip_icmp.c
--- netinet/ip_icmp.c   25 Jan 2017 17:34:31 -  1.160
+++ netinet/ip_icmp.c   25 Jan 2017 23:27:53 -
@@ -326,7 +326,7 @@ icmp_input_if(struct ifnet *ifp, struct 
struct sockaddr_in sin;
int icmplen, i, code;
struct in_ifaddr *ia;
-   void *(*ctlfunc)(int, struct sockaddr *, u_int, void *);
+   void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
struct mbuf *opts;
 
/*
@@ -1053,7 +1053,7 @@ icmp_mtudisc_timeout(struct rtentry *rt,
return;
 
if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-   void *(*ctlfunc)(int, struct sockaddr *, u_int, void *);
+   void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
struct sockaddr_in sin;
 
sin = *satosin(rt_key(rt));
Index: netinet/ip_ipsp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.175
diff -u -p -r1.175 ip_ipsp.h
--- netinet/ip_ipsp.h   25 Jan 2017 17:34:31 -  1.175
+++ netinet/ip_ipsp.h   25 Jan 2017 23:33:06 -
@@ -493,8 +493,8 @@ int ah_output(struct mbuf *, struct tdb 
 intah_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 
 void   ah4_input(struct mbuf *, int, int);
-void   *ah4_ctlinput(int, struct sockaddr *, u_int, void *);
-void   *udpencap_ctlinput(int, struct sockaddr *, u_int, void *);
+void   ah4_ctlinput(int, struct sockaddr *, u_int, void *);
+void   udpencap_ctlinput(int, struct sockaddr *, u_int, void *);
 
 #ifdef INET6
 intah6_input(struct mbuf **, int *, int);
@@ -509,7 +509,7 @@ int esp_output(struct mbuf *, struct tdb
 intesp_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 
 void   esp4_input(struct mbuf *, int, int);
-void   *esp4_ctlinput(int, struct sockaddr *, u_int, void *);
+void   esp4_ctlinput(int, struct sockaddr *, u_int, void *);
 
 #ifdef INET6
 intesp6_input(struct mbuf **, int *, int);
Index: netinet/ipsec_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.139
diff -u -p -r1.139 ipsec_input.c
--- netinet/ipsec_input.c   25 Jan 2017 17:34:31 -  1.139
+++ netinet/ipsec_input.c   25 Jan 2017 23:35:43 -
@@ -79,7 +79,7 @@
 
 #include "bpfilter.h"
 
-void *ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
+void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
 int ah4_input_cb(struct mbuf *, ...);
 int esp4_input_cb(struct mbuf *, ...);
 int ipcomp4_input_cb(struct mbuf *, ...);
@@ -725,14 +725,14 @@ ah4_input_cb(struct mbuf *m, ...)
 
 
 /* XXX rdomain */
-void *
+void
 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
 {
if (sa->sa_family != AF_INET ||
sa->sa_len != sizeof(struct sockaddr_in))
-   return (NULL);
+   return;
 
-   return (ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH));
+   ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
 }
 
 /* IPv4 ESP wrapper. */
@@ -786,7 +786,7 @@ ipcomp4_input_cb(struct mbuf *m, ...)
return 0;
 }
 
-void *
+void
 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
 void *v, int proto)
 {
@@ -810,7 +810,7 @@ ipsec_common_ctlinput(u_int rdomain, int
 * or the MTU is too small to be acceptable.
 */
if (mtu < 296)
-   return (NULL);
+   return;
 
memset(, 0, sizeof(struct sockaddr_in));
dst.sin_family = AF_INET;
@@ -822,13 +822,13 @@ ipsec_common_ctlinput(u_int rdomain, int
tdbp = gettdb(rdomain, spi, (union sockaddr_union *),
proto);
if (tdbp == NULL || tdbp->tdb_flags & TDBF_INVALID)
-   return (NULL);
+   return;
 
/* Walk the chain backwards to the first tdb */
for (; tdbp; tdbp = tdbp->tdb_inext) {
if (tdbp->tdb_flags & TDBF_INVALID ||
(adjust = ipsec_hdrsz(tdbp)) == -1)
-   return (NULL);
+   return;
 
mtu -= adjust;
 
@@ -842,11 +842,10 @@ ipsec_common_ctlinput(u_int rdomain, int
adjust));
}
}
-   return (NULL);
 }
 
 /* XXX rdomain */
-void *
+void
 

Re: Help with the NET_LOCK() - socreate

2017-01-25 Thread Martin Pieuchot
On 25/01/17(Wed) 15:52, Theo de Raadt wrote:
> I have the same diff.
> 
> It does not help the pr_usrreq path though.

Sure but, let's do baby step. :)

> >On Wed, Jan 25, 2017 at 11:02:22AM -0500, David Hill wrote:
> >> splassert: yield: want 0 have 1
> >> Starting stack trace...
> >> yield() at yield+0xac
> >> pool_get() at pool_get+0x1ca
> >> socreate() at socreate+0xba
> >> sys_socket() at sys_socket+0x135
> >> syscall() at syscall+0x27b
> >> --- syscall (number 97) ---
> >> end of kernel
> >> end trace frame: 0x1b1a16c05800, count: 252
> >> 0x1b197d23277a:
> >> End of stack trace.
> >
> >This one looks easy.  We do not need a lock to setup the still
> >private so structure.
> >
> >ok?
> >
> >bluhm
> >
> >Index: kern/uipc_socket.c
> >===
> >RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> >retrieving revision 1.173
> >diff -u -p -r1.173 uipc_socket.c
> >--- kern/uipc_socket.c   25 Jan 2017 16:45:50 -  1.173
> >+++ kern/uipc_socket.c   25 Jan 2017 18:01:06 -
> >@@ -123,7 +123,6 @@ socreate(int dom, struct socket **aso, i
> > return (EPROTONOSUPPORT);
> > if (prp->pr_type != type)
> > return (EPROTOTYPE);
> >-NET_LOCK(s);
> > so = pool_get(_pool, PR_WAITOK | PR_ZERO);
> > TAILQ_INIT(>so_q0);
> > TAILQ_INIT(>so_q);
> >@@ -136,6 +135,7 @@ socreate(int dom, struct socket **aso, i
> > so->so_egid = p->p_ucred->cr_gid;
> > so->so_cpid = p->p_p->ps_pid;
> > so->so_proto = prp;
> >+NET_LOCK(s);
> > error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
> > (struct mbuf *)(long)proto, NULL, p);
> > if (error) {
> >
> >
> 



Re: Help with the NET_LOCK() - socreate

2017-01-25 Thread Theo de Raadt
I have the same diff.

It does not help the pr_usrreq path though.

>On Wed, Jan 25, 2017 at 11:02:22AM -0500, David Hill wrote:
>> splassert: yield: want 0 have 1
>> Starting stack trace...
>> yield() at yield+0xac
>> pool_get() at pool_get+0x1ca
>> socreate() at socreate+0xba
>> sys_socket() at sys_socket+0x135
>> syscall() at syscall+0x27b
>> --- syscall (number 97) ---
>> end of kernel
>> end trace frame: 0x1b1a16c05800, count: 252
>> 0x1b197d23277a:
>> End of stack trace.
>
>This one looks easy.  We do not need a lock to setup the still
>private so structure.
>
>ok?
>
>bluhm
>
>Index: kern/uipc_socket.c
>===
>RCS file: /cvs/src/sys/kern/uipc_socket.c,v
>retrieving revision 1.173
>diff -u -p -r1.173 uipc_socket.c
>--- kern/uipc_socket.c 25 Jan 2017 16:45:50 -  1.173
>+++ kern/uipc_socket.c 25 Jan 2017 18:01:06 -
>@@ -123,7 +123,6 @@ socreate(int dom, struct socket **aso, i
>   return (EPROTONOSUPPORT);
>   if (prp->pr_type != type)
>   return (EPROTOTYPE);
>-  NET_LOCK(s);
>   so = pool_get(_pool, PR_WAITOK | PR_ZERO);
>   TAILQ_INIT(>so_q0);
>   TAILQ_INIT(>so_q);
>@@ -136,6 +135,7 @@ socreate(int dom, struct socket **aso, i
>   so->so_egid = p->p_ucred->cr_gid;
>   so->so_cpid = p->p_p->ps_pid;
>   so->so_proto = prp;
>+  NET_LOCK(s);
>   error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
>   (struct mbuf *)(long)proto, NULL, p);
>   if (error) {
>
>



Re: Help with the NET_LOCK() - socreate

2017-01-25 Thread Martin Pieuchot
On 25/01/17(Wed) 19:04, Alexander Bluhm wrote:
> On Wed, Jan 25, 2017 at 11:02:22AM -0500, David Hill wrote:
> > splassert: yield: want 0 have 1
> > Starting stack trace...
> > yield() at yield+0xac
> > pool_get() at pool_get+0x1ca
> > socreate() at socreate+0xba
> > sys_socket() at sys_socket+0x135
> > syscall() at syscall+0x27b
> > --- syscall (number 97) ---
> > end of kernel
> > end trace frame: 0x1b1a16c05800, count: 252
> > 0x1b197d23277a:
> > End of stack trace.
> 
> This one looks easy.  We do not need a lock to setup the still
> private so structure.
> 
> ok?

It's right, in the end what we want is to protect the insertion of the
newly created socket to a global list.

ok mpi@

> Index: kern/uipc_socket.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 uipc_socket.c
> --- kern/uipc_socket.c25 Jan 2017 16:45:50 -  1.173
> +++ kern/uipc_socket.c25 Jan 2017 18:01:06 -
> @@ -123,7 +123,6 @@ socreate(int dom, struct socket **aso, i
>   return (EPROTONOSUPPORT);
>   if (prp->pr_type != type)
>   return (EPROTOTYPE);
> - NET_LOCK(s);
>   so = pool_get(_pool, PR_WAITOK | PR_ZERO);
>   TAILQ_INIT(>so_q0);
>   TAILQ_INIT(>so_q);
> @@ -136,6 +135,7 @@ socreate(int dom, struct socket **aso, i
>   so->so_egid = p->p_ucred->cr_gid;
>   so->so_cpid = p->p_p->ps_pid;
>   so->so_proto = prp;
> + NET_LOCK(s);
>   error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
>   (struct mbuf *)(long)proto, NULL, p);
>   if (error) {
> 



Re: pfctl: Kill states within a rdomain

2017-01-25 Thread Florian Obser
On Wed, Jan 25, 2017 at 10:45:55AM -0500, Bertrand Provost wrote:
> Hi,
> 
> On 2017-01-24 07:26 PM, Sebastian Benoit wrote:
> > but your diff does not seem to be against -current, you started from 6.0
> >
> > But even with 6.0 i get rejects, maybe you mail client messes this up.
> My patch is based on current I suppose problem came from my mail client.
> 
> > Can you please resend a good diff?
> 
> I try another config of my mail client and I attach the patch in case it
> fail again.

the attachment applies, the inline one does not. I currently can't
spot why that is.

> 
> Index: pfctl.8
> ===
> RCS file: /cvs/src/sbin/pfctl/pfctl.8,v
> retrieving revision 1.165
> diff -u -p -r1.165 pfctl.8
> --- pfctl.8   15 Jun 2015 08:48:23 -  1.165
> +++ pfctl.8   24 Jan 2017 21:38:56 -
> @@ -47,6 +47,7 @@
>  .Op Fl S Ar statefile
>  .Op Fl s Ar modifier Op Fl R Ar id
>  .Op Fl t Ar table Fl T Ar command Op Ar address ...
> +.Op Fl V Ar rdomain

this misses a description of the V flag later on, it's only mentioned
in an example of the k flag

>  .Op Fl x Ar level
>  .Ek
>  .Sh DESCRIPTION
> @@ -275,6 +276,12 @@ from rules carrying the label
>  .Dq foobar :
>  .Pp
>  .Dl # pfctl -k label -k foobar
> +.Pp
> +To kill states withing a rdomain (the rdomain of a state is displayed

within

and I believe it's an rdomain. Not a big fan of this, but I
guess we will summon jmc sooner or later :)

> +in parentheses before the host by pfctl -s states) use
> +.Fl V Ar rdomain :
> +.Pp
> +.Dl # pfctl -V rdomain -k host

this is an example, but "rdomain" is not a valid argument for -V,

.Dl # pfctl -V 23 -k 0.0.0.0

>  .Pp
>  To kill one specific state by its unique state ID
>  (as shown by pfctl -s state -vv),
> Index: pfctl.c
> ===
> RCS file: /cvs/src/sbin/pfctl/pfctl.c,v
> retrieving revision 1.334
> diff -u -p -r1.334 pfctl.c
> --- pfctl.c   14 Jan 2016 12:05:51 -  1.334
> +++ pfctl.c   24 Jan 2017 21:38:56 -
> @@ -69,9 +69,9 @@ int  pfctl_clear_src_nodes(int, int);
>  int   pfctl_clear_states(int, const char *, int);
>  void  pfctl_addrprefix(char *, struct pf_addr *);
>  int   pfctl_kill_src_nodes(int, const char *, int);
> -int   pfctl_net_kill_states(int, const char *, int);
> -int   pfctl_label_kill_states(int, const char *, int);
> -int   pfctl_id_kill_states(int, const char *, int);
> +int   pfctl_net_kill_states(int, const char *, int, int);
> +int   pfctl_label_kill_states(int, const char *, int, int);
> +int   pfctl_id_kill_states(int, int);
>  void  pfctl_init_options(struct pfctl *);
>  int   pfctl_load_options(struct pfctl *);
>  int   pfctl_load_limit(struct pfctl *, unsigned int, unsigned int);
> @@ -512,7 +512,7 @@ pfctl_kill_src_nodes(int dev, const char
>  }
> 
>  int
> -pfctl_net_kill_states(int dev, const char *iface, int opts)
> +pfctl_net_kill_states(int dev, const char *iface, int opts, int rdomain)
>  {
>   struct pfioc_state_kill psk;
>   struct addrinfo *res[2], *resp[2];
> @@ -531,6 +531,8 @@ pfctl_net_kill_states(int dev, const cha
>   sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname))
>   errx(1, "invalid interface: %s", iface);
> 
> + psk.psk_rdomain = rdomain;
> +
>   pfctl_addrprefix(state_kill[0], _src.addr.v.a.mask);
> 
>   if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, [0]))) {
> @@ -618,7 +620,7 @@ pfctl_net_kill_states(int dev, const cha
>  }
> 
>  int
> -pfctl_label_kill_states(int dev, const char *iface, int opts)
> +pfctl_label_kill_states(int dev, const char *iface, int opts, int rdomain)
>  {
>   struct pfioc_state_kill psk;
> 
> @@ -635,6 +637,8 @@ pfctl_label_kill_states(int dev, const c
>   sizeof(psk.psk_label))
>   errx(1, "label too long: %s", state_kill[1]);
> 
> + psk.psk_rdomain = rdomain;
> +
>   if (ioctl(dev, DIOCKILLSTATES, ))
>   err(1, "DIOCKILLSTATES");
> 
> @@ -645,7 +649,7 @@ pfctl_label_kill_states(int dev, const c
>  }
> 
>  int
> -pfctl_id_kill_states(int dev, const char *iface, int opts)
> +pfctl_id_kill_states(int dev, int opts)
>  {
>   struct pfioc_state_kill psk;
> 
> @@ -2098,6 +2102,7 @@ main(int argc, char *argv[])
>   int  opts = 0;
>   int  optimize = PF_OPTIMIZE_BASIC;
>   int  level;
> + int  rdomain = 0;
>   char anchorname[PATH_MAX];
>   int  anchor_wildcard = 0;
>   char*path;
> @@ -2109,7 +2114,7 @@ main(int argc, char *argv[])
>   usage();
> 
>   while ((ch = getopt(argc, argv,
> - "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vx:z")) != -1) {
> + "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vV:x:z")) != -1) {

missing diff for usage(); while there, please mark it __dead

othere than that looks good

>   switch (ch) {
>   case 'a':
>   anchoropt = optarg;
> @@ -2215,6 +2220,13 @@ main(int 

Re: Help with the NET_LOCK()

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 04:32:25PM +1000, Martin Pieuchot wrote:
> I just enabled the NET_LOCK() again and I'm looking for test reports.
> Please go build a kernel from sources or wait for the next snapshot,
> run it and report back.
> 
> If you're looking for some small coding tasks related to the NET_LOCK()
> just do:
> 
>   # sysctl kern.splassert=2
>   # sysctl kern.pool_debug=2
>   
> Then watch for the traces on your console.
> 
> You'll see something like:
> 
>   Starting stack trace...
>   yield(0,1,d09dac52,f5549dbc,d94e9378) at yield+0xa4
>   yield(d0bc8f40,1,f5549e18,80,14) at yield+0xa4
>   pool_get(d0bc8f40,1,f5549ec8,d03ecbfb,d97815f4) at pool_get+0x1ba
>   m_get(1,3,f5549ec0,d03a9362,d0bc22e0) at m_get+0x30
>   doaccept(d977e6c4,3,cf7ee4f8,cf7ee4ec,2000) at doaccept+0x193
>   sys_accept(d977e6c4,f5549f5c,f5549f7c,0,f5549fa8) at sys_accept+0x37
>   syscall() at syscall+0x250
>   
> This means accept(2) is doing a memory allocation that can sleep, here
> with m_get(9), while holding the NET_LOCK().  Even if these should be
> ok, it is easy to avoid them.  In the case of doaccept() a mbuf could
> be allocated beforehand or simply use the stack for that.
> 
> Cheers,
> Martin
>

splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
import_identity() at import_identity+0x30
import_identities() at import_identities+0x62
pfkeyv2_send() at pfkeyv2_send+0x1074
pfkey_output() at pfkey_output+0x87
raw_usrreq() at raw_usrreq+0x232
sosend() at sosend+0x2ec
dofilewritev() at dofilewritev+0x205
sys_writev() at sys_writev+0x6d
syscall() at syscall+0x27b
--- syscall (number 121) ---
end of kernel
end trace frame: 0x1ef4e34e5300, count: 246
0x1ef227c2f3aa:
End of stack trace.
 



Re: Help with the NET_LOCK() - socreate

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 07:04:19PM +0100, Alexander Bluhm wrote:
> On Wed, Jan 25, 2017 at 11:02:22AM -0500, David Hill wrote:
> > splassert: yield: want 0 have 1
> > Starting stack trace...
> > yield() at yield+0xac
> > pool_get() at pool_get+0x1ca
> > socreate() at socreate+0xba
> > sys_socket() at sys_socket+0x135
> > syscall() at syscall+0x27b
> > --- syscall (number 97) ---
> > end of kernel
> > end trace frame: 0x1b1a16c05800, count: 252
> > 0x1b197d23277a:
> > End of stack trace.
> 
> This one looks easy.  We do not need a lock to setup the still
> private so structure.
> 
> ok?
> 
> bluhm
> 

Which uncovers: 

splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
route_usrreq() at route_usrreq+0xb0
socreate() at socreate+0x15f
sys_socket() at sys_socket+0x135
syscall() at syscall+0x27b
--- syscall (number 97) ---
end of kernel
end trace frame: 0x1b171444a720, count: 251
0x1b19e6cfd5ca:
End of stack trace.
 



Re: Help with the NET_LOCK() - socreate

2017-01-25 Thread Alexander Bluhm
On Wed, Jan 25, 2017 at 11:02:22AM -0500, David Hill wrote:
> splassert: yield: want 0 have 1
> Starting stack trace...
> yield() at yield+0xac
> pool_get() at pool_get+0x1ca
> socreate() at socreate+0xba
> sys_socket() at sys_socket+0x135
> syscall() at syscall+0x27b
> --- syscall (number 97) ---
> end of kernel
> end trace frame: 0x1b1a16c05800, count: 252
> 0x1b197d23277a:
> End of stack trace.

This one looks easy.  We do not need a lock to setup the still
private so structure.

ok?

bluhm

Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.173
diff -u -p -r1.173 uipc_socket.c
--- kern/uipc_socket.c  25 Jan 2017 16:45:50 -  1.173
+++ kern/uipc_socket.c  25 Jan 2017 18:01:06 -
@@ -123,7 +123,6 @@ socreate(int dom, struct socket **aso, i
return (EPROTONOSUPPORT);
if (prp->pr_type != type)
return (EPROTOTYPE);
-   NET_LOCK(s);
so = pool_get(_pool, PR_WAITOK | PR_ZERO);
TAILQ_INIT(>so_q0);
TAILQ_INIT(>so_q);
@@ -136,6 +135,7 @@ socreate(int dom, struct socket **aso, i
so->so_egid = p->p_ucred->cr_gid;
so->so_cpid = p->p_p->ps_pid;
so->so_proto = prp;
+   NET_LOCK(s);
error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
(struct mbuf *)(long)proto, NULL, p);
if (error) {



Re: Help with the NET_LOCK()

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 04:32:25PM +1000, Martin Pieuchot wrote:
> I just enabled the NET_LOCK() again and I'm looking for test reports.
> Please go build a kernel from sources or wait for the next snapshot,
> run it and report back.
> 
> If you're looking for some small coding tasks related to the NET_LOCK()
> just do:
> 
>   # sysctl kern.splassert=2
>   # sysctl kern.pool_debug=2
>   
> Then watch for the traces on your console.
> 
> You'll see something like:
> 
>   Starting stack trace...
>   yield(0,1,d09dac52,f5549dbc,d94e9378) at yield+0xa4
>   yield(d0bc8f40,1,f5549e18,80,14) at yield+0xa4
>   pool_get(d0bc8f40,1,f5549ec8,d03ecbfb,d97815f4) at pool_get+0x1ba
>   m_get(1,3,f5549ec0,d03a9362,d0bc22e0) at m_get+0x30
>   doaccept(d977e6c4,3,cf7ee4f8,cf7ee4ec,2000) at doaccept+0x193
>   sys_accept(d977e6c4,f5549f5c,f5549f7c,0,f5549fa8) at sys_accept+0x37
>   syscall() at syscall+0x250
>   
> This means accept(2) is doing a memory allocation that can sleep, here
> with m_get(9), while holding the NET_LOCK().  Even if these should be
> ok, it is easy to avoid them.  In the case of doaccept() a mbuf could
> be allocated beforehand or simply use the stack for that.
> 
> Cheers,
> Martin
>

splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
pool_get() at pool_get+0x1ca
m_get() at m_get+0x28
ip_ctloutput() at ip_ctloutput+0x4bf
sogetopt() at sogetopt+0x7e
sys_getsockopt() at sys_getsockopt+0xbf
syscall() at syscall+0x27b
--- syscall (number 118) ---
end of kernel
end trace frame: 0x3, count: 250
0x978bdd844a:
End of stack trace.
 



Re: Help with the NET_LOCK()

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 04:32:25PM +1000, Martin Pieuchot wrote:
> I just enabled the NET_LOCK() again and I'm looking for test reports.
> Please go build a kernel from sources or wait for the next snapshot,
> run it and report back.
> 
> If you're looking for some small coding tasks related to the NET_LOCK()
> just do:
> 
>   # sysctl kern.splassert=2
>   # sysctl kern.pool_debug=2
>   
> Then watch for the traces on your console.
> 
> You'll see something like:
> 
>   Starting stack trace...
>   yield(0,1,d09dac52,f5549dbc,d94e9378) at yield+0xa4
>   yield(d0bc8f40,1,f5549e18,80,14) at yield+0xa4
>   pool_get(d0bc8f40,1,f5549ec8,d03ecbfb,d97815f4) at pool_get+0x1ba
>   m_get(1,3,f5549ec0,d03a9362,d0bc22e0) at m_get+0x30
>   doaccept(d977e6c4,3,cf7ee4f8,cf7ee4ec,2000) at doaccept+0x193
>   sys_accept(d977e6c4,f5549f5c,f5549f7c,0,f5549fa8) at sys_accept+0x37
>   syscall() at syscall+0x250
>   
> This means accept(2) is doing a memory allocation that can sleep, here
> with m_get(9), while holding the NET_LOCK().  Even if these should be
> ok, it is easy to avoid them.  In the case of doaccept() a mbuf could
> be allocated beforehand or simply use the stack for that.
> 
> Cheers,
> Martin
>

splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
malloc() at malloc+0x406
ip6_setmoptions() at ip6_setmoptions+0x65
ip6_ctloutput() at ip6_ctloutput+0x6d9
sosetopt() at sosetopt+0x8e
sys_setsockopt() at sys_setsockopt+0x12d
syscall() at syscall+0x27b
--- syscall (number 105) ---
end of kernel
end trace frame: 0x1b1987be0028, count: 250
0x1b197d24b80a:
End of stack trace.
 



Re: Help with the NET_LOCK() - socreate

2017-01-25 Thread David Hill
On Wed, Jan 25, 2017 at 04:32:25PM +1000, Martin Pieuchot wrote:
> I just enabled the NET_LOCK() again and I'm looking for test reports.
> Please go build a kernel from sources or wait for the next snapshot,
> run it and report back.
> 
> If you're looking for some small coding tasks related to the NET_LOCK()
> just do:
> 
>   # sysctl kern.splassert=2
>   # sysctl kern.pool_debug=2
>   
> Then watch for the traces on your console.
> 
> You'll see something like:
> 
>   Starting stack trace...
>   yield(0,1,d09dac52,f5549dbc,d94e9378) at yield+0xa4
>   yield(d0bc8f40,1,f5549e18,80,14) at yield+0xa4
>   pool_get(d0bc8f40,1,f5549ec8,d03ecbfb,d97815f4) at pool_get+0x1ba
>   m_get(1,3,f5549ec0,d03a9362,d0bc22e0) at m_get+0x30
>   doaccept(d977e6c4,3,cf7ee4f8,cf7ee4ec,2000) at doaccept+0x193
>   sys_accept(d977e6c4,f5549f5c,f5549f7c,0,f5549fa8) at sys_accept+0x37
>   syscall() at syscall+0x250
>   
> This means accept(2) is doing a memory allocation that can sleep, here
> with m_get(9), while holding the NET_LOCK().  Even if these should be
> ok, it is easy to avoid them.  In the case of doaccept() a mbuf could
> be allocated beforehand or simply use the stack for that.
> 
> Cheers,
> Martin
>

splassert: yield: want 0 have 1
Starting stack trace...
yield() at yield+0xac
pool_get() at pool_get+0x1ca
socreate() at socreate+0xba
sys_socket() at sys_socket+0x135
syscall() at syscall+0x27b
--- syscall (number 97) ---
end of kernel
end trace frame: 0x1b1a16c05800, count: 252
0x1b197d23277a:
End of stack trace.



Re: pfctl: Kill states within a rdomain

2017-01-25 Thread Bertrand Provost

Hi,

On 2017-01-24 07:26 PM, Sebastian Benoit wrote:
> but your diff does not seem to be against -current, you started from 6.0
>
> But even with 6.0 i get rejects, maybe you mail client messes this up.
My patch is based on current I suppose problem came from my mail client.

> Can you please resend a good diff?

I try another config of my mail client and I attach the patch in case it
fail again.

Index: pfctl.8
===
RCS file: /cvs/src/sbin/pfctl/pfctl.8,v
retrieving revision 1.165
diff -u -p -r1.165 pfctl.8
--- pfctl.8 15 Jun 2015 08:48:23 -  1.165
+++ pfctl.8 24 Jan 2017 21:38:56 -
@@ -47,6 +47,7 @@
 .Op Fl S Ar statefile
 .Op Fl s Ar modifier Op Fl R Ar id
 .Op Fl t Ar table Fl T Ar command Op Ar address ...
+.Op Fl V Ar rdomain
 .Op Fl x Ar level
 .Ek
 .Sh DESCRIPTION
@@ -275,6 +276,12 @@ from rules carrying the label
 .Dq foobar :
 .Pp
 .Dl # pfctl -k label -k foobar
+.Pp
+To kill states withing a rdomain (the rdomain of a state is displayed
+in parentheses before the host by pfctl -s states) use
+.Fl V Ar rdomain :
+.Pp
+.Dl # pfctl -V rdomain -k host
 .Pp
 To kill one specific state by its unique state ID
 (as shown by pfctl -s state -vv),
Index: pfctl.c
===
RCS file: /cvs/src/sbin/pfctl/pfctl.c,v
retrieving revision 1.334
diff -u -p -r1.334 pfctl.c
--- pfctl.c 14 Jan 2016 12:05:51 -  1.334
+++ pfctl.c 24 Jan 2017 21:38:56 -
@@ -69,9 +69,9 @@ intpfctl_clear_src_nodes(int, int);
 int pfctl_clear_states(int, const char *, int);
 voidpfctl_addrprefix(char *, struct pf_addr *);
 int pfctl_kill_src_nodes(int, const char *, int);
-int pfctl_net_kill_states(int, const char *, int);
-int pfctl_label_kill_states(int, const char *, int);
-int pfctl_id_kill_states(int, const char *, int);
+int pfctl_net_kill_states(int, const char *, int, int);
+int pfctl_label_kill_states(int, const char *, int, int);
+int pfctl_id_kill_states(int, int);
 voidpfctl_init_options(struct pfctl *);
 int pfctl_load_options(struct pfctl *);
 int pfctl_load_limit(struct pfctl *, unsigned int, unsigned int);
@@ -512,7 +512,7 @@ pfctl_kill_src_nodes(int dev, const char
 }

 int
-pfctl_net_kill_states(int dev, const char *iface, int opts)
+pfctl_net_kill_states(int dev, const char *iface, int opts, int rdomain)
 {
struct pfioc_state_kill psk;
struct addrinfo *res[2], *resp[2];
@@ -531,6 +531,8 @@ pfctl_net_kill_states(int dev, const cha
sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname))
errx(1, "invalid interface: %s", iface);

+   psk.psk_rdomain = rdomain;
+
pfctl_addrprefix(state_kill[0], _src.addr.v.a.mask);

if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, [0]))) {
@@ -618,7 +620,7 @@ pfctl_net_kill_states(int dev, const cha
 }

 int
-pfctl_label_kill_states(int dev, const char *iface, int opts)
+pfctl_label_kill_states(int dev, const char *iface, int opts, int rdomain)
 {
struct pfioc_state_kill psk;

@@ -635,6 +637,8 @@ pfctl_label_kill_states(int dev, const c
sizeof(psk.psk_label))
errx(1, "label too long: %s", state_kill[1]);

+   psk.psk_rdomain = rdomain;
+
if (ioctl(dev, DIOCKILLSTATES, ))
err(1, "DIOCKILLSTATES");

@@ -645,7 +649,7 @@ pfctl_label_kill_states(int dev, const c
 }

 int
-pfctl_id_kill_states(int dev, const char *iface, int opts)
+pfctl_id_kill_states(int dev, int opts)
 {
struct pfioc_state_kill psk;

@@ -2098,6 +2102,7 @@ main(int argc, char *argv[])
int  opts = 0;
int  optimize = PF_OPTIMIZE_BASIC;
int  level;
+   int  rdomain = 0;
char anchorname[PATH_MAX];
int  anchor_wildcard = 0;
char*path;
@@ -2109,7 +2114,7 @@ main(int argc, char *argv[])
usage();

while ((ch = getopt(argc, argv,
-   "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vx:z")) != -1) {
+   "a:dD:eqf:F:ghi:k:K:L:no:Pp:R:rS:s:t:T:vV:x:z")) != -1) {
switch (ch) {
case 'a':
anchoropt = optarg;
@@ -2215,6 +2220,13 @@ main(int argc, char *argv[])
opts |= PF_OPT_VERBOSE2;
opts |= PF_OPT_VERBOSE;
break;
+   case 'V':
+   rdomain = strtonum(optarg, 0, RT_TABLEID_MAX, );
+   if (errstr) {
+   warnx("Invalid rdomain: %s", errstr);
+   usage();
+   }
+   break;
case 'x':
debugopt = pfctl_lookup_option(optarg, debugopt_list);
if (debugopt == NULL) {
@@ -2403,11 +2415,11 @@ main(int argc, char *argv[])
}
if (state_killers) {
 

Re: socket splicing sleeps in task thread

2017-01-25 Thread Mike Belopuhov
On 25 January 2017 at 13:12, Alexander Bluhm  wrote:
> Hi,
>
> I ran the regression tests with netlock and kern.splassert=2.
>
> panic: sleep: sosplice failed insomnia
> Stopped at  Debugger+0x7:   leave
> TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
>  165128   1277  0 0x2  01  relayd
> * 72837  53210  0 0x14000  0x2100  sosplice
> Debugger(d0a02d9d,f5534e78,d09db72f,f5534e78,f5534eac) at Debugger+0x7
> panic(d09db72f,db715a60,f5534ecc,d03bb703,d5bb0314) at panic+0x71
> sleep_setup(f5534edc,d0b5cf78,20,d09e35bc,0) at sleep_setup+0xcc
> rw_enter(d0b5cf78,1,f5534f4c,d03bf6b3,d5bb0314) at rw_enter+0x142
> rw_enter_write(d0b5cf78,2,f5534f8c,d03bfa20,0) at rw_enter_write+0x3c
> sotask(db546ac8,f5534f68,d03bfa20,f5534f90,d03a9572) at sotask+0x1b
> taskq_thread(d5bb0300) at taskq_thread+0x60
>
> This forgotten diff should fix it.
>
> ok?
>

OK mikeb

> bluhm
>
> On Wed, Dec 21, 2016 at 12:40:04AM +0100, Alexander Bluhm wrote:
>> Hi,
>>
>> As NET_LOCK() is a read/write lock, it can sleep in sotask().  So
>> the TASKQ_CANTSLEEP flag is no longer valid for the splicing thread.
>>
>> ok?
>>
>> bluhm
>>



Re: socket splicing sleeps in task thread

2017-01-25 Thread Alexander Bluhm
Hi,

I ran the regression tests with netlock and kern.splassert=2.

panic: sleep: sosplice failed insomnia
Stopped at  Debugger+0x7:   leave
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
 165128   1277  0 0x2  01  relayd
* 72837  53210  0 0x14000  0x2100  sosplice
Debugger(d0a02d9d,f5534e78,d09db72f,f5534e78,f5534eac) at Debugger+0x7
panic(d09db72f,db715a60,f5534ecc,d03bb703,d5bb0314) at panic+0x71
sleep_setup(f5534edc,d0b5cf78,20,d09e35bc,0) at sleep_setup+0xcc
rw_enter(d0b5cf78,1,f5534f4c,d03bf6b3,d5bb0314) at rw_enter+0x142
rw_enter_write(d0b5cf78,2,f5534f8c,d03bfa20,0) at rw_enter_write+0x3c
sotask(db546ac8,f5534f68,d03bfa20,f5534f90,d03a9572) at sotask+0x1b
taskq_thread(d5bb0300) at taskq_thread+0x60

This forgotten diff should fix it.

ok?

bluhm

On Wed, Dec 21, 2016 at 12:40:04AM +0100, Alexander Bluhm wrote:
> Hi,
> 
> As NET_LOCK() is a read/write lock, it can sleep in sotask().  So
> the TASKQ_CANTSLEEP flag is no longer valid for the splicing thread.
> 
> ok?
> 
> bluhm
> 
> Index: kern/uipc_socket.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.170
> diff -u -p -r1.170 uipc_socket.c
> --- kern/uipc_socket.c20 Dec 2016 21:15:36 -  1.170
> +++ kern/uipc_socket.c20 Dec 2016 23:29:00 -
> @@ -1070,8 +1070,7 @@ sosplice(struct socket *so, int fd, off_
>   int  s, error = 0;
>  
>   if (sosplice_taskq == NULL)
> - sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET,
> - TASKQ_CANTSLEEP);
> + sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET, 0);
>   if (sosplice_taskq == NULL)
>   return (ENOMEM);
>  



preliminary diff to add BFD support to OpenBGPD

2017-01-25 Thread Peter Hessler
Here is the inital support for OpenBGPD to understand BFD messages.

With this, when BFD detects failure, it sets the nexthop for that
neighbor to Invalid.  Conversely, when BFD sets the state to up, it
removes that flag, setting the nexthop to Valid.


# when BFD state to 203.0.113.9 is Up
$ bgpctl show rib
flags: * = Valid, > = Selected, I = via IBGP, A = Announced, S = Stale
origin: i = IGP, e = EGP, ? = Incomplete

flags destination  gateway  lpref   med aspath origin
I*>   192.0.2.1/32 203.0.113.1100 0 i
AI*   192.0.2.1/32 0.0.0.0100 0 i
I*>   192.0.2.9/32 203.0.113.9100 0 i
I*>   198.51.100.9/32  203.0.113.9100 0 i
I*>   203.0.113.0/24   203.0.113.9100 0 i


# when BFD state to 203.0.113.9 is Down
$ bgpctl show rib
flags: * = Valid, > = Selected, I = via IBGP, A = Announced, S = Stale
origin: i = IGP, e = EGP, ? = Incomplete

flags destination  gateway  lpref   med aspath origin
I*>   192.0.2.1/32 203.0.113.1100 0 i
AI*   192.0.2.1/32 0.0.0.0100 0 i
I 192.0.2.9/32 203.0.113.9100 0 i
I 198.51.100.9/32  203.0.113.9100 0 i
I 203.0.113.0/24   203.0.113.9100 0 i



Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.188
diff -u -p -u -p -r1.188 bgpd.c
--- bgpd.c  24 Jan 2017 04:22:42 -  1.188
+++ bgpd.c  25 Jan 2017 09:47:10 -
@@ -778,6 +778,12 @@ send_network(int type, struct network_co
 int
 bgpd_filternexthop(struct kroute *kr, struct kroute6 *kr6)
 {
+   /* BFD routes are special */
+   if (kr && kr->flags & F_BFDDOWN && kr->prefixlen != 0)
+   return (1);
+   if (kr6 && kr6->flags & F_BFDDOWN && kr6->prefixlen != 0)
+   return (1);
+
/* kernel routes are never filtered */
if (kr && kr->flags & F_KERNEL && kr->prefixlen != 0)
return (0);
Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.300
diff -u -p -u -p -r1.300 bgpd.h
--- bgpd.h  25 Jan 2017 00:11:07 -  1.300
+++ bgpd.h  25 Jan 2017 09:47:10 -
@@ -85,6 +85,8 @@
 #defineF_CTL_ADJ_OUT   0x4000
 #defineF_CTL_ACTIVE0x8000
 #defineF_RTLABEL   0x1
+#defineF_BFD   0x2
+#defineF_BFDDOWN   0x4
 
 /*
  * Limit the number of control messages generated by the RDE and queued in
@@ -522,7 +524,7 @@ struct kroute {
struct in_addr  prefix;
struct in_addr  nexthop;
u_int32_t   mplslabel;
-   u_int16_t   flags;
+   u_int32_t   flags;
u_int16_t   labelid;
u_short ifindex;
u_int8_tprefixlen;
@@ -532,7 +534,7 @@ struct kroute {
 struct kroute6 {
struct in6_addr prefix;
struct in6_addr nexthop;
-   u_int16_t   flags;
+   u_int32_t   flags;
u_int16_t   labelid;
u_short ifindex;
u_int8_tprefixlen;
Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.211
diff -u -p -u -p -r1.211 kroute.c
--- kroute.c24 Jan 2017 04:22:42 -  1.211
+++ kroute.c25 Jan 2017 09:47:10 -
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -163,6 +164,7 @@ u_int8_tmask2prefixlen6(struct sockaddr
 void   get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
 void   if_change(u_short, int, struct if_data *);
 void   if_announce(void *);
+void   bfd_msg(void *, struct sockaddr *);
 
 intsend_rtmsg(int, int, struct ktable *, struct kroute *,
u_int8_t);
@@ -2093,7 +2095,7 @@ kroute_validate(struct kroute *kr)
 {
struct kif_node *kif;
 
-   if (kr->flags & (F_REJECT | F_BLACKHOLE))
+   if (kr->flags & (F_REJECT | F_BLACKHOLE | F_BFDDOWN))
return (0);
 
if ((kif = kif_find(kr->ifindex)) == NULL) {
@@ -2113,7 +2115,7 @@ kroute6_validate(struct kroute6 *kr)
 {
struct kif_node *kif;
 
-   if (kr->flags & (F_REJECT | F_BLACKHOLE))
+   if (kr->flags & (F_REJECT | F_BLACKHOLE | F_BFDDOWN))
return (0);
 
if ((kif = kif_find(kr->ifindex)) == NULL) {
@@ -2510,6 +2512,80 @@ if_change(u_short ifindex, int flags, st
 }
 
 void
+bfd_msg(void *msg, struct sockaddr *sa)
+{
+   struct bfd_msghdr   *bfd;
+   struct ktable   *kt;
+   struct knexthop_node*kn;
+   struct kroute_node  *kr;
+   struct kroute6_node *kr6;
+   struct bgpd_addr prefix;
+   

Re: NET_LOCK and pf_consistency_lock removal

2017-01-25 Thread Martin Pieuchot
On 25/01/17(Wed) 09:53, Sebastian Benoit wrote:
> Hi,
> 
> this removes the pf_consistency_lock and protects the users with NET_LOCK()
> 
> I ran into a crash when using pfctl -k, and mpi suggested this change (and
> likes the diff).

The reason I suggested this diff is that pfioctl() will need the
NET_LOCK() anyway.  So better keep things simple until we're going to
redesign PF for a MP world.

Note that the crash benno@ talked about is just for pflow(4) users.

ok mpi@

> diff --git sys/net/pf.c sys/net/pf.c
> index 2ae434e405d..a8ade533484 100644
> --- sys/net/pf.c
> +++ sys/net/pf.c
> @@ -1154,26 +1154,20 @@ pf_state_export(struct pfsync_state *sp, struct 
> pf_state *st)
>  /* END state table stuff */
>  
>  void
> -pf_purge_expired_rules(int locked)
> +pf_purge_expired_rules(void)
>  {
>   struct pf_rule  *r;
>  
> + NET_ASSERT_LOCKED();
> +
>   if (SLIST_EMPTY(_rule_gcl))
>   return;
>  
> - if (!locked)
> - rw_enter_write(_consistency_lock);
> - else
> - rw_assert_wrlock(_consistency_lock);
> -
>   while ((r = SLIST_FIRST(_rule_gcl)) != NULL) {
>   SLIST_REMOVE(_rule_gcl, r, pf_rule, gcle);
>   KASSERT(r->rule_flag & PFRULE_EXPIRED);
>   pf_purge_rule(r);
>   }
> -
> - if (!locked)
> - rw_exit_write(_consistency_lock);
>  }
>  
>  void
> @@ -1194,7 +1188,7 @@ pf_purge_thread(void *v)
>   if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
>   pf_purge_expired_fragments();
>   pf_purge_expired_src_nodes(0);
> - pf_purge_expired_rules(0);
> + pf_purge_expired_rules();
>   nloops = 0;
>   }
>  
> @@ -1241,27 +1235,21 @@ pf_state_expires(const struct pf_state *state)
>  }
>  
>  void
> -pf_purge_expired_src_nodes(int waslocked)
> +pf_purge_expired_src_nodes(void)
>  {
>   struct pf_src_node  *cur, *next;
> - int  locked = waslocked;
> +
> + NET_ASSERT_LOCKED();
>  
>   for (cur = RB_MIN(pf_src_tree, _src_tracking); cur; cur = next) {
>   next = RB_NEXT(pf_src_tree, _src_tracking, cur);
>  
>   if (cur->states == 0 && cur->expire <= time_uptime) {
> - if (! locked) {
> - rw_enter_write(_consistency_lock);
> - next = RB_NEXT(pf_src_tree,
> - _src_tracking, cur);
> - locked = 1;
> - }
> + next = RB_NEXT(pf_src_tree,
> + _src_tracking, cur);
>   pf_remove_src_node(cur);
>   }
>   }
> -
> - if (locked && !waslocked)
> - rw_exit_write(_consistency_lock);
>  }
>  
>  void
> @@ -1334,13 +1322,12 @@ pf_remove_divert_state(struct pf_state_key *sk)
>   }
>  }
>  
> -/* callers should hold the write_lock on pf_consistency_lock */
>  void
>  pf_free_state(struct pf_state *cur)
>  {
>   struct pf_rule_item *ri;
>  
> - splsoftassert(IPL_SOFTNET);
> + NET_ASSERT_LOCKED();
>  
>  #if NPFSYNC > 0
>   if (pfsync_state_in_use(cur))
> @@ -1375,7 +1362,8 @@ pf_purge_expired_states(u_int32_t maxcheck)
>  {
>   static struct pf_state  *cur = NULL;
>   struct pf_state *next;
> - int  locked = 0;
> +
> + NET_ASSERT_LOCKED();
>  
>   while (maxcheck--) {
>   /* wrap to start of list when we hit the end */
> @@ -1390,25 +1378,14 @@ pf_purge_expired_states(u_int32_t maxcheck)
>  
>   if (cur->timeout == PFTM_UNLINKED) {
>   /* free removed state */
> - if (! locked) {
> - rw_enter_write(_consistency_lock);
> - locked = 1;
> - }
>   pf_free_state(cur);
>   } else if (pf_state_expires(cur) <= time_uptime) {
>   /* remove and free expired state */
>   pf_remove_state(cur);
> - if (! locked) {
> - rw_enter_write(_consistency_lock);
> - locked = 1;
> - }
>   pf_free_state(cur);
>   }
>   cur = next;
>   }
> -
> - if (locked)
> - rw_exit_write(_consistency_lock);
>  }
>  
>  int
> diff --git sys/net/pf_ioctl.c sys/net/pf_ioctl.c
> index 9b278c907f5..25c8bd1 100644
> --- sys/net/pf_ioctl.c
> +++ sys/net/pf_ioctl.c
> @@ -111,7 +111,6 @@ void   pf_qid2qname(u_int16_t, char 
> *);
>  void  pf_qid_unref(u_int16_t);
>  
>  struct pf_rulepf_default_rule, pf_default_rule_new;
> -struct rwlock pf_consistency_lock = 
> RWLOCK_INITIALIZER("pfcnslk");
>  
>  struct {
>   char   

NET_LOCK and pf_consistency_lock removal

2017-01-25 Thread Sebastian Benoit
Hi,

this removes the pf_consistency_lock and protects the users with NET_LOCK()

I ran into a crash when using pfctl -k, and mpi suggested this change (and
likes the diff).

pf hackers: ok?

diff --git sys/net/pf.c sys/net/pf.c
index 2ae434e405d..a8ade533484 100644
--- sys/net/pf.c
+++ sys/net/pf.c
@@ -1154,26 +1154,20 @@ pf_state_export(struct pfsync_state *sp, struct 
pf_state *st)
 /* END state table stuff */
 
 void
-pf_purge_expired_rules(int locked)
+pf_purge_expired_rules(void)
 {
struct pf_rule  *r;
 
+   NET_ASSERT_LOCKED();
+
if (SLIST_EMPTY(_rule_gcl))
return;
 
-   if (!locked)
-   rw_enter_write(_consistency_lock);
-   else
-   rw_assert_wrlock(_consistency_lock);
-
while ((r = SLIST_FIRST(_rule_gcl)) != NULL) {
SLIST_REMOVE(_rule_gcl, r, pf_rule, gcle);
KASSERT(r->rule_flag & PFRULE_EXPIRED);
pf_purge_rule(r);
}
-
-   if (!locked)
-   rw_exit_write(_consistency_lock);
 }
 
 void
@@ -1194,7 +1188,7 @@ pf_purge_thread(void *v)
if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
pf_purge_expired_fragments();
pf_purge_expired_src_nodes(0);
-   pf_purge_expired_rules(0);
+   pf_purge_expired_rules();
nloops = 0;
}
 
@@ -1241,27 +1235,21 @@ pf_state_expires(const struct pf_state *state)
 }
 
 void
-pf_purge_expired_src_nodes(int waslocked)
+pf_purge_expired_src_nodes(void)
 {
struct pf_src_node  *cur, *next;
-   int  locked = waslocked;
+
+   NET_ASSERT_LOCKED();
 
for (cur = RB_MIN(pf_src_tree, _src_tracking); cur; cur = next) {
next = RB_NEXT(pf_src_tree, _src_tracking, cur);
 
if (cur->states == 0 && cur->expire <= time_uptime) {
-   if (! locked) {
-   rw_enter_write(_consistency_lock);
-   next = RB_NEXT(pf_src_tree,
-   _src_tracking, cur);
-   locked = 1;
-   }
+   next = RB_NEXT(pf_src_tree,
+   _src_tracking, cur);
pf_remove_src_node(cur);
}
}
-
-   if (locked && !waslocked)
-   rw_exit_write(_consistency_lock);
 }
 
 void
@@ -1334,13 +1322,12 @@ pf_remove_divert_state(struct pf_state_key *sk)
}
 }
 
-/* callers should hold the write_lock on pf_consistency_lock */
 void
 pf_free_state(struct pf_state *cur)
 {
struct pf_rule_item *ri;
 
-   splsoftassert(IPL_SOFTNET);
+   NET_ASSERT_LOCKED();
 
 #if NPFSYNC > 0
if (pfsync_state_in_use(cur))
@@ -1375,7 +1362,8 @@ pf_purge_expired_states(u_int32_t maxcheck)
 {
static struct pf_state  *cur = NULL;
struct pf_state *next;
-   int  locked = 0;
+
+   NET_ASSERT_LOCKED();
 
while (maxcheck--) {
/* wrap to start of list when we hit the end */
@@ -1390,25 +1378,14 @@ pf_purge_expired_states(u_int32_t maxcheck)
 
if (cur->timeout == PFTM_UNLINKED) {
/* free removed state */
-   if (! locked) {
-   rw_enter_write(_consistency_lock);
-   locked = 1;
-   }
pf_free_state(cur);
} else if (pf_state_expires(cur) <= time_uptime) {
/* remove and free expired state */
pf_remove_state(cur);
-   if (! locked) {
-   rw_enter_write(_consistency_lock);
-   locked = 1;
-   }
pf_free_state(cur);
}
cur = next;
}
-
-   if (locked)
-   rw_exit_write(_consistency_lock);
 }
 
 int
diff --git sys/net/pf_ioctl.c sys/net/pf_ioctl.c
index 9b278c907f5..25c8bd1 100644
--- sys/net/pf_ioctl.c
+++ sys/net/pf_ioctl.c
@@ -111,7 +111,6 @@ void pf_qid2qname(u_int16_t, char 
*);
 voidpf_qid_unref(u_int16_t);
 
 struct pf_rule  pf_default_rule, pf_default_rule_new;
-struct rwlock   pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk");
 
 struct {
charstatusif[IFNAMSIZ];
@@ -987,12 +986,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
return (EACCES);
}
 
-   if (flags & FWRITE)
-   rw_enter_write(_consistency_lock);
-   else
-   rw_enter_read(_consistency_lock);
-
-   s = splsoftnet();
+   NET_LOCK(s);
switch (cmd) {
 
case DIOCSTART:
@@ -2388,11 

Re: Special rules for early-open fd's in pledge

2017-01-25 Thread Reyk Floeter
On Wed, Jan 25, 2017 at 12:41:26AM -0700, Theo de Raadt wrote:
> > On Wed, Jan 25, 2017 at 12:33:36AM -0700, Theo de Raadt wrote:
> > > > 2. vmd calls openpty() in the pledged parent whenever a new VM is
> > > > started - effectively doing ioctls on post-pledge fds.  I will
> > > > probably solve this by opening the pty in the non-pledged "priv"
> > > > process, and do some additional passing, but then I'll also have to
> > > > give up its chroot to access /dev/.
> > > > 
> > > > vmd: ioctl 40287401 post-pledge fd 12
> > > > vmd(51681): syscall 54 "tty"
> > > 
> > > How about opening PATH_PTMDEV early and keeping it open in a
> > > properly protected process; then create pty pairs as required.
> > 
> > Oh, yes, I should have looked at openpty() in libutil first :)
> > That makes sense, I will try it.
> 
> But please don't become too attached to the proposed semantics.
> It's a proposal, we need to learn if it actually helps or hinders
> privsep programs become better.

ok, no problem, it is a very simple thing in vmd.

so, just as a proposal, the attached diff makes it work with vmd and
the chroot-friendly version of openpty might be useful for libutil.

Reyk

Index: lib/libutil/openpty.3
===
RCS file: /cvs/src/lib/libutil/openpty.3,v
retrieving revision 1.17
diff -u -p -u -p -r1.17 openpty.3
--- lib/libutil/openpty.3   28 Aug 2015 19:54:06 -  1.17
+++ lib/libutil/openpty.3   25 Jan 2017 08:40:46 -
@@ -44,6 +44,8 @@
 .Ft int
 .Fn openpty "int *amaster" "int *aslave" "char *name" "struct termios *termp" 
"struct winsize *winp"
 .Ft int
+.Fn fdopenpty "int ptmfd" "int *amaster" "int *aslave" "char *name" "struct 
termios *termp" "struct winsize *winp"
+.Ft int
 .Fn login_tty "int fd"
 .Ft pid_t
 .Fn forkpty "int *amaster" "char *name" "struct termios *termp" "struct 
winsize *winp"
@@ -88,6 +90,15 @@ the caller, permissions are set to corre
 uses of that device are revoked (see
 .Xr revoke 2
 for details).
+.Pp
+The
+.Fn fdopenpty
+function works like
+.Fn openpty
+but expects a pre-opened
+.Pa /dev/ptm
+file descriptor
+.Fa ptmfd .
 .Pp
 The
 .Fn login_tty
Index: lib/libutil/pty.c
===
RCS file: /cvs/src/lib/libutil/pty.c,v
retrieving revision 1.20
diff -u -p -u -p -r1.20 pty.c
--- lib/libutil/pty.c   30 Aug 2016 14:44:45 -  1.20
+++ lib/libutil/pty.c   25 Jan 2017 08:40:46 -
@@ -57,11 +57,30 @@ openpty(int *amaster, int *aslave, char 
fd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC);
if (fd == -1)
return (-1);
-   if ((ioctl(fd, PTMGET, ) == -1)) {
+
+   if (fdopenpty(fd, amaster, aslave, name, termp, winp) == -1) {
close(fd);
return (-1);
}
close(fd);
+
+   return (0);
+}
+
+int
+fdopenpty(int ptmfd, int *amaster, int *aslave, char *name,
+struct termios *termp, struct winsize *winp)
+{
+   int master, slave;
+   struct ptmget ptm;
+
+   /*
+* Use /dev/ptm and the PTMGET ioctl to get a properly set up and
+* owned pty/tty pair.
+*/
+   if ((ioctl(ptmfd, PTMGET, ) == -1))
+   return (-1);
+
master = ptm.cfd;
slave = ptm.sfd;
if (name) {
Index: lib/libutil/util.h
===
RCS file: /cvs/src/lib/libutil/util.h,v
retrieving revision 1.34
diff -u -p -u -p -r1.34 util.h
--- lib/libutil/util.h  3 Jun 2013 21:07:02 -   1.34
+++ lib/libutil/util.h  25 Jan 2017 08:40:46 -
@@ -99,6 +99,7 @@ void  pw_copy(int, int, const struct pass
 intpw_scan(char *, struct passwd *, int *);
 void   pw_error(const char *, int, int);
 intopenpty(int *, int *, char *, struct termios *, struct winsize *);
+intfdopenpty(int, int *, int *, char *, struct termios *, struct winsize 
*);
 intopendisk(const char *, int, char *, size_t, int);
 pid_t  forkpty(int *, char *, struct termios *, struct winsize *);
 intgetmaxpartitions(void);
Index: usr.sbin/vmd/config.c
===
RCS file: /cvs/src/usr.sbin/vmd/config.c,v
retrieving revision 1.23
diff -u -p -u -p -r1.23 config.c
--- usr.sbin/vmd/config.c   17 Jan 2017 21:51:01 -  1.23
+++ usr.sbin/vmd/config.c   25 Jan 2017 08:40:46 -
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "proc.h"
 #include "vmd.h"
@@ -122,6 +123,7 @@ config_getreset(struct vmd *env, struct 
 int
 config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid)
 {
+   struct vmd  *env = ps->ps_env;
struct vmd_if   *vif;
struct vmop_create_params *vmc = >vm_params;
struct vm_create_params *vcp = >vmc_params;
@@ -241,7 +243,8 @@ config_setvm(struct privsep *ps, struct 
 
/* Open TTY */
if (vm->vm_ttyname == NULL) {
-   if