Re: simplify in6_selectsrc() logic

2015-12-05 Thread Martin Pieuchot
On 05/12/15(Sat) 13:07, Vincent Gross wrote:
> in6_selectsrc() uses two different rtalloc calls depending on whether or
> not the destination address is multicast, but there is nothing to
> explain why. I dug a bit and found this commit from itojun@ :
> 
> diff -u -r1.6 -r1.7
> --- src/sys/netinet6/in6_src.c2000/06/18 04:49:32 1.6
> +++ src/sys/netinet6/in6_src.c2000/06/18 17:02:59 1.7
> @@ -244,7 +244,11 @@
>   ro->ro_dst.sin6_family = AF_INET6;
>   ro->ro_dst.sin6_len = sizeof(struct sockaddr_in6);
>   ro->ro_dst.sin6_addr = *dst;
> - if (!IN6_IS_ADDR_MULTICAST(dst)) {
> + ro->ro_dst.sin6_scope_id = dstsock->sin6_scope_id;
> + if (IN6_IS_ADDR_MULTICAST(dst)) {
> + ro->ro_rt = rtalloc1(&((struct route *)ro)
> +  ->ro_dst, 0);
> + } else {
>   rtalloc((struct route *)ro);
>   }
>   }
> 
> Below are rtalloc() and rtalloc1() from sys/net/route.c r1.19 committed
> on 05/21/2000 :
> 
> > void
> > rtalloc(ro)
> > register struct route *ro;
> > {
> > if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
> > return;  /* XXX */
> > ro->ro_rt = rtalloc1(&ro->ro_dst, 1);
> > }
> > 
> > struct rtentry *
> > rtalloc1(dst, report)
> > register struct sockaddr *dst;
> > int report;
> > {
> [...]
> > /*
> >  * IP encapsulation does lots of lookups where we don't need nor want
> >  * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
> >  * sent upward breaking user-level routing queries.
> >  */
> > miss:   if (report && dst->sa_family != PF_KEY) {
> > bzero((caddr_t)&info, sizeof(info));
> > info.rti_info[RTAX_DST] = dst;
> > rt_missmsg(msgtype, &info, 0, err);
> > }
> > }
> > splx(s);
> > return (newrt);
> > }
> 
> 
> So this if(MULTICAST) has been introduced to prevent RTM_MISS storms when
> looking up routes to multicast addresses ; multicast and unicast route lookups
> are the same.
> 
> Also, rtalloc(foo, RT_RESOLVE, bar) and rtalloc_mpath(foo, NULL, bar) are both
> equivalent to _rtalloc(foo, NULL, RT_RESOLVE, bar).
> 
> Let's remove this if(MULTICAST), it's just confusing.
> 
> ok ?

ok mpi@

> 
> Index: sys/netinet6/in6_src.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6_src.c,v
> retrieving revision 1.71
> diff -u -p -r1.71 in6_src.c
> --- sys/netinet6/in6_src.c2 Dec 2015 13:29:26 -   1.71
> +++ sys/netinet6/in6_src.c5 Dec 2015 12:03:48 -
> @@ -240,13 +240,8 @@ in6_selectsrc(struct in6_addr **in6src, 
>   sa6->sin6_len = sizeof(struct sockaddr_in6);
>   sa6->sin6_addr = *dst;
>   sa6->sin6_scope_id = dstsock->sin6_scope_id;
> - if (IN6_IS_ADDR_MULTICAST(dst)) {
> - ro->ro_rt = rtalloc(sin6tosa(>ro_dst),
> - RT_RESOLVE, ro->ro_tableid);
> - } else {
> - ro->ro_rt = rtalloc_mpath(sin6tosa(>ro_dst),
> - NULL, ro->ro_tableid);
> - }
> + ro->ro_rt = rtalloc(sin6tosa(>ro_dst),
> + RT_RESOLVE, ro->ro_tableid);
>   }
>  
>   /*
> 



Re: newfs: avoid oob read on command line argument

2015-12-05 Thread Tobias Stoeckmann
On Sat, Dec 05, 2015 at 06:26:35AM -0500, Ted Unangst wrote:
> may i suggest strlen(s) instead of strchr(s, 0)?

There's actually one part in newfs' code that uses this. And in theory
it has the same issue, not checking if s (which is special, which might
be argv[0]) is empty. I highly doubt this could be reached there, but
I fixed it anyway. Until now it uses strncpy, and with the switch to
strlcpy this is just another additional boundary check in place.


Tobias

Index: sbin/newfs/newfs.c
===
RCS file: /cvs/src/sbin/newfs/newfs.c,v
retrieving revision 1.103
diff -u -p -u -p -r1.103 newfs.c
--- sbin/newfs/newfs.c  25 Nov 2015 19:45:21 -  1.103
+++ sbin/newfs/newfs.c  5 Dec 2015 12:32:07 -
@@ -423,10 +423,11 @@ main(int argc, char *argv[])
warnx("%s: not a character-special device",
special);
}
-   cp = strchr(argv[0], '\0') - 1;
-   if (cp == NULL ||
-   ((*cp < 'a' || *cp > ('a' + maxpartitions - 1))
-   && !isdigit((unsigned char)*cp)))
+   if (*argv[0] == '\0')
+   fatal("empty partition name supplied");
+   cp = argv[0] + strlen(argv[0]) - 1;
+   if ((*cp < 'a' || *cp > ('a' + maxpartitions - 1))
+   && !isdigit((unsigned char)*cp))
fatal("%s: can't figure out file system partition",
argv[0]);
lp = getdisklabel(special, fsi);
@@ -631,8 +632,9 @@ rewritelabel(char *s, int fd, struct dis
/*
 * Make name for 'c' partition.
 */
-   strncpy(specname, s, sizeof(specname) - 1);
-   specname[sizeof(specname) - 1] = '\0';
+   if (*s == '\0' ||
+   strlcpy(specname, s, sizeof(specname)) >= sizeof(specname))
+   fatal("%s: invalid partition name supplied", s);
cp = specname + strlen(specname) - 1;
if (!isdigit((unsigned char)*cp))
*cp = 'c';
Index: sbin/newfs_ext2fs/newfs_ext2fs.c
===
RCS file: /cvs/src/sbin/newfs_ext2fs/newfs_ext2fs.c,v
retrieving revision 1.21
diff -u -p -u -p -r1.21 newfs_ext2fs.c
--- sbin/newfs_ext2fs/newfs_ext2fs.c28 Nov 2015 06:12:09 -  1.21
+++ sbin/newfs_ext2fs/newfs_ext2fs.c5 Dec 2015 12:32:07 -
@@ -529,9 +529,11 @@ getpartition(int fsi, const char *specia
errx(EXIT_FAILURE, "%s: block device", special);
if (!S_ISCHR(st.st_mode))
warnx("%s: not a character-special device", special);
-   cp = strchr(argv[0], '\0') - 1;
-   if (cp == NULL || ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
-   && !isdigit((unsigned char)*cp)))
+   if (*argv[0] == '\0')
+   errx(EXIT_FAILURE, "empty partition name supplied");
+   cp = argv[0] + strlen(argv[0]) - 1;
+   if ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
+   && !isdigit((unsigned char)*cp))
errx(EXIT_FAILURE, "%s: can't figure out file system 
partition", argv[0]);
lp = getdisklabel(special, fsi);
if (isdigit((unsigned char)*cp))



Re: malloc canaries and validation

2015-12-05 Thread Theo Buehler
On Wed, Dec 02, 2015 at 08:37:05AM -0500, Ted Unangst wrote:
> This is a variant of the diffs sent by Daniel Micay, and then assembled by
> Theo Buehler. I've looked it over and made a few tweaks.
> 
> One: validate the junk in malloc hasn't been touched. I've tweaked this to
> always be on if junk is on, but to only check the first 32 bytes. (Without the
> atexit() handler, since I do not trust installing such by default.) I think
> this is a decent compromise between checking and performance and complexity
> and whatnot.
> 
> Two: add chunk canaries at the end of allocations. I would like to do
> something more interesting here (and by default, of course) but growing the
> size of every allocation isn't free. Many userland applications already aim
> for power of two sizing, so expanding by 4/8 bytes is far from free. However,
> this is an interesting feature, it's not that intrusive, and maybe we can build
> from it. Include it as well.
> 
> Hurray or nay?

FWIW, I would be very happy to see this go in.  Your changes make sense
to me, although I can't really judge the price of your point two on
slower architectures.  I believe it's slightly slower than Daniel's
version although I don't have hard data to back this.

I've been running with Daniel's patches and /etc/malloc.conf -> CJV
on my two production laptops without any issues for more than a month
now.

Running with your diff and 'CJ' since Wednesday.



Re: Make ix(4) mpsafe: take 2

2015-12-05 Thread Hrvoje Popovski
On 4.12.2015. 17:35, Hrvoje Popovski wrote:
> On 4.12.2015. 12:47, Mark Kettenis wrote:
>> Here is a new diff to make ix(4) mpsafe.  Should no longer get stuck
>> in the OACTIVE state.  Tests more than welcome.
>>
> 
> 
> Hi,
> 
> i have tested this patch with 82599 and x540 while sending 6Mpps for cca
> 3 hours and ifconfig down/up and everything is working fine. Will test
> it more and if I find something will send mail.
> 
> Thank you.
> 


Hi,

sending 12Mpps for about 10 hours and i haven't seen OACTIVE flag and
with ifconfig down/up couldn't trigger
ix1: unable to fill any rx descriptors
ix1: Could not setup receive structures




Make em(4) more mpsafe again

2015-12-05 Thread Claudio Jeker
So Mark and I spent some time to figure out what the issue was with ix(4).
Based on that info I resurrected the em(4) mpsafe diff that got backed out
and applied the same fix. It is somewhat unclear if this fixes the
watchdog timeouts since in theory the wdog timer should be stopped when
hitting the race condition we hit in ix(4).

I'm currently hammering my test system with this and until now I have not
seen a watchdog fire.
-- 
:wq Claudio

Index: pci/if_em.c
===
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.313
diff -u -p -r1.313 if_em.c
--- pci/if_em.c 25 Nov 2015 03:09:59 -  1.313
+++ pci/if_em.c 3 Dec 2015 22:08:54 -
@@ -612,6 +612,16 @@ em_start(struct ifnet *ifp)
if (em_encap(sc, m_head)) {
ifq_deq_rollback(>if_snd, m_head);
ifq_set_oactive(>if_snd);
+   /*
+*  Make sure there are still packets on the
+*  ring.  The interrupt handler may have
+*  cleaned up the ring before we were able to
+*  set the IF_OACTIVE flag.
+*/
+   if (sc->num_tx_desc_avail == sc->num_tx_desc) {
+   ifq_clr_oactive(>if_snd);
+   continue;
+   }
break;
}
 
@@ -918,20 +928,17 @@ em_intr(void *arg)
if (reg_icr & E1000_ICR_RXO)
sc->rx_overruns++;
 
-   KERNEL_LOCK();
-
/* Link status change */
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+   KERNEL_LOCK();
sc->hw.get_link_status = 1;
em_check_for_link(>hw);
em_update_link_status(sc);
+   if (!IFQ_IS_EMPTY(>if_snd))
+   em_start(ifp);
+   KERNEL_UNLOCK();
}
 
-   if (ifp->if_flags & IFF_RUNNING && !IFQ_IS_EMPTY(>if_snd))
-   em_start(ifp);
-
-   KERNEL_UNLOCK();
-
if (refill && em_rxfill(sc)) {
/* Advance the Rx Queue #0 "Tail Pointer". */
E1000_WRITE_REG(>hw, RDT, sc->last_rx_desc_filled);
@@ -1108,17 +1115,10 @@ em_encap(struct em_softc *sc, struct mbu
struct em_buffer   *tx_buffer, *tx_buffer_mapped;
struct em_tx_desc *current_tx_desc = NULL;
 
-   /*
-* Force a cleanup if number of TX descriptors
-* available hits the threshold
-*/
-   if (sc->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
-   em_txeof(sc);
-   /* Now do we at least have a minimal? */
-   if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
-   sc->no_tx_desc_avail1++;
-   return (ENOBUFS);
-   }
+   /* Check that we have least the minimal number of TX descriptors. */
+   if (sc->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
+   sc->no_tx_desc_avail1++;
+   return (ENOBUFS);
}
 
if (sc->hw.mac_type == em_82547) {
@@ -1220,12 +1220,6 @@ em_encap(struct em_softc *sc, struct mbu
}
}
 
-   sc->next_avail_tx_desc = i;
-   if (sc->pcix_82544)
-   sc->num_tx_desc_avail -= txd_used;
-   else
-   sc->num_tx_desc_avail -= map->dm_nsegs;
-
 #if NVLAN > 0
/* Find out if we are in VLAN mode */
if (m_head->m_flags & M_VLANTAG) {
@@ -1259,6 +1253,14 @@ em_encap(struct em_softc *sc, struct mbu
tx_buffer = >tx_buffer_area[first];
tx_buffer->next_eop = last;
 
+   membar_producer();
+
+   sc->next_avail_tx_desc = i;
+   if (sc->pcix_82544)
+   atomic_sub_int(>num_tx_desc_avail, txd_used);
+   else
+   atomic_sub_int(>num_tx_desc_avail, map->dm_nsegs);
+
/* 
 * Advance the Transmit Descriptor Tail (Tdt),
 * this tells the E1000 that this frame is
@@ -2389,10 +2391,12 @@ em_transmit_checksum_setup(struct em_sof
tx_buffer->m_head = NULL;
tx_buffer->next_eop = -1;
 
+   membar_producer();
+
if (++curr_txd == sc->num_tx_desc)
curr_txd = 0;
 
-   sc->num_tx_desc_avail--;
+   atomic_dec_int(>num_tx_desc_avail);
sc->next_avail_tx_desc = curr_txd;
 }
 
@@ -2406,7 +2410,7 @@ em_transmit_checksum_setup(struct em_sof
 void
 em_txeof(struct em_softc *sc)
 {
-   int first, last, done, num_avail;
+   int first, last, done, num_avail, free = 0;
struct em_buffer *tx_buffer;
struct em_tx_desc   *tx_desc, *eop_desc;
struct ifnet   *ifp = >interface_data.ac_if;
@@ -2414,9 +2418,8 @@ em_txeof(struct em_softc *sc)
if (sc->num_tx_desc_avail == sc->num_tx_desc)
return;
 
-   KERNEL_LOCK();
+   membar_consumer();
 
-   num_avail = 

Move ancient ksh bug from README to man page

2015-12-05 Thread Michael McConville
Thoughts? ok?


Index: README
===
RCS file: /cvs/src/bin/ksh/README,v
retrieving revision 1.15
diff -u -p -r1.15 README
--- README  5 Dec 2015 19:40:45 -   1.15
+++ README  6 Dec 2015 04:58:27 -
@@ -15,8 +15,3 @@ Files of interest:
NOTES   lists of known bugs in pdksh, at ksh, and posix.
PROJECTSlist of things that need to be done in pdksh.
LEGAL   A file detailing legal issues concerning pdksh.
-
-
-BTW, THE MOST FREQUENTLY REPORTED BUG IS
-   echo hi | read a; echo $a   # Does not print hi
-I'm aware of this and there is no need to report it.
Index: ksh.1
===
RCS file: /cvs/src/bin/ksh/ksh.1,v
retrieving revision 1.171
diff -u -p -r1.171 ksh.1
--- ksh.1   24 Nov 2015 21:07:31 -  1.171
+++ ksh.1   6 Dec 2015 04:58:29 -
@@ -5628,3 +5628,9 @@ The
 .Pa CONTRIBUTORS
 file in the source distribution contains a more complete list of people and
 their part in the shell's development.
+.Sh BUGS
+Since time immemorial, the below command has not printed
+.Qq hi ,
+as would be expected:
+.Pp
+.Dl $ echo hi | read a; echo $a



mpsafe re(4)

2015-12-05 Thread Jonathan Matthew
The main interesting bit here is the txeof and start loops, which previously
operated based on the prod/cons indices and the contents of the tx queue,
but now just use the indices as that's the only way to get a consistent view
of the tx queue state.

At the moment I don't think the tx ring is big enough to use IFQ_DEQUEUE
instead of ifq_deq_begin/commit, but maybe I'm wrong about that.

can someone try this on an APU1?

Index: ic/re.c
===
RCS file: /cvs/src/sys/dev/ic/re.c,v
retrieving revision 1.187
diff -u -p -r1.187 re.c
--- ic/re.c 25 Nov 2015 03:09:58 -  1.187
+++ ic/re.c 4 Dec 2015 15:07:02 -
@@ -120,6 +120,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -151,7 +152,7 @@ int redebug = 0;
 
 static inline void re_set_bufaddr(struct rl_desc *, bus_addr_t);
 
-intre_encap(struct rl_softc *, struct mbuf *, int *);
+intre_encap(struct rl_softc *, struct mbuf *, struct rl_txq *, int *);
 
 intre_newbuf(struct rl_softc *);
 intre_rx_list_init(struct rl_softc *);
@@ -1448,18 +1449,14 @@ re_txeof(struct rl_softc *sc)
struct ifnet*ifp;
struct rl_txq   *txq;
uint32_ttxstat;
-   int idx, descidx, tx = 0;
+   int idx, descidx, tx_free, freed = 0;
 
ifp = >sc_arpcom.ac_if;
 
-   for (idx = sc->rl_ldata.rl_txq_considx;; idx = RL_NEXT_TXQ(sc, idx)) {
+   for (idx = sc->rl_ldata.rl_txq_considx;
+   idx != sc->rl_ldata.rl_txq_prodidx; idx = RL_NEXT_TXQ(sc, idx)) {
txq = >rl_ldata.rl_txq[idx];
 
-   if (txq->txq_mbuf == NULL) {
-   KASSERT(idx == sc->rl_ldata.rl_txq_prodidx);
-   break;
-   }
-
descidx = txq->txq_descidx;
RL_TXDESCSYNC(sc, descidx,
BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
@@ -1470,9 +1467,7 @@ re_txeof(struct rl_softc *sc)
if (txstat & RL_TDESC_CMD_OWN)
break;
 
-   tx = 1;
-   sc->rl_ldata.rl_tx_free += txq->txq_nsegs;
-   KASSERT(sc->rl_ldata.rl_tx_free <= sc->rl_ldata.rl_tx_desc_cnt);
+   freed += txq->txq_nsegs;
bus_dmamap_sync(sc->sc_dmat, txq->txq_dmamap,
0, txq->txq_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->sc_dmat, txq->txq_dmamap);
@@ -1487,9 +1482,13 @@ re_txeof(struct rl_softc *sc)
ifp->if_opackets++;
}
 
-   sc->rl_ldata.rl_txq_considx = idx;
+   if (freed == 0)
+   return (0);
 
-   ifq_clr_oactive(>if_snd);
+   tx_free = atomic_add_int_nv(>rl_ldata.rl_tx_free, freed);
+   KASSERT(tx_free <= sc->rl_ldata.rl_tx_desc_cnt);
+
+   sc->rl_ldata.rl_txq_considx = idx;
 
/*
 * Some chips will ignore a second TX request issued while an
@@ -1498,12 +1497,14 @@ re_txeof(struct rl_softc *sc)
 * to restart the channel here to flush them out. This only
 * seems to be required with the PCIe devices.
 */
-   if (sc->rl_ldata.rl_tx_free < sc->rl_ldata.rl_tx_desc_cnt)
+   if (tx_free < sc->rl_ldata.rl_tx_desc_cnt)
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
-   else
+   else {
+   ifq_clr_oactive(>if_snd);
ifp->if_timer = 0;
+   }
 
-   return (tx);
+   return (1);
 }
 
 void
@@ -1566,7 +1567,9 @@ re_intr(void *arg)
}
 
if (status & RL_ISR_SYSTEM_ERR) {
+   KERNEL_LOCK();
re_init(ifp);
+   KERNEL_UNLOCK();
claimed = 1;
}
}
@@ -1599,7 +1602,11 @@ re_intr(void *arg)
}
}
 
-   re_start(ifp);
+   if (!IFQ_IS_EMPTY(>if_snd)) {
+   KERNEL_LOCK();
+   re_start(ifp);
+   KERNEL_UNLOCK();
+   }
 
CSR_WRITE_2(sc, RL_IMR, sc->rl_intrs);
 
@@ -1607,7 +1614,7 @@ re_intr(void *arg)
 }
 
 int
-re_encap(struct rl_softc *sc, struct mbuf *m, int *idx)
+re_encap(struct rl_softc *sc, struct mbuf *m, struct rl_txq *txq, int *used)
 {
bus_dmamap_tmap;
struct mbuf *mp, mh;
@@ -1616,7 +1623,6 @@ re_encap(struct rl_softc *sc, struct mbu
struct ip   *ip;
struct rl_desc  *d;
u_int32_t   cmdstat, vlanctl = 0, csum_flags = 0;
-   struct rl_txq   *txq;
 
/*
 * Set up checksum offload. Note: checksum offload bits must
@@ -1669,7 +1675,6 @@ re_encap(struct rl_softc *sc, struct mbu
}
}
 
-   txq = >rl_ldata.rl_txq[*idx];
map = txq->txq_dmamap;
 
error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
@@ -1710,7 +1715,7 @@ re_encap(struct rl_softc *sc, struct mbu
nsegs++;
}
 
-   if 

Re: [UPDATE] freetype-2.6.2

2015-12-05 Thread Matthieu Herrb
On Tue, Dec 01, 2015 at 05:46:07PM +0100, David Coppa wrote:
> 
> Hi!
> 
> Here's the update to freetype-2.6.2.
> 
> It shouldn't cause any fallout, but who knows with freetype... So
> probably a ports bulk build can be useful.
> 
> FreeType homepage says it's mostly a bugfixing release, but I've
> compared objdump outputs and there're some new symbols, thus I have
> bumped shlib_version minor to 2.

Hmm which ones? both my script and guenther's one don't find any
differences in public symbols. 

Otherwise tested on amd64, i386, macppc, sparc64, loongson and
compile-tested on arm. ok matthieu@


-- 
Matthieu Herrb


pgpsDK7AS2hMs.pgp
Description: PGP signature


Xen Patch-09: XenStore

2015-12-05 Thread Mike Belopuhov
XenStore provides a hierarchical storage for Xen configuration
a la OpenFirmware.  It is itself an interrupt-driven producer/
consumer interface with two 1kb queues for input and output.

It's required in order to do virtual device discovery and
device configuration (MAC address, various parameters).

OK?

---
 sys/dev/pv/files.pv   |   1 +
 sys/dev/pv/xen.c  |   5 +
 sys/dev/pv/xenstore.c | 793 ++
 sys/dev/pv/xenvar.h   |  45 +++
 4 files changed, 844 insertions(+)
 create mode 100644 sys/dev/pv/xenstore.c

diff --git sys/dev/pv/files.pv sys/dev/pv/files.pv
index 1e5c9bd..461ea08 100644
--- sys/dev/pv/files.pv
+++ sys/dev/pv/files.pv
@@ -15,5 +15,6 @@ file  dev/pv/vmt.cvmt needs-flag
 
 # Xen
 device xen {}
 attach xen at pvbus
 file   dev/pv/xen.cxen needs-flag
+file   dev/pv/xenstore.c   xen
diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index 2fa7283..2c7c161 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -47,10 +47,12 @@ int xen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
 void   xen_deferred(void *);
 void   xen_resume(struct device *);
 intxen_activate(struct device *, int);
 
+intxs_attach(struct xen_softc *);
+
 struct cfdriver xen_cd = {
NULL, "xen", DV_DULL
 };
 
 struct cfattach xen_ca = {
@@ -99,10 +101,13 @@ xen_attach(struct device *parent, struct device *self, 
void *aux)
xen_init_cbvec(sc);
 
if (xen_init_interrupts(sc))
return;
 
+   if (xs_attach(sc))
+   return;
+
mountroothook_establish(xen_deferred, sc);
 }
 
 void
 xen_deferred(void *arg)
diff --git sys/dev/pv/xenstore.c sys/dev/pv/xenstore.c
new file mode 100644
index 000..3abbf45
--- /dev/null
+++ sys/dev/pv/xenstore.c
@@ -0,0 +1,793 @@
+/*
+ * Copyright (c) 2015 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+/*
+ * The XenStore interface is a simple storage system that is a means of
+ * communicating state and configuration data between the Xen Domain 0
+ * and the various guest domains.  All configuration data other than
+ * a small amount of essential information required during the early
+ * boot process of launching a Xen aware guest, is managed using the
+ * XenStore.
+ *
+ * The XenStore is ASCII string based, and has a structure and semantics
+ * similar to a filesystem.  There are files and directories that are
+ * able to contain files or other directories.  The depth of the hierarchy
+ * is only limited by the XenStore's maximum path length.
+ *
+ * The communication channel between the XenStore service and other
+ * domains is via two, guest specific, ring buffers in a shared memory
+ * area.  One ring buffer is used for communicating in each direction.
+ * The grant table references for this shared memory are given to the
+ * guest via HVM hypercalls.
+ *
+ * The XenStore communication relies on an event channel and thus
+ * interrupts. Several Xen services depend on the XenStore, most
+ * notably the XenBus used to discover and manage Xen devices.
+ */
+
+const struct
+{
+   const char  *xse_errstr;
+   int  xse_errnum;
+} xs_errors[] = {
+   { "EINVAL", EINVAL },
+   { "EACCES", EACCES },
+   { "EEXIST", EEXIST },
+   { "EISDIR", EISDIR },
+   { "ENOENT", ENOENT },
+   { "ENOMEM", ENOMEM },
+   { "ENOSPC", ENOSPC },
+   { "EIO",EIO },
+   { "ENOTEMPTY",  ENOTEMPTY },
+   { "ENOSYS", ENOSYS },
+   { "EROFS",  EROFS },
+   { "EBUSY",  EBUSY },
+   { "EAGAIN", EAGAIN },
+   { "EISCONN",EISCONN },
+   { NULL, -1 },
+};
+
+struct xs_msghdr
+{
+   /* Message type */
+   uint32_t xmh_type;
+   /* Request identifier, echoed in daemon's response.  */
+   uint32_t xmh_rid;
+   /* Transaction id (0 if not related to a transaction). */
+   uint32_t xmh_tid;
+   /* Length of data following this. */
+   uint32_t 

Xen Patch-11: Provide simple device attachment logic

2015-12-05 Thread Mike Belopuhov
This implements simple device probing.

I have discussed this with deraadt@ and then later with kettenis@
and mpi@ and we've decided that OpenFirmware emulation would be a
bit too much (at least for now).

This is a work in progress, it will improve a bit once we start
working on the Netfront network interface.

OK?

---
 sys/dev/pv/xen.c| 67 +
 sys/dev/pv/xenvar.h |  6 +
 2 files changed, 73 insertions(+)

diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index c78cc75..8523e03 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -47,10 +47,11 @@ voidxen_disable_emulated_devices(struct xen_softc 
*);
 intxen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
 void   xen_deferred(void *);
 void   xen_resume(struct device *);
 intxen_activate(struct device *, int);
+intxen_probe_devices(struct xen_softc *);
 
 intxs_attach(struct xen_softc *);
 
 struct cfdriver xen_cd = {
NULL, "xen", DV_DULL
@@ -107,10 +108,12 @@ xen_attach(struct device *parent, struct device *self, 
void *aux)
if (xs_attach(sc))
return;
 
xen_disable_emulated_devices(sc);
 
+   xen_probe_devices(sc);
+
mountroothook_establish(xen_deferred, sc);
 }
 
 void
 xen_deferred(void *arg)
@@ -721,10 +724,74 @@ xen_intr_enable(void)
sc->sc_dev.dv_xname, xi->xi_port);
}
}
 }
 
+static int
+xen_attach_print(void *aux, const char *name)
+{
+   struct xen_attach_args *xa = aux;
+
+   if (name)
+   printf("\"%s\" at %s: %s", xa->xa_name, name, xa->xa_node);
+
+   return (UNCONF);
+}
+
+int
+xen_probe_devices(struct xen_softc *sc)
+{
+   struct xen_attach_args xa;
+   struct xs_transaction xst;
+   struct iovec *iovp1, *iovp2;
+   int error = 0, iov1_cnt, iov2_cnt, i, j;
+   char path[64];
+
+   memset(, 0, sizeof(xst));
+   xst.xst_id = 0;
+   xst.xst_sc = sc->sc_xs;
+   xst.xst_flags |= XST_POLL;
+
+   if ((error = xs_cmd(, XS_DIRECTORY, "device", ,
+   _cnt)) != 0)
+   return (error);
+
+   for (i = 0; i < iov1_cnt; i++) {
+   /* Special handling */
+   if (!strcmp("suspend", (char *)iovp1[i].iov_base)) {
+   xa.xa_parent = sc;
+   strlcpy(xa.xa_name, (char *)iovp1[i].iov_base,
+   sizeof(xa.xa_name));
+   snprintf(xa.xa_node, sizeof(xa.xa_node), "device/%s",
+   (char *)iovp1[i].iov_base);
+   config_found((struct device *)sc, ,
+   xen_attach_print);
+   continue;
+   }
+   snprintf(path, sizeof(path), "device/%s",
+   (char *)iovp1[i].iov_base);
+   if ((error = xs_cmd(, XS_DIRECTORY, path, ,
+   _cnt)) != 0) {
+   xs_resfree(, iovp1, iov1_cnt);
+   return (error);
+   }
+   for (j = 0; j < iov2_cnt; j++) {
+   xa.xa_parent = sc;
+   strlcpy(xa.xa_name, (char *)iovp1[i].iov_base,
+   sizeof(xa.xa_name));
+   snprintf(xa.xa_node, sizeof(xa.xa_node), "device/%s/%s",
+   (char *)iovp1[i].iov_base,
+   (char *)iovp2[j].iov_base);
+   config_found((struct device *)sc, ,
+   xen_attach_print);
+   }
+   xs_resfree(, iovp2, iov2_cnt);
+   }
+
+   return (error);
+}
+
 #include 
 
 #defineXMI_PORT0x10
 #define XMI_MAGIC  0x49d2
 #define XMI_UNPLUG_IDE 0x01
diff --git sys/dev/pv/xenvar.h sys/dev/pv/xenvar.h
index 9d73921..0e62dd9 100644
--- sys/dev/pv/xenvar.h
+++ sys/dev/pv/xenvar.h
@@ -55,10 +55,16 @@ struct xen_softc {
struct xs_softc *sc_xs; /* xenstore softc */
 };
 
 extern struct xen_softc *xen_sc;
 
+struct xen_attach_args {
+   void*xa_parent;
+   char xa_name[16];
+   char xa_node[64];
+};
+
 /*
  *  Hypercalls
  */
 #define memory_op  12
 #define xen_version17
-- 
2.6.3



Re: Make ix(4) mpsafe: take 2

2015-12-05 Thread Peter Hessler
On 2015 Dec 05 (Sat) at 16:16:50 +0100 (+0100), Hrvoje Popovski wrote:
:On 4.12.2015. 17:35, Hrvoje Popovski wrote:
:> On 4.12.2015. 12:47, Mark Kettenis wrote:
:>> Here is a new diff to make ix(4) mpsafe.  Should no longer get stuck
:>> in the OACTIVE state.  Tests more than welcome.
:>>
:> 
:> 
:> Hi,
:> 
:> i have tested this patch with 82599 and x540 while sending 6Mpps for cca
:> 3 hours and ifconfig down/up and everything is working fine. Will test
:> it more and if I find something will send mail.
:> 
:> Thank you.
:> 
:
:
:Hi,
:
:sending 12Mpps for about 10 hours and i haven't seen OACTIVE flag and
:with ifconfig down/up couldn't trigger
:ix1: unable to fill any rx descriptors
:ix1: Could not setup receive structures
:
:

What is the packet rate on the receiving system?  Are you testing
routing/forwarding?



Xen Patch-03: Hypercalls

2015-12-05 Thread Mike Belopuhov
This brings in support for Xen hypercalls via an MI interface
and implements functions to fetch extended version and features.

OK?

---
 sys/dev/pv/xen.c| 227 
 sys/dev/pv/xenreg.h | 184 ++
 sys/dev/pv/xenvar.h |  23 ++
 3 files changed, 434 insertions(+)
 create mode 100644 sys/dev/pv/xenreg.h

diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index ee16d99..f1198d1 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -26,15 +26,19 @@
 #include 
 
 #include 
 
 #include 
+#include 
 #include 
 
 struct xen_softc *xen_sc;
 
 void   xen_find_base(struct xen_softc *);
+intxen_init_hypercall(struct xen_softc *);
+intxen_getversion(struct xen_softc *);
+intxen_getfeatures(struct xen_softc *);
 
 intxen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
 void   xen_resume(struct device *);
 intxen_activate(struct device *, int);
@@ -70,12 +74,20 @@ xen_attach(struct device *parent, struct device *self, void 
*aux)
 
xen_find_base(sc);
 
printf("\n");
 
+   if (xen_init_hypercall(sc))
+   return;
+
/* Wire it up to the global */
xen_sc = sc;
+
+   if (xen_getversion(sc))
+   return;
+   if (xen_getfeatures(sc))
+   return;
 }
 
 void
 xen_resume(struct device *self)
 {
@@ -109,5 +121,220 @@ xen_find_base(struct xen_softc *sc)
}
break;
}
}
 }
+
+int
+xen_init_hypercall(struct xen_softc *sc)
+{
+   extern void *xen_hypercall_page;
+   uint32_t regs[4];
+   paddr_t pa;
+
+   /* Get hypercall page configuration MSR */
+   CPUID(sc->sc_base + CPUID_OFFSET_XEN_HYPERCALL,
+   regs[0], regs[1], regs[2], regs[3]);
+
+   /* We don't support more than one hypercall page */
+   if (regs[0] != 1) {
+   printf("%s: requested %d hypercall pages\n",
+   sc->sc_dev.dv_xname, regs[0]);
+   return (-1);
+   }
+
+   sc->sc_hc = _hypercall_page;
+
+   if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, )) {
+   printf("%s: hypercall page PA extraction failed\n",
+   sc->sc_dev.dv_xname);
+   return (-1);
+   }
+   wrmsr(regs[1], pa);
+
+   DPRINTF("%s: hypercall page at va %p pa %#lx\n", sc->sc_dev.dv_xname,
+   sc->sc_hc, pa);
+
+   return (0);
+}
+
+int
+xen_hypercall(struct xen_softc *sc, int op, int argc, ...)
+{
+   va_list ap;
+   ulong argv[5];
+   int i;
+
+   if (argc < 0 || argc > 5)
+   return (-1);
+   va_start(ap, argc);
+   for (i = 0; i < argc; i++)
+   argv[i] = (ulong)va_arg(ap, ulong);
+   return (xen_hypercallv(sc, op, argc, argv));
+}
+
+int
+xen_hypercallv(struct xen_softc *sc, int op, int argc, ulong *argv)
+{
+   ulong hcall;
+   int rv = 0;
+
+   hcall = (ulong)sc->sc_hc + op * 32;
+
+#if defined(XEN_DEBUG) && disabled
+   {
+   int i;
+
+   printf("hypercall %d", op);
+   if (argc > 0) {
+   printf(", args {");
+   for (i = 0; i < argc; i++)
+   printf(" %#lx", argv[i]);
+   printf(" }\n");
+   } else
+   printf("\n");
+   }
+#endif
+
+   switch (argc) {
+   case 0: {
+   HYPERCALL_RES1;
+   __asm__ volatile (  \
+ HYPERCALL_LABEL   \
+   : HYPERCALL_OUT1\
+   : HYPERCALL_PTR(hcall)  \
+   : HYPERCALL_CLOBBER \
+   );
+   HYPERCALL_RET(rv);
+   break;
+   }
+   case 1: {
+   HYPERCALL_RES1; HYPERCALL_RES2;
+   HYPERCALL_ARG1(argv[0]);
+   __asm__ volatile (  \
+ HYPERCALL_LABEL   \
+   : HYPERCALL_OUT1 HYPERCALL_OUT2 \
+   : HYPERCALL_IN1 \
+   , HYPERCALL_PTR(hcall)  \
+   : HYPERCALL_CLOBBER \
+   );
+   HYPERCALL_RET(rv);
+   break;
+   }
+   case 2: {
+   HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
+   HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
+   __asm__ volatile (  \
+ HYPERCALL_LABEL   \
+   : HYPERCALL_OUT1 HYPERCALL_OUT2 \
+ HYPERCALL_OUT3\
+   : HYPERCALL_IN1 HYPERCALL_IN2   \
+   , HYPERCALL_PTR(hcall)  \
+   : 

Xen Patch-02: Setup the hypercall page

2015-12-05 Thread Mike Belopuhov
This allocates a page-aligned page in the kernel text segment
to be used by the hypercall interface.  The content of the page
is provided by the Hypervisor itself when its physical address
is communicated via an MSR write (in the next diff).

OK?

---
 sys/arch/amd64/amd64/locore.S | 10 ++
 1 file changed, 10 insertions(+)

diff --git sys/arch/amd64/amd64/locore.S sys/arch/amd64/amd64/locore.S
index 618c6e7..f147131 100644
--- sys/arch/amd64/amd64/locore.S
+++ sys/arch/amd64/amd64/locore.S
@@ -107,10 +107,11 @@
 #include "assym.h"
 #include "lapic.h"
 #include "ioapic.h"
 #include "ksyms.h"
 #include "acpi.h"
+#include "xen.h"
 
 #include 
 #include 
 
 #include 
@@ -1169,5 +1170,14 @@ _C_LABEL(codepatch_begin):
 
.section .codepatchend,"a"
.globl _C_LABEL(codepatch_end)
 _C_LABEL(codepatch_end):
.previous
+
+#if NXEN > 0
+   /* Hypercall_page needs to be PAGE aligned */
+   .text
+   .align  NBPG
+   .globl  _C_LABEL(xen_hypercall_page)
+_C_LABEL(xen_hypercall_page):
+   .skip   0x1000, 0x90/* Fill with NOPs */
+#endif /* NXEN > 0 */
-- 
2.6.3



Re: libc: getusershell, new implementation

2015-12-05 Thread Ted Unangst
Tobias Stoeckmann wrote:
> 
> And I still think that the current code is a bit too permissive in parsing
> things. I mean what's the point in allowing lines like:
> 
> sometextwithoutspace/bin/ksh should be used for logins # seriously!
> 
> Which would result in /bin/ksh, by the way.
> 
> Didn't notice the consequences that arise by keeping the descriptor open,
> so I'm fine with an alternative approach. Yet we might make the code a
> bit easier to review by not allowing such weird lines. What it should
> expect and enforce:
> 
> - a valid line has to start with a slash
> - comments are chopped off
> - comments are supposed to be at the beginning of a line
> 
> So if somebody writes "/bin/ksh # comment", that actually leads to
> "/bin/ksh ",
> with an additional whitespace at the end. Currently we couldn't even specify a
> shell with a whitespace in its path.

ok. i was going to leave the behavior alone, but we can fix that too.

- use getline to read lines of any length.
- only consider lines that start with a /.
- truncate lines after a #, but not after spaces.


Index: gen/getusershell.c
===
RCS file: /cvs/src/lib/libc/gen/getusershell.c,v
retrieving revision 1.16
diff -u -p -r1.16 getusershell.c
--- gen/getusershell.c  14 Sep 2015 16:09:13 -  1.16
+++ gen/getusershell.c  5 Dec 2015 18:24:33 -
@@ -28,14 +28,13 @@
  * SUCH DAMAGE.
  */
 
-#include 
-
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /*
@@ -44,7 +43,7 @@
  */
 
 static char *okshells[] = { _PATH_BSHELL, _PATH_CSHELL, _PATH_KSHELL, NULL };
-static char **curshell, **shells, *strings;
+static char **curshell, **shells;
 static char **initshells(void);
 
 /*
@@ -66,11 +65,14 @@ getusershell(void)
 void
 endusershell(void)
 {
-   
+   char **s;
+
+   if ((s = shells))
+   while (*s)
+   free(*s++);
free(shells);
shells = NULL;
-   free(strings);
-   strings = NULL;
+
curshell = NULL;
 }
 
@@ -84,48 +86,50 @@ setusershell(void)
 static char **
 initshells(void)
 {
-   char **sp, *cp;
+   size_t nshells, nalloc, linesize;
+   char *line;
FILE *fp;
-   struct stat statb;
 
free(shells);
shells = NULL;
-   free(strings);
-   strings = NULL;
+
if ((fp = fopen(_PATH_SHELLS, "re")) == NULL)
return (okshells);
-   if (fstat(fileno(fp), &statb) == -1) {
-   (void)fclose(fp);
-   return (okshells);
-   }
-   if (statb.st_size > SIZE_MAX) {
-   (void)fclose(fp);
-   return (okshells);
-   }
-   if ((strings = malloc((size_t)statb.st_size)) == NULL) {
-   (void)fclose(fp);
-   return (okshells);
-   }
-   shells = calloc((size_t)(statb.st_size / 3 + 2), sizeof (char *));
-   if (shells == NULL) {
-   (void)fclose(fp);
-   free(strings);
-   strings = NULL;
-   return (okshells);
-   }
-   sp = shells;
-   cp = strings;
-   while (fgets(cp, PATH_MAX + 1, fp) != NULL) {
-   while (*cp != '#' && *cp != '/' && *cp != '\0')
-   cp++;
-   if (*cp == '#' || *cp == '\0')
+
+   line = NULL;
+   nalloc = 10; // just an initial guess
+   nshells = 0;
+   shells = reallocarray(NULL, nalloc, sizeof (char *));
+   if (shells == NULL)
+   goto fail;
+   linesize = 0;
+   while (getline(&line, &linesize, fp) != -1) {
+   if (*line != '/')
continue;
-   *sp++ = cp;
-   while (!isspace((unsigned char)*cp) && *cp != '#' && *cp != 
'\0')
-   cp++;
-   *cp++ = '\0';
+   line[strcspn(line, "#\n")] = '\0';
+   if (!(shells[nshells] = strdup(line)))
+   goto fail;
+
+   nshells++;
+   if (nshells == nalloc) {
+   char **new = reallocarray(shells, nalloc * 2, 
sizeof(char *));
+   if (!new)
+   goto fail;
+   shells = new;
+   nalloc *= 2;
+   }
}
-   *sp = NULL;
+   free(line);
+   shells[nshells] = NULL;
(void)fclose(fp);
return (shells);
+
+fail:
+   free(line);
+   while (nshells)
+   free(shells[nshells--]);
+   free(shells);
+   shells = NULL;
+   (void)fclose(fp);
+   return (okshells);
 }



Xen Patch-04: Shared info page

2015-12-05 Thread Mike Belopuhov
Allocate and hook up a "shared info page" that provides a matrix
of pending events and some other information like hypervisor
timecounter.  I'd like to keep the debugging function
xen_print_info_page around for a while.

OK?

---
 sys/dev/pv/xen.c|  82 
 sys/dev/pv/xenreg.h | 368 
 sys/dev/pv/xenvar.h |   2 +
 3 files changed, 452 insertions(+)

diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index f1198d1..e8aeeb8 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -35,10 +35,11 @@ struct xen_softc *xen_sc;
 
 void   xen_find_base(struct xen_softc *);
 int    xen_init_hypercall(struct xen_softc *);
 int    xen_getversion(struct xen_softc *);
 int    xen_getfeatures(struct xen_softc *);
+int    xen_init_info_page(struct xen_softc *);
 
 intxen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
 void   xen_resume(struct device *);
 intxen_activate(struct device *, int);
@@ -84,10 +85,13 @@ xen_attach(struct device *parent, struct device *self, void 
*aux)
 
if (xen_getversion(sc))
return;
if (xen_getfeatures(sc))
return;
+
+   if (xen_init_info_page(sc))
+   return;
 }
 
 void
 xen_resume(struct device *self)
 {
@@ -336,5 +340,83 @@ xen_getfeatures(struct xen_softc *sc)
printf("%s: features %b\n", sc->sc_dev.dv_xname, sc->sc_features,
"\20\014DOM0\013PIRQ\012PVCLOCK\011CBVEC\010GNTFLAGS\007HMA"
"\006PTUPD\005PAE4G\004SUPERVISOR\003AUTOPMAP\002WDT\001WPT");
return (0);
 }
+
+#ifdef XEN_DEBUG
+void
+xen_print_info_page(void)
+{
+   struct xen_softc *sc = xen_sc;
+   struct shared_info *s = sc->sc_ipg;
+   struct vcpu_info *v;
+   int i;
+
+   membar_sync();
+   for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
+   v = &s->vcpu_info[i];
+   if (!v->evtchn_upcall_pending && !v->evtchn_upcall_mask &&
+   !v->evtchn_pending_sel && !v->time.version &&
+   !v->time.tsc_timestamp && !v->time.system_time &&
+   !v->time.tsc_to_system_mul && !v->time.tsc_shift)
+   continue;
+   printf("vcpu%d:\n"
+   "   upcall_pending=%02x upcall_mask=%02x pending_sel=%#lx\n"
+   "   time version=%u tsc=%llu system=%llu\n"
+   "   time mul=%u shift=%d\n"
+   , i, v->evtchn_upcall_pending, v->evtchn_upcall_mask,
+   v->evtchn_pending_sel, v->time.version,
+   v->time.tsc_timestamp, v->time.system_time,
+   v->time.tsc_to_system_mul, v->time.tsc_shift);
+   }
+   printf("pending events: ");
+   for (i = 0; i < nitems(s->evtchn_pending); i++) {
+   if (s->evtchn_pending[i] == 0)
+   continue;
+   printf(" %d:%#lx", i, s->evtchn_pending[i]);
+   }
+   printf("\nmasked events: ");
+   for (i = 0; i < nitems(s->evtchn_mask); i++) {
+   if (s->evtchn_mask[i] == 0xffffffffffffffffULL)
+   continue;
+   printf(" %d:%#lx", i, s->evtchn_mask[i]);
+   }
+   printf("\nwc ver=%u sec=%u nsec=%u\n", s->wc_version, s->wc_sec,
+   s->wc_nsec);
+   printf("arch maxpfn=%lu framelist=%lu nmi=%lu\n", s->arch.max_pfn,
+   s->arch.pfn_to_mfn_frame_list, s->arch.nmi_reason);
+}
+#endif /* XEN_DEBUG */
+
+int
+xen_init_info_page(struct xen_softc *sc)
+{
+   struct xen_add_to_physmap xatp;
+   paddr_t pa;
+
+   sc->sc_ipg = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
+   if (sc->sc_ipg == NULL) {
+   printf("%s: failed to allocate shared info page\n",
+   sc->sc_dev.dv_xname);
+   return (-1);
+   }
+   if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_ipg, &pa)) {
+   printf("%s: shared info page PA extraction failed\n",
+   sc->sc_dev.dv_xname);
+   free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
+   return (-1);
+   }
+   xatp.domid = DOMID_SELF;
+   xatp.idx = 0;
+   xatp.space = XENMAPSPACE_shared_info;
+   xatp.gpfn = atop(pa);
+   if (xen_hypercall(sc, memory_op, 2, XENMEM_add_to_physmap, &xatp)) {
+   printf("%s: failed to register shared info page\n",
+   sc->sc_dev.dv_xname);
+   free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
+   return (-1);
+   }
+   DPRINTF("%s: shared info page at va %p pa %#lx\n", sc->sc_dev.dv_xname,
+   sc->sc_ipg, pa);
+   return (0);
+}
diff --git sys/dev/pv/xenreg.h sys/dev/pv/xenreg.h
index 3f646d3..ec45722 100644
--- sys/dev/pv/xenreg.h
+++ sys/dev/pv/xenreg.h
@@ -85,20 +85,339 @@
 # error "Not implemented"
 #endif
 
 #define CPUID_OFFSET_XEN_HYPERCALL 0x2
 
+#if defined(__i386__) || defined(__amd64__)
+struct arch_vcpu_info {
+  

Re: libc: getusershell, new implementation

2015-12-05 Thread Theo de Raadt
To me the new code seems broken.  It keeps a fd open between
libc calls.  It requires endusershell() to be called later.

Not even close-on-exec can work here, because libc is remembering
a FILE * referring to an open fd.

Leaving a fd open between libc calls is verboten, unless there is
tremendous cause.

The old code does not require endusershell() to close the descriptor.



Re: libgtop2: Use getifaddrs(3) instead of KVM

2015-12-05 Thread Antoine Jacoutot
On Sat, Dec 05, 2015 at 05:05:07PM +0100, Martin Pieuchot wrote:
> Here's a rewrite of glibtop_get_netload_p().  I tested it with custom
> code because I could not trigger this code path with our ports.
> 
> This unbreaks devel/libgtop2 after the recent  commit.
> 
> I believe this should go upstream, how should I submit this?

You can open a bug report here: https://bugzilla.gnome.org/
Jasper and I have commit access to gnome and since it's OpenBSD-only code, it 
can be pushed pretty fast.

 
> ok?
> 
> Index: Makefile
> ===
> RCS file: /cvs/ports/devel/libgtop2/Makefile,v
> retrieving revision 1.130
> diff -u -p -r1.130 Makefile
> --- Makefile  22 May 2015 11:31:13 -  1.130
> +++ Makefile  5 Dec 2015 15:58:01 -
> @@ -6,7 +6,7 @@ GNOME_VERSION=2.30.0
>  GNOME_PROJECT=   libgtop
>  PKGNAME= libgtop2-${VERSION}
>  
> -REVISION=3
> +REVISION=4
>  
>  SHARED_LIBS= gtop-2.09.0 # .10.0
>  
> Index: patches/patch-sysdeps_openbsd_netload_c
> ===
> RCS file: patches/patch-sysdeps_openbsd_netload_c
> diff -N patches/patch-sysdeps_openbsd_netload_c
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ patches/patch-sysdeps_openbsd_netload_c   5 Dec 2015 15:59:31 -
> @@ -0,0 +1,345 @@
> +$OpenBSD$
> +
> +Rewrite of glibtop_get_netload_p() to use getifaddrs(3) instead of KVM.
> +
> +--- sysdeps/openbsd/netload.c.orig   Mon Apr 28 23:09:24 2014
>  sysdeps/openbsd/netload.cSat Dec  5 16:27:56 2015
> +@@ -1,48 +1,39 @@
> +-/* Copyright (C) 1998-99 Martin Baulig
> +-   This file is part of LibGTop 1.0.
> ++/*
> ++ * Copyright (c) 2015 Martin Pieuchot 
> ++ *
> ++ * Permission to use, copy, modify, and distribute this software for any
> ++ * purpose with or without fee is hereby granted, provided that the above
> ++ * copyright notice and this permission notice appear in all copies.
> ++ *
> ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> ++ */
> + 
> +-   Contributed by Martin Baulig , October 1998.
> ++#include "config.h"
> + 
> +-   LibGTop is free software; you can redistribute it and/or modify it
> +-   under the terms of the GNU General Public License as published by
> +-   the Free Software Foundation; either version 2 of the License,
> +-   or (at your option) any later version.
> ++#include 
> ++#include 
> ++#include 
> + 
> +-   LibGTop is distributed in the hope that it will be useful, but WITHOUT
> +-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +-   for more details.
> +-
> +-   You should have received a copy of the GNU General Public License
> +-   along with LibGTop; see the file COPYING. If not, write to the
> +-   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
> +-   Boston, MA 02111-1307, USA.
> +-*/
> +-
> +-#include 
> +-#include 
> +-#include 
> +-#include 
> +-
> +-#include 
> +-
> +-#include 
> +-
> + #include 
> + #include 
> +-#include 
> + 
> +-#include 
> +-
> +-#include 
> +-
> + #include 
> +-#define _KERNEL
> + #include 
> +-#undef _KERNEL
> + #include 
> + 
> ++#include 
> ++
> ++#include "glibtop.h"
> ++#include "glibtop/netload.h"
> ++
> + static const unsigned long _glibtop_sysdeps_netload =
> ++(1L << GLIBTOP_NETLOAD_MTU) +
> + (1L << GLIBTOP_NETLOAD_IF_FLAGS) +
> + (1L << GLIBTOP_NETLOAD_PACKETS_IN) +
> + (1L << GLIBTOP_NETLOAD_PACKETS_OUT) +
> +@@ -55,183 +46,115 @@ static const unsigned long _glibtop_sysdeps_netload =
> + (1L << GLIBTOP_NETLOAD_ERRORS_TOTAL) +
> + (1L << GLIBTOP_NETLOAD_COLLISIONS);
> + 
> +-static const unsigned _glibtop_sysdeps_netload_data =
> +-(1L << GLIBTOP_NETLOAD_ADDRESS) +
> +-(1L << GLIBTOP_NETLOAD_SUBNET) +
> +-(1L << GLIBTOP_NETLOAD_MTU);
> +-
> +-/* nlist structure for kernel access */
> +-static struct nlist nlst [] = {
> +-{ "_ifnet" },
> +-{ 0 }
> +-};
> +-
> +-/* Init function. */
> +-
> + void
> + _glibtop_init_netload_p (glibtop *server)
> + {
> + server->sysdeps.netload = _glibtop_sysdeps_netload;
> +-
> +-if (kvm_nlist (server->machine.kd, nlst) < 0)
> +-glibtop_error_io_r (server, "kvm_nlist");
> + }
> + 
> +-/* Provides Network statistics. */
> +-
> + void
> + glibtop_get_netload_p (glibtop *server, glibtop_netload *buf,
> +const char 

Re: libc: getusershell, new implementation

2015-12-05 Thread Ted Unangst
Tobias Stoeckmann wrote:
> There's still a possible overflow in getusershell.c. We could increase
> the buffer allocation yet again, but I have to agree with the glibc
> developers here: enough is enough. The code is ugly and has proven to be
> difficult to review.

Another approach is to rewrite the function to allocate memory as needed and
not play these games. This diff preserves the existing behavior, but doesn't
play 1985 era games trying to save 24 bytes of memory by compacting all the
strings into a single allocation.

Actually, I think this even uses *less* memory since the majority of my
/etc/shells is just comment lines that won't be preserved.

Index: gen/getusershell.c
===
RCS file: /cvs/src/lib/libc/gen/getusershell.c,v
retrieving revision 1.16
diff -u -p -r1.16 getusershell.c
--- gen/getusershell.c  14 Sep 2015 16:09:13 -  1.16
+++ gen/getusershell.c  5 Dec 2015 16:08:56 -
@@ -28,14 +28,13 @@
  * SUCH DAMAGE.
  */
 
-#include 
-
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /*
@@ -44,7 +43,7 @@
  */
 
 static char *okshells[] = { _PATH_BSHELL, _PATH_CSHELL, _PATH_KSHELL, NULL };
-static char **curshell, **shells, *strings;
+static char **curshell, **shells;
 static char **initshells(void);
 
 /*
@@ -66,11 +65,14 @@ getusershell(void)
 void
 endusershell(void)
 {
-   
+   char **s;
+
+   if ((s = shells))
+   while (*s)
+   free(*s++);
free(shells);
shells = NULL;
-   free(strings);
-   strings = NULL;
+
curshell = NULL;
 }
 
@@ -84,48 +86,52 @@ setusershell(void)
 static char **
 initshells(void)
 {
-   char **sp, *cp;
+   char buf[PATH_MAX];
+   int nshells = 0, nalloc;
+   char *cp;
FILE *fp;
-   struct stat statb;
 
free(shells);
shells = NULL;
-   free(strings);
-   strings = NULL;
+
if ((fp = fopen(_PATH_SHELLS, "re")) == NULL)
return (okshells);
-   if (fstat(fileno(fp), &statb) == -1) {
-   (void)fclose(fp);
-   return (okshells);
-   }
-   if (statb.st_size > SIZE_MAX) {
-   (void)fclose(fp);
-   return (okshells);
-   }
-   if ((strings = malloc((size_t)statb.st_size)) == NULL) {
-   (void)fclose(fp);
-   return (okshells);
-   }
-   shells = calloc((size_t)(statb.st_size / 3 + 2), sizeof (char *));
-   if (shells == NULL) {
-   (void)fclose(fp);
-   free(strings);
-   strings = NULL;
-   return (okshells);
-   }
-   sp = shells;
-   cp = strings;
-   while (fgets(cp, PATH_MAX + 1, fp) != NULL) {
+
+   nalloc = 10; // just an initial guess
+   nshells = 0;
+   shells = reallocarray(NULL, nalloc, sizeof (char *));
+   if (shells == NULL)
+   goto fail;
+   while ((cp = fgets(buf, sizeof(buf), fp)) != NULL) {
while (*cp != '#' && *cp != '/' && *cp != '\0')
cp++;
if (*cp == '#' || *cp == '\0')
continue;
-   *sp++ = cp;
+   if (!(shells[nshells] = strdup(cp)))
+   goto fail;
+   cp = shells[nshells];
while (!isspace((unsigned char)*cp) && *cp != '#' && *cp != 
'\0')
cp++;
*cp++ = '\0';
+
+   nshells++;
+   if (nshells == nalloc) {
+   char **new = reallocarray(shells, nalloc * 2, 
sizeof(char *));
+   if (!new)
+   goto fail;
+   shells = new;
+   nalloc *= 2;
+   }
}
-   *sp = NULL;
+   shells[nshells] = NULL;
(void)fclose(fp);
return (shells);
+
+fail:
+   while (nshells)
+   free(shells[nshells--]);
+   free(shells);
+   shells = NULL;
+   (void)fclose(fp);
+   return (okshells);
 }



libgtop2: Use getifaddrs(3) instead of KVM

2015-12-05 Thread Martin Pieuchot
Here's a rewrite of glibtop_get_netload_p().  I tested it with custom
code because I could not trigger this code path with our ports.

This unbreaks devel/libgtop2 after the recent  commit.

I believe this should go upstream, how should I submit this?

ok?

Index: Makefile
===
RCS file: /cvs/ports/devel/libgtop2/Makefile,v
retrieving revision 1.130
diff -u -p -r1.130 Makefile
--- Makefile22 May 2015 11:31:13 -  1.130
+++ Makefile5 Dec 2015 15:58:01 -
@@ -6,7 +6,7 @@ GNOME_VERSION=  2.30.0
 GNOME_PROJECT= libgtop
 PKGNAME=   libgtop2-${VERSION}
 
-REVISION=  3
+REVISION=  4
 
 SHARED_LIBS=   gtop-2.09.0 # .10.0
 
Index: patches/patch-sysdeps_openbsd_netload_c
===
RCS file: patches/patch-sysdeps_openbsd_netload_c
diff -N patches/patch-sysdeps_openbsd_netload_c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ patches/patch-sysdeps_openbsd_netload_c 5 Dec 2015 15:59:31 -
@@ -0,0 +1,345 @@
+$OpenBSD$
+
+Rewrite of glibtop_get_netload_p() to use getifaddrs(3) instead of KVM.
+
+--- sysdeps/openbsd/netload.c.orig Mon Apr 28 23:09:24 2014
++++ sysdeps/openbsd/netload.c  Sat Dec  5 16:27:56 2015
+@@ -1,48 +1,39 @@
+-/* Copyright (C) 1998-99 Martin Baulig
+-   This file is part of LibGTop 1.0.
++/*
++ * Copyright (c) 2015 Martin Pieuchot 
++ *
++ * Permission to use, copy, modify, and distribute this software for any
++ * purpose with or without fee is hereby granted, provided that the above
++ * copyright notice and this permission notice appear in all copies.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
++ */
+ 
+-   Contributed by Martin Baulig , October 1998.
++#include "config.h"
+ 
+-   LibGTop is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published by
+-   the Free Software Foundation; either version 2 of the License,
+-   or (at your option) any later version.
++#include 
++#include 
++#include 
+ 
+-   LibGTop is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+-   for more details.
+-
+-   You should have received a copy of the GNU General Public License
+-   along with LibGTop; see the file COPYING. If not, write to the
+-   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+-   Boston, MA 02111-1307, USA.
+-*/
+-
+-#include 
+-#include 
+-#include 
+-#include 
+-
+-#include 
+-
+-#include 
+-
+ #include 
+ #include 
+-#include 
+ 
+-#include 
+-
+-#include 
+-
+ #include 
+-#define _KERNEL
+ #include 
+-#undef _KERNEL
+ #include 
+ 
++#include 
++
++#include "glibtop.h"
++#include "glibtop/netload.h"
++
+ static const unsigned long _glibtop_sysdeps_netload =
++(1L << GLIBTOP_NETLOAD_MTU) +
+ (1L << GLIBTOP_NETLOAD_IF_FLAGS) +
+ (1L << GLIBTOP_NETLOAD_PACKETS_IN) +
+ (1L << GLIBTOP_NETLOAD_PACKETS_OUT) +
+@@ -55,183 +46,115 @@ static const unsigned long _glibtop_sysdeps_netload =
+ (1L << GLIBTOP_NETLOAD_ERRORS_TOTAL) +
+ (1L << GLIBTOP_NETLOAD_COLLISIONS);
+ 
+-static const unsigned _glibtop_sysdeps_netload_data =
+-(1L << GLIBTOP_NETLOAD_ADDRESS) +
+-(1L << GLIBTOP_NETLOAD_SUBNET) +
+-(1L << GLIBTOP_NETLOAD_MTU);
+-
+-/* nlist structure for kernel access */
+-static struct nlist nlst [] = {
+-{ "_ifnet" },
+-{ 0 }
+-};
+-
+-/* Init function. */
+-
+ void
+ _glibtop_init_netload_p (glibtop *server)
+ {
+ server->sysdeps.netload = _glibtop_sysdeps_netload;
+-
+-if (kvm_nlist (server->machine.kd, nlst) < 0)
+-  glibtop_error_io_r (server, "kvm_nlist");
+ }
+ 
+-/* Provides Network statistics. */
+-
+ void
+ glibtop_get_netload_p (glibtop *server, glibtop_netload *buf,
+  const char *interface)
+ {
+-struct ifnet ifnet;
+-u_long ifnetaddr, ifnetfound;
+-struct sockaddr *sa = NULL;
+-char name [32];
++struct ifaddrs *ifap, *ifa;
++struct if_data *ifd = NULL;
+ 
+-union {
+-  struct ifaddr ifa;
+-  struct in_ifaddr in;
+-} ifaddr;
++buf->flags = 0;
+ 
+-glibtop_init_p (server, (1L << GLIBTOP_SYSDEPS_NETLOAD), 0);
++if (getifaddrs(&ifap) != 0)
++return;
+ 
+-memset (buf, 0, sizeof (glibtop_netload));
++for (ifa = ifap; ifa != 

Re: UPDATE: xf86-input-keyboard 1.8.1

2015-12-05 Thread Matthieu Herrb
On Tue, Nov 24, 2015 at 01:03:39PM +0500, Alexandr Shadchin wrote:
> Hi,
> 
> This diff updates xf86-input-keyboard to the latest release.
> Tested on amd64.
> 
> Comments ? OK ?

I had the same diff for some time on my machines. 
ok matthieu@ 
-- 
Matthieu Herrb


pgpuFQh60jB3B.pgp
Description: PGP signature


Re: UPDATE: xf86-input-synaptics 1.8.3

2015-12-05 Thread Matthieu Herrb
On Tue, Nov 24, 2015 at 01:32:01PM +0500, Alexandr Shadchin wrote:
> Hi,
> 
> This diff updates xf86-input-synaptics to the latest release.
> Tested on amd64.
> 
> Comments ? OK ?

ok matthieu@ and sorry for the delay.

> 
> -- 
> Alexandr Shadchin
> 
> Index: ChangeLog
> ===
> RCS file: /cvs/xenocara/driver/xf86-input-synaptics/ChangeLog,v
> retrieving revision 1.10
> diff -u -p -r1.10 ChangeLog
> --- ChangeLog 11 Apr 2015 16:08:42 -  1.10
> +++ ChangeLog 24 Nov 2015 08:28:47 -
> @@ -1,3 +1,22 @@
> +commit af7d8a15278a968d919bf539628281bf713f747b
> +Author: Peter Hutterer 
> +Date:   Thu Oct 29 12:10:09 2015 +1000
> +
> +synaptics 1.8.3
> +
> +Signed-off-by: Peter Hutterer 
> +
> +commit 3e7508ac087028f8f8e116f5279dad1ebcdc58e6
> +Author: Gabriele Mazzotta 
> +Date:   Thu Aug 14 20:03:42 2014 +0200
> +
> +Use cumulative relative touch movements while scrolling
> +
> +Signed-off-by: Gabriele Mazzotta 
> +Reviewed-by: Peter Hutterer 
> +Signed-off-by: Peter Hutterer 
> +(cherry picked from commit ec0901e5f81d9cad6cc8bbdcb5ea075009c13de5)
> +
>  commit 6f8d4bac14ac8f3fd2714f0a8a9e37c5136a4013
>  Author: Peter Hutterer 
>  Date:   Fri Mar 27 11:26:55 2015 +1000
> Index: configure
> ===
> RCS file: /cvs/xenocara/driver/xf86-input-synaptics/configure,v
> retrieving revision 1.11
> diff -u -p -r1.11 configure
> --- configure 11 Apr 2015 16:08:42 -  1.11
> +++ configure 24 Nov 2015 08:28:48 -
> @@ -1,6 +1,6 @@
>  #! /bin/sh
>  # Guess values for system-dependent variables and create Makefiles.
> -# Generated by GNU Autoconf 2.69 for xf86-input-synaptics 1.8.2.
> +# Generated by GNU Autoconf 2.69 for xf86-input-synaptics 1.8.3.
>  #
>  # Report bugs to .
>  #
> @@ -591,8 +591,8 @@ MAKEFLAGS=
>  # Identity of this package.
>  PACKAGE_NAME='xf86-input-synaptics'
>  PACKAGE_TARNAME='xf86-input-synaptics'
> -PACKAGE_VERSION='1.8.2'
> -PACKAGE_STRING='xf86-input-synaptics 1.8.2'
> +PACKAGE_VERSION='1.8.3'
> +PACKAGE_STRING='xf86-input-synaptics 1.8.3'
>  PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=xorg'
>  PACKAGE_URL=''
>  
> @@ -1378,7 +1378,7 @@ if test "$ac_init_help" = "long"; then
># Omit some internal or obsolete options to make the list less imposing.
># This message is too long to be a string in the A/UX 3.1 sh.
>cat <<_ACEOF
> -\`configure' configures xf86-input-synaptics 1.8.2 to adapt to many kinds of 
> systems.
> +\`configure' configures xf86-input-synaptics 1.8.3 to adapt to many kinds of 
> systems.
>  
>  Usage: $0 [OPTION]... [VAR=VALUE]...
>  
> @@ -1449,7 +1449,7 @@ fi
>  
>  if test -n "$ac_init_help"; then
>case $ac_init_help in
> - short | recursive ) echo "Configuration of xf86-input-synaptics 
> 1.8.2:";;
> + short | recursive ) echo "Configuration of xf86-input-synaptics 
> 1.8.3:";;
> esac
>cat <<\_ACEOF
>  
> @@ -1586,7 +1586,7 @@ fi
>  test -n "$ac_init_help" && exit $ac_status
>  if $ac_init_version; then
>cat <<\_ACEOF
> -xf86-input-synaptics configure 1.8.2
> +xf86-input-synaptics configure 1.8.3
>  generated by GNU Autoconf 2.69
>  
>  Copyright (C) 2012 Free Software Foundation, Inc.
> @@ -1910,7 +1910,7 @@ cat >config.log <<_ACEOF
>  This file contains any messages produced by compilers while
>  running configure, to aid debugging if configure makes a mistake.
>  
> -It was created by xf86-input-synaptics $as_me 1.8.2, which was
> +It was created by xf86-input-synaptics $as_me 1.8.3, which was
>  generated by GNU Autoconf 2.69.  Invocation command line was
>  
>$ $0 $@
> @@ -2739,7 +2739,7 @@ fi
>  
>  # Define the identity of the package.
>   PACKAGE='xf86-input-synaptics'
> - VERSION='1.8.2'
> + VERSION='1.8.3'
>  
>  
>  cat >>confdefs.h <<_ACEOF
> @@ -18820,7 +18820,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_wri
>  # report actual input values of CONFIG_FILES etc. instead of their
>  # values after options handling.
>  ac_log="
> -This file was extended by xf86-input-synaptics $as_me 1.8.2, which was
> +This file was extended by xf86-input-synaptics $as_me 1.8.3, which was
>  generated by GNU Autoconf 2.69.  Invocation command line was
>  
>CONFIG_FILES= $CONFIG_FILES
> @@ -18886,7 +18886,7 @@ _ACEOF
>  cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
>  ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; 
> s/[\\""\`\$]/&/g'`"
>  ac_cs_version="\\
> -xf86-input-synaptics config.status 1.8.2
> +xf86-input-synaptics config.status 1.8.3
>  configured by $0, generated by GNU Autoconf 2.69,
>with options \\"\$ac_cs_config\\"
>  
> Index: configure.ac
> 

Re: libc: getusershell, new implementation

2015-12-05 Thread Tobias Stoeckmann
> Index: gen/getusershell.c
> ===
> RCS file: /cvs/src/lib/libc/gen/getusershell.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 getusershell.c
> --- gen/getusershell.c14 Sep 2015 16:09:13 -  1.16
> +++ gen/getusershell.c5 Dec 2015 16:08:56 -
> @@ -84,48 +86,52 @@ setusershell(void)
>  static char **
>  initshells(void)
>  {
> - char **sp, *cp;
> + char buf[PATH_MAX];
> + int nshells = 0, nalloc;

I would prefer size_t for nshells and nalloc.

> + char *cp;
>   FILE *fp;
> - struct stat statb;
>  
>   free(shells);
>   shells = NULL;
> - free(strings);
> - strings = NULL;
> +
>   if ((fp = fopen(_PATH_SHELLS, "re")) == NULL)
>   return (okshells);
> - if (fstat(fileno(fp), &statb) == -1) {
> - (void)fclose(fp);
> - return (okshells);
> - }
> - if (statb.st_size > SIZE_MAX) {
> - (void)fclose(fp);
> - return (okshells);
> - }
> - if ((strings = malloc((size_t)statb.st_size)) == NULL) {
> - (void)fclose(fp);
> - return (okshells);
> - }
> - shells = calloc((size_t)(statb.st_size / 3 + 2), sizeof (char *));
> - if (shells == NULL) {
> - (void)fclose(fp);
> - free(strings);
> - strings = NULL;
> - return (okshells);
> - }
> - sp = shells;
> - cp = strings;
> - while (fgets(cp, PATH_MAX + 1, fp) != NULL) {
> +
> + nalloc = 10; // just an initial guess
> + nshells = 0;
> + shells = reallocarray(NULL, nalloc, sizeof (char *));
> + if (shells == NULL)
> + goto fail;
> + while ((cp = fgets(buf, sizeof(buf), fp)) != NULL) {

We already have to dynamically allocate memory anyway, so getline() would
fix some issues we could face while parsing files. The buffer is PATH_MAX
bytes long, which should be sufficient, but if a comment is PATH_MAX + x
bytes in size, we would parse the "x" part as a real path due to fgets'
truncation/wrapping.

>   while (*cp != '#' && *cp != '/' && *cp != '\0')
>   cp++;
>   if (*cp == '#' || *cp == '\0')
>   continue;
> - *sp++ = cp;
> + if (!(shells[nshells] = strdup(cp)))
> + goto fail;
> + cp = shells[nshells];
>   while (!isspace((unsigned char)*cp) && *cp != '#' && *cp != 
> '\0')
>   cp++;
>   *cp++ = '\0';

And I still think that the current code is a bit too permissive in parsing
things. I mean what's the point in allowing lines like:

sometextwithoutspace/bin/ksh should be used for logins # seriously!

Which would result in /bin/ksh, by the way.

Didn't notice the consequences that arise by keeping the descriptor open,
so I'm fine with an alternative approach. Yet we might make the code a
bit easier to review by not allowing such weird lines. What it should
expect and enforce:

- a valid line has to start with a slash
- comments are chopped off
- comments are supposed to be at the beginning of a line

So if somebody writes "/bin/ksh # comment", that actually leads to "/bin/ksh ",
with an additional whitespace at the end. Currently we couldn't even specify a
shell with a whitespace in its path.


Tobias



Xen Patch-12: Netfront stub

2015-12-05 Thread Mike Belopuhov
This adds a Netfront device stub.  Diff is more for educational
purposes at the moment.  Perhaps Paul will chip in and help me
add flesh to it.

---
 sys/arch/amd64/conf/GENERIC |  1 +
 sys/dev/pv/files.pv |  4 
 sys/dev/pv/if_xnf.c | 46 +
 3 files changed, 51 insertions(+)
 create mode 100644 sys/dev/pv/if_xnf.c

diff --git sys/arch/amd64/conf/GENERIC sys/arch/amd64/conf/GENERIC
index a4a87b0..981a8d0 100644
--- sys/arch/amd64/conf/GENERIC
+++ sys/arch/amd64/conf/GENERIC
@@ -67,10 +67,11 @@ mpbios0 at bios0
 ipmi0  at mainbus? disable # IPMI
 
 vmt0   at pvbus?   # VMware Tools
 
 #xen0  at pvbus?   # Xen HVM domU
+#xnf*  at xen? # Xen Netfront
 
 option PCIVERBOSE
 option USBVERBOSE
 
 pchb*  at pci? # PCI-Host bridges
diff --git sys/dev/pv/files.pv sys/dev/pv/files.pv
index 461ea08..a4b1789 100644
--- sys/dev/pv/files.pv
+++ sys/dev/pv/files.pv
@@ -16,5 +16,9 @@ file  dev/pv/vmt.cvmt needs-flag
 # Xen
 device xen {}
 attach xen at pvbus
 file   dev/pv/xen.cxen needs-flag
 file   dev/pv/xenstore.c   xen
+
+device xnf: ether, ifnet, ifmedia
+attach xnf at xen
+file   dev/pv/if_xnf.c xnf
diff --git sys/dev/pv/if_xnf.c sys/dev/pv/if_xnf.c
new file mode 100644
index 000..d2d31c6
--- /dev/null
+++ sys/dev/pv/if_xnf.c
@@ -0,0 +1,46 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+struct xnf_softc {
+   struct device   sc_dev;
+};
+
+int    xnf_match(struct device *, void *, void *);
+void   xnf_attach(struct device *, struct device *, void *);
+
+
+struct cfdriver xnf_cd = {
+   NULL, "xnf", DV_IFNET
+};
+
+struct cfattach xnf_ca = {
+   sizeof(struct xnf_softc), xnf_match, xnf_attach
+};
+
+int
+xnf_match(struct device *parent, void *match, void *aux)
+{
+   struct xen_attach_args *xa = aux;
+
+   if (strcmp("vif", xa->xa_name))
+   return (0);
+
+   return (1);
+}
+
+void
+xnf_attach(struct device *parent, struct device *self, void *aux)
+{
+   struct xen_attach_args *xa = aux;
+
+   printf(": \"%s\"\n", xa->xa_node);
+}
-- 
2.6.3



Re: Make ix(4) mpsafe: take 2

2015-12-05 Thread Hrvoje Popovski
On 5.12.2015. 16:35, Peter Hessler wrote:
> On 2015 Dec 05 (Sat) at 16:16:50 +0100 (+0100), Hrvoje Popovski wrote:
> :On 4.12.2015. 17:35, Hrvoje Popovski wrote:
> :> On 4.12.2015. 12:47, Mark Kettenis wrote:
> :>> Here is a new diff to make ix(4) mpsafe.  Should no longer get stuck
> :>> in the OACTIVE state.  Tests more than welcome.
> :>>
> :> 
> :> 
> :> Hi,
> :> 
> :> i have tested this patch with 82599 and x540 while sending 6Mpps for cca
> :> 3 hours and ifconfig down/up and everything is working fine. Will test
> :> it more and if I find something will send mail.
> :> 
> :> Thank you.
> :> 
> :
> :
> :Hi,
> :
> :sending 12Mpps for about 10 hours and i haven't seen OACTIVE flag and
> :with ifconfig down/up couldn't trigger
> :ix1: unable to fill any rx descriptors
> :ix1: Could not setup receive structures
> :
> :
> 
> What is the packet rate on the receiving system?  Are you testing
> routing/forwarding?
> 

i was trying to trigger OACTIVE flag and "unable to fill any rx
descriptors" and i forgot about anything else :)

it's only forwarding and receiver side gets around 650kpps, most of the
time..
but this number can vary from 300kpps to 1Mpps and i'm not sure what
triggers it 



Re: Xen Patch-01: Start of the Xen infrastructure

2015-12-05 Thread Martin Pieuchot
On 05/12/15(Sat) 19:12, Mike Belopuhov wrote:
> This diff doesn't do much by itself, just hooks up minimal Xen
> files to the tree.  I've split it so that all changes on top
> will look logically complete.
> 
> OK?

Can you add "const" before your "struct cfattach"?

Do you need a separate "xenvar.h"?  If not I'd suggest to keep the softc
definition in the file itself.

Either way ok mpi@



Xen Patch-10: Add code to disable emulated devices

2015-12-05 Thread Mike Belopuhov
Starting with this one, all further patches are sort of
work in progress.  This diff implements a way to detach
emulated devices (like a em network interface) in order
to switch them to paravirtualized drivers (e.g. Netfront).

Currently I'm not detaching anything, but provide a way
to do it via three global variables.  We'll have to decide
later what policy we want to implement regarding this:
keep a mix of em's and netfronts or convert them all.

So this is strictly not necessary, but the code is written
and tested, so I'd rather keep it around for later.

OK?

---
 sys/dev/pv/xen.c | 36 
 1 file changed, 36 insertions(+)

diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index 2c7c161..c78cc75 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -40,10 +40,11 @@ int xen_init_hypercall(struct xen_softc *);
 intxen_getversion(struct xen_softc *);
 intxen_getfeatures(struct xen_softc *);
 intxen_init_info_page(struct xen_softc *);
 intxen_init_cbvec(struct xen_softc *);
 intxen_init_interrupts(struct xen_softc *);
+void   xen_disable_emulated_devices(struct xen_softc *);
 
 intxen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
 void   xen_deferred(void *);
 void   xen_resume(struct device *);
@@ -104,10 +105,12 @@ xen_attach(struct device *parent, struct device *self, 
void *aux)
return;
 
if (xs_attach(sc))
return;
 
+   xen_disable_emulated_devices(sc);
+
mountroothook_establish(xen_deferred, sc);
 }
 
 void
 xen_deferred(void *arg)
@@ -717,5 +720,38 @@ xen_intr_enable(void)
printf("%s: unmasking port %u failed\n",
sc->sc_dev.dv_xname, xi->xi_port);
}
}
 }
+
+#include 
+
+#defineXMI_PORT0x10
+#define XMI_MAGIC  0x49d2
+#define XMI_UNPLUG_IDE 0x01
+#define XMI_UNPLUG_NIC 0x02
+#define XMI_UNPLUG_IDESEC  0x04
+
+int xen_disable_pv_ide, xen_disable_pv_idesec, xen_disable_pv_nic;
+
+void
+xen_disable_emulated_devices(struct xen_softc *sc)
+{
+#if defined(__i386__) || defined(__amd64__)
+   ushort unplug = 0;
+
+   if (inw(XMI_PORT) != XMI_MAGIC) {
+   printf("%s: no magic!\n", sc->sc_dev.dv_xname);
+   return;
+   }
+   if (xen_disable_pv_ide)
+   unplug |= XMI_UNPLUG_IDE;
+   if (xen_disable_pv_idesec)
+   unplug |= XMI_UNPLUG_IDESEC;
+   if (xen_disable_pv_nic)
+   unplug |= XMI_UNPLUG_NIC;
+   if (unplug) {
+   outw(XMI_PORT, unplug);
+   DPRINTF("%s: disabled emulated devices\n", sc->sc_dev.dv_xname);
+   }
+#endif /* __i386__ || __amd64__ */
+}
-- 
2.6.3



Re: libc: getusershell, new implementation

2015-12-05 Thread Tobias Stoeckmann
On Sat, Dec 05, 2015 at 01:25:10PM -0500, Ted Unangst wrote:
> ok. i was going to leave the behavior alone, but we can fix that too.
> 
> - use getline to read lines of any length.
> - only consider lines that start with a /.
> - truncate lines after a #, but not after spaces.

ok tobias, thanks for this clean implementation and for adjusting the parser
while at it. I guess we wouldn't touch the code again otherwise.



Xen Patch-01: Start of the Xen infrastructure

2015-12-05 Thread Mike Belopuhov
This diff doesn't do much by itself, just hooks up minimal Xen
files to the tree.  I've split it so that all changes on top
will look logically complete.

OK?

---
 sys/arch/amd64/conf/GENERIC |   2 +
 sys/dev/pv/files.pv |   5 ++
 sys/dev/pv/xen.c| 113 
 sys/dev/pv/xenvar.h |  25 ++
 4 files changed, 145 insertions(+)
 create mode 100644 sys/dev/pv/xen.c
 create mode 100644 sys/dev/pv/xenvar.h

diff --git sys/arch/amd64/conf/GENERIC sys/arch/amd64/conf/GENERIC
index 2e4f979..b20b62b 100644
--- sys/arch/amd64/conf/GENERIC
+++ sys/arch/amd64/conf/GENERIC
@@ -66,10 +66,12 @@ mpbios0 at bios0
 
 ipmi0  at mainbus? disable # IPMI
 
 vmt0   at pvbus?   # VMware Tools
 
+#xen0  at pvbus?   # Xen HVM domU
+
 option PCIVERBOSE
 option USBVERBOSE
 
 pchb*  at pci? # PCI-Host bridges
 aapic* at pci? # AMD 8131 IO apic
diff --git sys/dev/pv/files.pv sys/dev/pv/files.pv
index 2acad4b..1e5c9bd 100644
--- sys/dev/pv/files.pv
+++ sys/dev/pv/files.pv
@@ -10,5 +10,10 @@ file dev/pv/pvbus.c  pvbus   needs-flag
 
 # VMware Tools
 device vmt
 attach vmt at pvbus
 file   dev/pv/vmt.cvmt needs-flag
+
+# Xen
+device xen {}
+attach xen at pvbus
+file   dev/pv/xen.cxen needs-flag
diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
new file mode 100644
index 000..ee16d99
--- /dev/null
+++ sys/dev/pv/xen.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+struct xen_softc *xen_sc;
+
+void   xen_find_base(struct xen_softc *);
+
+intxen_match(struct device *, void *, void *);
+void   xen_attach(struct device *, struct device *, void *);
+void   xen_resume(struct device *);
+intxen_activate(struct device *, int);
+
+struct cfdriver xen_cd = {
+   NULL, "xen", DV_DULL
+};
+
+struct cfattach xen_ca = {
+   sizeof(struct xen_softc), xen_match, xen_attach, NULL, xen_activate
+};
+
+int
+xen_match(struct device *parent, void *match, void *aux)
+{
+   struct pv_attach_args *pva = aux;
+   struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];
+
+   if (hv->hv_base == 0)
+   return (0);
+
+   return (1);
+}
+
+void
+xen_attach(struct device *parent, struct device *self, void *aux)
+{
+   struct pv_attach_args *pva = (struct pv_attach_args *)aux;
+   struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];
+   struct xen_softc *sc = (struct xen_softc *)self;
+
+   sc->sc_base = hv->hv_base;
+
+   xen_find_base(sc);
+
+   printf("\n");
+
+   /* Wire it up to the global */
+   xen_sc = sc;
+}
+
+void
+xen_resume(struct device *self)
+{
+}
+
+int
+xen_activate(struct device *self, int act)
+{
+   int rv = 0;
+
+   switch (act) {
+   case DVACT_RESUME:
+   xen_resume(self);
+   break;
+   }
+   return (rv);
+}
+
+void
+xen_find_base(struct xen_softc *sc)
+{
+   uint32_t base, regs[4];
+
+   for (base = 0x4000; base < 0x4001; base += 0x100) {
+   CPUID(base, regs[0], regs[1], regs[2], regs[3]);
+   if (!memcmp("XenVMMXenVMM", &regs[1], 12) &&
+   (regs[0] - base) >= 2) {
+   if (base != sc->sc_base) {
+   printf(": new base %#x", base);
+   sc->sc_base = base;
+   }
+   break;
+   }
+   }
+}
diff --git sys/dev/pv/xenvar.h sys/dev/pv/xenvar.h
new file mode 100644
index 000..76061ee
--- /dev/null
+++ sys/dev/pv/xenvar.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2015 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND 

Xen Patch-05: Xen IDT vector

2015-12-05 Thread Mike Belopuhov
We're going to support two ways of delivering the combined
Xen interrupt that later fans out into event port specific
(device specific) interrupts: IDT method and a PCI device
method.

This diff adds a Xen IDT vector the same way LAPIC timer and
IPIs are implemented.  The additional machinery is there to
be able to mask Xen interrupt (it's installed as an IPL_NET|
IPL_MPSAFE interrupt) via standard mechanisms (e.g. splnet).

The LAPIC_XEN_VECTOR is set to be the first of the IPL_NET
group in the IDT.  The next IPL_NET interrupt will be
assigned 0x71 and so on.

OK?

---
 sys/arch/amd64/amd64/intr.c| 16 +++-
 sys/arch/amd64/amd64/lapic.c   |  7 +++
 sys/arch/amd64/amd64/vector.S  | 37 -
 sys/arch/amd64/include/i82489var.h |  7 +++
 sys/arch/amd64/include/intrdefs.h  |  1 +
 5 files changed, 66 insertions(+), 2 deletions(-)

diff --git sys/arch/amd64/amd64/intr.c sys/arch/amd64/amd64/intr.c
index 022ae1f..b24e872 100644
--- sys/arch/amd64/amd64/intr.c
+++ sys/arch/amd64/amd64/intr.c
@@ -52,10 +52,11 @@
 #include 
 #include 
 
 #include "ioapic.h"
 #include "lapic.h"
+#include "xen.h"
 
 #if NIOAPIC > 0
 #include 
 #endif
 
@@ -547,10 +548,13 @@ intr_handler(struct intrframe *frame, struct intrhand *ih)
 struct intrhand fake_softclock_intrhand;
 struct intrhand fake_softnet_intrhand;
 struct intrhand fake_softtty_intrhand;
 struct intrhand fake_timer_intrhand;
 struct intrhand fake_ipi_intrhand;
+#if NXEN > 0
+struct intrhand fake_xen_intrhand;
+#endif
 
 #if NLAPIC > 0 && defined(MULTIPROCESSOR) && 0
 static char *x86_ipi_names[X86_NIPI] = X86_IPI_NAMES;
 #endif
 
@@ -611,13 +615,23 @@ cpu_intr_init(struct cpu_info *ci)
isp->is_resume = Xresume_lapic_ipi;
fake_ipi_intrhand.ih_level = IPL_IPI;
isp->is_handlers = _ipi_intrhand;
isp->is_pic = _pic;
ci->ci_isources[LIR_IPI] = isp;
-
 #endif
+#if NXEN > 0
+   isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO);
+   if (isp == NULL)
+   panic("can't allocate fixed interrupt source");
+   isp->is_recurse = Xrecurse_xen_upcall;
+   isp->is_resume = Xresume_xen_upcall;
+   fake_xen_intrhand.ih_level = IPL_NET;
+   isp->is_handlers = _xen_intrhand;
+   isp->is_pic = _pic;
+   ci->ci_isources[LIR_XEN] = isp;
 #endif
+#endif /* NLAPIC */
 
intr_calculatemasks(ci);
 
 }
 
diff --git sys/arch/amd64/amd64/lapic.c sys/arch/amd64/amd64/lapic.c
index 29a8a6c..f810aed 100644
--- sys/arch/amd64/amd64/lapic.c
+++ sys/arch/amd64/amd64/lapic.c
@@ -53,10 +53,11 @@
 #include 
 
 #include 
 
 #include "ioapic.h"
+#include "xen.h"
 
 #if NIOAPIC > 0
 #include 
 #endif
 
@@ -351,10 +352,16 @@ lapic_boot_init(paddr_t lapic_base)
idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
 
idt_allocmap[LAPIC_TIMER_VECTOR] = 1;
idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer);
 
+#if NXEN > 0
+   /* Xen HVM Event Channel Interrupt Vector */
+   idt_allocmap[LAPIC_XEN_VECTOR] = 1;
+   idt_vec_set(LAPIC_XEN_VECTOR, Xintr_xen_upcall);
+#endif
+
evcount_attach(_count, "clock", _irq);
 #ifdef MULTIPROCESSOR
evcount_attach(_count, "ipi", _irq);
 #endif
 }
diff --git sys/arch/amd64/amd64/vector.S sys/arch/amd64/amd64/vector.S
index 3d752ab..2874734 100644
--- sys/arch/amd64/amd64/vector.S
+++ sys/arch/amd64/amd64/vector.S
@@ -81,10 +81,11 @@
 #include 
 
 #include "ioapic.h"
 #include "lapic.h"
 #include "assym.h"
+#include "xen.h"
 
 /*/
 
 /*
  * Trap and fault vector routines
@@ -433,12 +434,46 @@ IDTVEC(resume_lapic_ltimer)
jmp _C_LABEL(Xdoreti)
 2:
movq$(1 << LIR_TIMER),%rax
orq %rax,CPUVAR(IPENDING)
INTRFASTEXIT
-#endif /* NLAPIC > 0 */
 
+#if NXEN > 0
+/*
+ * Xen event channel upcall interrupt handler.
+ * Only used when the hypervisor supports direct vector callbacks.
+ */
+IDTVEC(recurse_xen_upcall)
+   INTR_RECURSE_HWFRAME
+   pushq   $0
+   subq$8,%rsp /* unused __if_trapno */
+   INTRENTRY
+   jmp 1f
+IDTVEC(intr_xen_upcall)
+   pushq   $0
+   subq$8,%rsp /* unused __if_trapno */
+   INTRENTRY
+   call_C_LABEL(xen_intr_ack)
+   movlCPUVAR(ILEVEL),%ebx
+   cmpl$IPL_NET,%ebx
+   jae 2f
+IDTVEC(resume_xen_upcall)
+1:
+   inclCPUVAR(IDEPTH)
+   movl$IPL_NET,CPUVAR(ILEVEL)
+   sti
+   cld
+   pushq   %rbx
+   call_C_LABEL(xen_intr)
+   jmp _C_LABEL(Xdoreti)
+2:
+   movq$(1 << LIR_XEN),%rax
+   orq %rax,CPUVAR(IPENDING)
+3:
+   INTRFASTEXIT
+#endif /* NXEN > 0 */
+#endif /* NLAPIC > 0 */
 
 #define voidop(num)
 
 
 /*
diff --git sys/arch/amd64/include/i82489var.h sys/arch/amd64/include/i82489var.h
index 4eb1999..0ac0dcc 100644
--- sys/arch/amd64/include/i82489var.h
+++ 

Xen Patch-06: Communicate the selected IDT vector to the HV

2015-12-05 Thread Mike Belopuhov
This diff communicates an allocated IDT vector to the Hypervisor.

OK?

---
 sys/dev/pv/xen.c| 34 ++
 sys/dev/pv/xenvar.h |  2 ++
 2 files changed, 36 insertions(+)

diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index e8aeeb8..8643636 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -25,10 +25,12 @@
 #include 
 #include 
 
 #include 
 
+#include 
+
 #include 
 #include 
 #include 
 
 struct xen_softc *xen_sc;
@@ -36,10 +38,11 @@ struct xen_softc *xen_sc;
 void   xen_find_base(struct xen_softc *);
 intxen_init_hypercall(struct xen_softc *);
 intxen_getversion(struct xen_softc *);
 intxen_getfeatures(struct xen_softc *);
 intxen_init_info_page(struct xen_softc *);
+intxen_init_cbvec(struct xen_softc *);
 
 intxen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
 void   xen_resume(struct device *);
 intxen_activate(struct device *, int);
@@ -88,10 +91,12 @@ xen_attach(struct device *parent, struct device *self, void 
*aux)
if (xen_getfeatures(sc))
return;
 
if (xen_init_info_page(sc))
return;
+
+   xen_init_cbvec(sc);
 }
 
 void
 xen_resume(struct device *self)
 {
@@ -418,5 +423,34 @@ xen_init_info_page(struct xen_softc *sc)
}
DPRINTF("%s: shared info page at va %p pa %#lx\n", sc->sc_dev.dv_xname,
sc->sc_ipg, pa);
return (0);
 }
+
+int
+xen_init_cbvec(struct xen_softc *sc)
+{
+   struct xen_hvm_param xhp;
+
+   if ((sc->sc_features & XENFEAT_CBVEC) == 0)
+   return (ENOENT);
+
+   xhp.domid = DOMID_SELF;
+   xhp.index = HVM_PARAM_CALLBACK_IRQ;
+   xhp.value = HVM_CALLBACK_VECTOR(LAPIC_XEN_VECTOR);
+   if (xen_hypercall(sc, hvm_op, 2, HVMOP_set_param, )) {
+   /* Will retry with the xspd(4) PCI interrupt */
+   return (ENOENT);
+   }
+   DPRINTF("%s: registered callback IDT vector %d\n",
+   sc->sc_dev.dv_xname, LAPIC_XEN_VECTOR);
+
+   sc->sc_cbvec = 1;
+
+   return (0);
+}
+
+void
+xen_intr(void)
+{
+   /* stub */
+}
diff --git sys/dev/pv/xenvar.h sys/dev/pv/xenvar.h
index b5db26d..7c5d244 100644
--- sys/dev/pv/xenvar.h
+++ sys/dev/pv/xenvar.h
@@ -31,10 +31,12 @@ struct xen_softc {
void*sc_hc;
uint32_t sc_features;
 #define  XENFEAT_CBVEC (1<<8)
 
struct shared_info  *sc_ipg;/* HYPERVISOR_shared_info */
+
+   int  sc_cbvec;  /* callback was installed */
 };
 
 extern struct xen_softc *xen_sc;
 
 /*
-- 
2.6.3



Xen Patch-07: Xen interrupt subsystem

2015-12-05 Thread Mike Belopuhov
This adds support for establishing virtual interrupts via Xen
event channel ports.

During boot, Xen will use polling mode, but once the system
enables interrupts after cpu_configure(), xen_intr_enable
will be called from the mountroot hook to unmask event ports.

xen_intr is the combined interrupt handler that is called
from the vector.S or the [upcoming] PCI device driver.

OK?

---
 sys/dev/pv/xen.c| 262 +++-
 sys/dev/pv/xenreg.h | 115 +++
 sys/dev/pv/xenvar.h |  27 ++
 3 files changed, 403 insertions(+), 1 deletion(-)

diff --git sys/dev/pv/xen.c sys/dev/pv/xen.c
index 8643636..2fa7283 100644
--- sys/dev/pv/xen.c
+++ sys/dev/pv/xen.c
@@ -39,13 +39,15 @@ voidxen_find_base(struct xen_softc *);
 intxen_init_hypercall(struct xen_softc *);
 intxen_getversion(struct xen_softc *);
 intxen_getfeatures(struct xen_softc *);
 intxen_init_info_page(struct xen_softc *);
 intxen_init_cbvec(struct xen_softc *);
+intxen_init_interrupts(struct xen_softc *);
 
 intxen_match(struct device *, void *, void *);
 void   xen_attach(struct device *, struct device *, void *);
+void   xen_deferred(void *);
 void   xen_resume(struct device *);
 intxen_activate(struct device *, int);
 
 struct cfdriver xen_cd = {
NULL, "xen", DV_DULL
@@ -93,10 +95,29 @@ xen_attach(struct device *parent, struct device *self, void 
*aux)
 
if (xen_init_info_page(sc))
return;
 
xen_init_cbvec(sc);
+
+   if (xen_init_interrupts(sc))
+   return;
+
+   mountroothook_establish(xen_deferred, sc);
+}
+
+void
+xen_deferred(void *arg)
+{
+   struct xen_softc *sc = arg;
+
+   if (!sc->sc_cbvec) {
+   DPRINTF("%s: callback vector hasn't been established\n",
+   sc->sc_dev.dv_xname);
+   return;
+   }
+
+   xen_intr_enable();
 }
 
 void
 xen_resume(struct device *self)
 {
@@ -447,10 +468,249 @@ xen_init_cbvec(struct xen_softc *sc)
sc->sc_cbvec = 1;
 
return (0);
 }
 
+int
+xen_init_interrupts(struct xen_softc *sc)
+{
+   int i;
+
+   sc->sc_irq = LAPIC_XEN_VECTOR;
+   evcount_attach(>sc_evcnt, sc->sc_dev.dv_xname, >sc_irq);
+
+   /*
+* Clear all pending events and mask all interrupts
+*/
+   for (i = 0; i < nitems(sc->sc_ipg->evtchn_pending); i++) {
+   sc->sc_ipg->evtchn_pending[i] = 0;
+   sc->sc_ipg->evtchn_mask[i] = ~0UL;
+   membar_producer();
+   }
+
+   SLIST_INIT(>sc_intrs);
+
+   return (0);
+}
+
+static inline struct xen_intsrc *
+xen_lookup_intsrc(struct xen_softc *sc, evtchn_port_t port)
+{
+   struct xen_intsrc *xi;
+
+   SLIST_FOREACH(xi, >sc_intrs, xi_entry)
+   if (xi->xi_port == port)
+   break;
+   return (xi);
+}
+
+void
+xen_intr_ack(void)
+{
+   struct xen_softc *sc = xen_sc;
+   struct shared_info *s = sc->sc_ipg;
+   struct vcpu_info *v = >vcpu_info[curcpu()->ci_cpuid];
+
+   v->evtchn_upcall_pending = 0;
+}
+
 void
 xen_intr(void)
 {
-   /* stub */
+   struct xen_softc *sc = xen_sc;
+   struct xen_intsrc *xi;
+   struct shared_info *s = sc->sc_ipg;
+   struct vcpu_info *v = >vcpu_info[curcpu()->ci_cpuid];
+   ulong pending, selector;
+   int port, bit, row;
+
+   sc->sc_evcnt.ec_count++;
+
+   v->evtchn_upcall_pending = 0;
+   selector = atomic_swap_ulong(>evtchn_pending_sel, 0);
+
+   for (row = 0; selector > 0; selector >>= 1, row++) {
+   if ((selector & 1) == 0)
+   continue;
+   pending = sc->sc_ipg->evtchn_pending[row] &
+   ~(sc->sc_ipg->evtchn_mask[row]);
+   for (bit = 0; pending > 0; pending >>= 1, bit++) {
+   if ((pending & 1) == 0)
+   continue;
+   sc->sc_ipg->evtchn_pending[row] &= ~(1 << bit);
+   membar_producer();
+   port = (row * LONG_BIT) + bit;
+   if ((xi = xen_lookup_intsrc(sc, port)) == NULL)
+   continue;
+   xi->xi_evcnt.ec_count++;
+
+   if (xi->xi_handler)
+   xi->xi_handler(xi->xi_arg);
+   }
+   }
+}
+
+void
+xen_intr_signal(xen_intr_handle_t xih)
+{
+   struct xen_softc *sc = xen_sc;
+   struct xen_intsrc *xi;
+   struct evtchn_send es;
+
+   if ((xi = xen_lookup_intsrc(sc, (evtchn_port_t)xih)) != NULL) {
+   es.port = xi->xi_port;
+   xen_hypercall(sc, event_channel_op, 2, EVTCHNOP_send, );
+   }
+}
+
+int
+xen_intr_establish(evtchn_port_t port, xen_intr_handle_t *xih,
+void (*handler)(void *), void *arg, char *name)
+{
+   struct xen_softc *sc = xen_sc;
+   struct xen_intsrc *xi;
+   struct evtchn_alloc_unbound 

Re: Xen Patch-02: Setup the hypercall page

2015-12-05 Thread Mike Larkin
On Sat, Dec 05, 2015 at 07:19:18PM +0100, Mike Belopuhov wrote:
> This allocates a page-aligned page in the kernel text segment
> to be used by the hypercall interface.  The content of the page
> is provided by the Hypervisor itself when its physical address
> is communicated via an MSR write (in the next diff).
> 
> OK?

If this is in kernel text, it will end up getting RX permissions.
Is that what you want? Does the hypervisor write actual code in
there or just data? 

If the latter, perhaps another location is better, maybe .rodata
or .data ?

-ml

> 
> ---
>  sys/arch/amd64/amd64/locore.S | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git sys/arch/amd64/amd64/locore.S sys/arch/amd64/amd64/locore.S
> index 618c6e7..f147131 100644
> --- sys/arch/amd64/amd64/locore.S
> +++ sys/arch/amd64/amd64/locore.S
> @@ -107,10 +107,11 @@
>  #include "assym.h"
>  #include "lapic.h"
>  #include "ioapic.h"
>  #include "ksyms.h"
>  #include "acpi.h"
> +#include "xen.h"
>  
>  #include 
>  #include 
>  
>  #include 
> @@ -1169,5 +1170,14 @@ _C_LABEL(codepatch_begin):
>  
>   .section .codepatchend,"a"
>   .globl _C_LABEL(codepatch_end)
>  _C_LABEL(codepatch_end):
>   .previous
> +
> +#if NXEN > 0
> + /* Hypercall_page needs to be PAGE aligned */
> + .text
> + .align  NBPG
> + .globl  _C_LABEL(xen_hypercall_page)
> +_C_LABEL(xen_hypercall_page):
> + .skip   0x1000, 0x90/* Fill with NOPs */
> +#endif /* NXEN > 0 */
> -- 
> 2.6.3
> 



newfs: avoid oob read on command line argument

2015-12-05 Thread Tobias Stoeckmann
Here's the spin-off from previous __progname patch.

It's possible to have an out-of-bounds read in newfs_ext2fs when
supplying an empty partition name. Before calling strchr() - 1, it should
be verified that it's not empty. While at it, the result of the strchr call
will never be NULL, because eventually a '\0' char will be found. Even if
that would not be the case, the "- 1" addition renders the NULL check
pointless.

mmcc@ had the nice idea to split this into its own check, which looks much
better because we avoid saving an illegal pointer, even though it wouldn't
be used.

With applied patch:

$ newfs_ext2fs -N ""   
newfs_ext2fs: /dev/: not a character-special device
newfs_ext2fs: empty partition name supplied
$ _

I think the newfs-part cannot be triggered, but better be safe than
sorry, and stay in sync with newfs_ext2fs.


Tobias

Index: sbin/newfs/newfs.c
===
RCS file: /cvs/src/sbin/newfs/newfs.c,v
retrieving revision 1.103
diff -u -p -u -p -r1.103 newfs.c
--- sbin/newfs/newfs.c  25 Nov 2015 19:45:21 -  1.103
+++ sbin/newfs/newfs.c  5 Dec 2015 10:52:39 -
@@ -423,10 +423,11 @@ main(int argc, char *argv[])
warnx("%s: not a character-special device",
special);
}
+   if (*argv[0] == '\0')
+   fatal("empty partition name supplied");
cp = strchr(argv[0], '\0') - 1;
-   if (cp == NULL ||
-   ((*cp < 'a' || *cp > ('a' + maxpartitions - 1))
-   && !isdigit((unsigned char)*cp)))
+   if ((*cp < 'a' || *cp > ('a' + maxpartitions - 1))
+   && !isdigit((unsigned char)*cp))
fatal("%s: can't figure out file system partition",
argv[0]);
lp = getdisklabel(special, fsi);
Index: sbin/newfs_ext2fs/newfs_ext2fs.c
===
RCS file: /cvs/src/sbin/newfs_ext2fs/newfs_ext2fs.c,v
retrieving revision 1.21
diff -u -p -u -p -r1.21 newfs_ext2fs.c
--- sbin/newfs_ext2fs/newfs_ext2fs.c28 Nov 2015 06:12:09 -  1.21
+++ sbin/newfs_ext2fs/newfs_ext2fs.c5 Dec 2015 10:52:39 -
@@ -529,9 +529,11 @@ getpartition(int fsi, const char *specia
errx(EXIT_FAILURE, "%s: block device", special);
if (!S_ISCHR(st.st_mode))
warnx("%s: not a character-special device", special);
+   if (*argv[0] == '\0')
+   errx(EXIT_FAILURE, "empty partition name supplied");
cp = strchr(argv[0], '\0') - 1;
-   if (cp == NULL || ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
-   && !isdigit((unsigned char)*cp)))
+   if ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
+   && !isdigit((unsigned char)*cp))
errx(EXIT_FAILURE, "%s: can't figure out file system 
partition", argv[0]);
lp = getdisklabel(special, fsi);
if (isdigit((unsigned char)*cp))



simplify in6_selectsrc() logic

2015-12-05 Thread Vincent Gross
in6_selectsrc() uses two different rtalloc calls depending on whether or
not the destination address is multicast, but there is nothing to
explain why. I dug a bit and found this commit from itojun@ :

diff -u -r1.6 -r1.7
--- src/sys/netinet6/in6_src.c  2000/06/18 04:49:32 1.6
+++ src/sys/netinet6/in6_src.c  2000/06/18 17:02:59 1.7
@@ -244,7 +244,11 @@
ro->ro_dst.sin6_family = AF_INET6;
ro->ro_dst.sin6_len = sizeof(struct sockaddr_in6);
ro->ro_dst.sin6_addr = *dst;
-   if (!IN6_IS_ADDR_MULTICAST(dst)) {
+   ro->ro_dst.sin6_scope_id = dstsock->sin6_scope_id;
+   if (IN6_IS_ADDR_MULTICAST(dst)) {
+   ro->ro_rt = rtalloc1(&((struct route *)ro)
+->ro_dst, 0);
+   } else {
rtalloc((struct route *)ro);
}
}

Below are rtalloc() and rtalloc1() from sys/net/route.c r1.19 committed
on 05/21/2000 :

> void
> rtalloc(ro)
>   register struct route *ro;
> {
>   if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
>   return;  /* XXX */
>   ro->ro_rt = rtalloc1(>ro_dst, 1);
> }
> 
> struct rtentry *
> rtalloc1(dst, report)
>   register struct sockaddr *dst;
>   int report;
> {
[...]
>   /*
>* IP encapsulation does lots of lookups where we don't need nor want
>* the RTM_MISSes that would be generated.  It causes RTM_MISS storms
>* sent upward breaking user-level routing queries.
>*/
>   miss:   if (report && dst->sa_family != PF_KEY) {
>   bzero((caddr_t), sizeof(info));
>   info.rti_info[RTAX_DST] = dst;
>   rt_missmsg(msgtype, , 0, err);
>   }
>   }
>   splx(s);
>   return (newrt);
> }


So this if(MULTICAST) has been introduced to prevent RTM_MISS storms when
looking up routes to multicast addresses ; multicast and unicast route lookups
are the same.

Also, rtalloc(foo, RT_RESOLVE, bar) and rtalloc_mpath(foo, NULL, bar) are both
equivalent to _rtalloc(foo, NULL, RT_RESOLVE, bar).

Let's remove this if(MULTICAST), it's just confusing.

ok ?

Index: sys/netinet6/in6_src.c
===
RCS file: /cvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.71
diff -u -p -r1.71 in6_src.c
--- sys/netinet6/in6_src.c  2 Dec 2015 13:29:26 -   1.71
+++ sys/netinet6/in6_src.c  5 Dec 2015 12:03:48 -
@@ -240,13 +240,8 @@ in6_selectsrc(struct in6_addr **in6src, 
sa6->sin6_len = sizeof(struct sockaddr_in6);
sa6->sin6_addr = *dst;
sa6->sin6_scope_id = dstsock->sin6_scope_id;
-   if (IN6_IS_ADDR_MULTICAST(dst)) {
-   ro->ro_rt = rtalloc(sin6tosa(>ro_dst),
-   RT_RESOLVE, ro->ro_tableid);
-   } else {
-   ro->ro_rt = rtalloc_mpath(sin6tosa(>ro_dst),
-   NULL, ro->ro_tableid);
-   }
+   ro->ro_rt = rtalloc(sin6tosa(>ro_dst),
+   RT_RESOLVE, ro->ro_tableid);
}
 
/*



Re: libc: getusershell, new implementation

2015-12-05 Thread patrick keshishian
On Sat, Dec 05, 2015 at 01:25:10PM -0500, Ted Unangst wrote:
> Tobias Stoeckmann wrote:
> > 
> > And I still think that the current code is a bit too permissive in parsing
> > things. I mean what's the point in allowing lines like:
> > 
> > sometextwithoutspace/bin/ksh should be used for logins # seriously!
> > 
> > Which would result in /bin/ksh, by the way.
> > 
> > Didn't notice the consequences that arise by keeping the descriptor open,
> > so I'm fine with an alternative approach. Yet we might make the code a
> > bit easier to review by not allowing such weird lines. What it should
> > expect and enforce:
> > 
> > - a valid line has to start with a slash
> > - comments are chopped off
> > - comments are supposed to be at the beginning of a line
> > 
> > So if somebody writes "/bin/ksh # comment", that actually leads to 
> > "/bin/ksh ",
> > with an additional whitespace at the end. Currently we couldn't even 
> > specify a
> > shell with a whitespace in its path.
> 
> ok. i was going to leave the behavior alone, but we can fix that too.
> 
> - use getline to read lines of any length.
> - only consider lines that start with a /.
> - truncate lines after a #, but not after spaces.
> 
> 
> Index: gen/getusershell.c
> ===
> RCS file: /cvs/src/lib/libc/gen/getusershell.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 getusershell.c
> --- gen/getusershell.c14 Sep 2015 16:09:13 -  1.16
> +++ gen/getusershell.c5 Dec 2015 18:24:33 -
> @@ -28,14 +28,13 @@
>   * SUCH DAMAGE.
>   */
>  
> -#include 
> -
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  /*
> @@ -44,7 +43,7 @@
>   */
>  
>  static char *okshells[] = { _PATH_BSHELL, _PATH_CSHELL, _PATH_KSHELL, NULL };
> -static char **curshell, **shells, *strings;
> +static char **curshell, **shells;
>  static char **initshells(void);
>  
>  /*
> @@ -66,11 +65,14 @@ getusershell(void)
>  void
>  endusershell(void)
>  {
> - 
> + char **s;
> +
> + if ((s = shells))
> + while (*s)
> + free(*s++);
>   free(shells);
>   shells = NULL;
> - free(strings);
> - strings = NULL;
> +
>   curshell = NULL;
>  }
>  
> @@ -84,48 +86,50 @@ setusershell(void)
>  static char **
>  initshells(void)
>  {
> - char **sp, *cp;
> + size_t nshells, nalloc, linesize;
> + char *line;
>   FILE *fp;
> - struct stat statb;
>  
>   free(shells);
>   shells = NULL;
> - free(strings);
> - strings = NULL;
> +
>   if ((fp = fopen(_PATH_SHELLS, "re")) == NULL)
>   return (okshells);
> - if (fstat(fileno(fp), ) == -1) {
> - (void)fclose(fp);
> - return (okshells);
> - }
> - if (statb.st_size > SIZE_MAX) {
> - (void)fclose(fp);
> - return (okshells);
> - }
> - if ((strings = malloc((size_t)statb.st_size)) == NULL) {
> - (void)fclose(fp);
> - return (okshells);
> - }
> - shells = calloc((size_t)(statb.st_size / 3 + 2), sizeof (char *));
> - if (shells == NULL) {
> - (void)fclose(fp);
> - free(strings);
> - strings = NULL;
> - return (okshells);
> - }
> - sp = shells;
> - cp = strings;
> - while (fgets(cp, PATH_MAX + 1, fp) != NULL) {
> - while (*cp != '#' && *cp != '/' && *cp != '\0')
> - cp++;
> - if (*cp == '#' || *cp == '\0')
> +
> + line = NULL;
> + nalloc = 10; // just an initial guess
> + nshells = 0;
> + shells = reallocarray(NULL, nalloc, sizeof (char *));
> + if (shells == NULL)
> + goto fail;
> + linesize = 0;
> + while (getline(, , fp) != -1) {
> + if (*line != '/')
>   continue;
> - *sp++ = cp;
> - while (!isspace((unsigned char)*cp) && *cp != '#' && *cp != 
> '\0')
> - cp++;
> - *cp++ = '\0';
> + line[strcspn(line, "#\n")] = '\0';
> + if (!(shells[nshells] = strdup(line)))
> + goto fail;
> +
> + nshells++;
> + if (nshells == nalloc) {
> + char **new = reallocarray(shells, nalloc * 2, 
> sizeof(char *));
> + if (!new)
> + goto fail;

This 'goto fail' will free() beyond allocated: shells[nshells--]
Better to check 'if (nshells + 1 == nalloc)' and increment nshells
afterward.

--patrick

> + shells = new;
> + nalloc *= 2;
> + }
>   }
> - *sp = NULL;
> + free(line);
> + shells[nshells] = NULL;
>   (void)fclose(fp);
>   return (shells);
> +
> +fail:
> + free(line);
> + while (nshells)
> + free(shells[nshells--]);
> + free(shells);
> + shells = NULL;
> + 

Re: Xen Patch-10: Add code to disable emulated devices

2015-12-05 Thread Mike Belopuhov
On Sat, Dec 05, 2015 at 20:13 +0100, Karel Gardas wrote:
> > +void
> > +xen_disable_emulated_devices(struct xen_softc *sc)
> > +{
> > +#if defined(__i386__) || defined(__amd64__)
> 
> just a nitpick, not in a position to comment on your patches but this
> has caught my eyes. So far everything was just generic or amd64
> specific. Now you ifdef also for i386. Is that intentional? Thanks,
> Karel

Hi Karel,

Please don't be afraid to comment and provide any insights you
might have.  The whole reason for having the tech@ list is to
get people involved in public technical discussions and let
everyone have a chance to speak their mind.  Your question is
not less valid than any other.  

As to the question itself, Xen dom0 has been so far implemented
for i386, amd64 and arm (or arm64 -- I'll have to check that) so
by definition there can only be support for these architectures.
I have concentrated on providing amd64 domU support first and
while the hypercall interface is implemented for i386 as well, the
interrupt code is missing.

In the meantime I have guarded some obviously MD specific chunks
with __i386__ and __amd64__.  I realise that a few other places
(around CPUID or wrmsr) can use those ifdefs as well and perhaps
it's a bit of a deficiency that I'm not 100% consistent with their
usage.  I'll have to consider this carefully.

Cheers,
Mike



Re: __progname in base

2015-12-05 Thread Ted Unangst
Tobias Stoeckmann wrote:
> Opinions, thoughts?

looks good, but you've got some mostly unrelated changes in here. this should
be separate, but ok for the rest.

> > Index: sbin/newfs_ext2fs/newfs_ext2fs.c
> > ===
> > RCS file: /cvs/src/sbin/newfs_ext2fs/newfs_ext2fs.c,v
> > retrieving revision 1.17
> > diff -u -p -u -p -r1.17 newfs_ext2fs.c
> > --- sbin/newfs_ext2fs/newfs_ext2fs.c14 Oct 2015 15:54:49 -  
> > 1.17
> > +++ sbin/newfs_ext2fs/newfs_ext2fs.c7 Nov 2015 11:16:27 -
> > @@ -519,7 +519,8 @@ getpartition(int fsi, const char *specia
> > if (!S_ISCHR(st.st_mode))
> > warnx("%s: not a character-special device", special);
> > cp = strchr(argv[0], '\0') - 1;
> > -   if (cp == NULL || ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
> > +   if (cp == NULL || cp < argv[0] ||
> > +   ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
> > && !isdigit((unsigned char)*cp)))
> > errx(EXIT_FAILURE, "%s: can't figure out file system 
> > partition", argv[0]);
> > lp = getdisklabel(special, fsi);



Re: Linker changes between 5.7 and 5.8

2015-12-05 Thread Tati Chevron

On Sat, Dec 05, 2015 at 07:33:11AM +0100, Stefan Kempf wrote:

Tati Chevron wrote:

This assembled and linked without problems on 5.7-release, but now when
I try it on 5.8-release, I get an error:

$ as -o charset.o charset.S
$ ld -Bstatic charset.o



ld: charset.o: relocation R_X86_64_32S against `a local symbol' can not
be used when making a shared object; recompile with -fPIC
charset.o: could not read symbols: Bad value


Try it with ld -Bstatic -nopie charset.o


Ahhh, I was using -no-pie and wondering why it didn't work :-).

Great, so compiling with -nopie produces a working executable, excellent, 
thanks.

But I thought that amd64 was already using static pie by default in 5.7?

The release notes at http://www.openbsd.org/57.html suggest this:


All architectures have been transitioned to static PIE


--
Tati Chevron
Perl and FORTRAN specialist.
SWABSIT development and migration department.
http://www.swabsit.com



Re: __progname in base

2015-12-05 Thread Tobias Stoeckmann
On Sat, Dec 05, 2015 at 03:29:06AM -0500, Ted Unangst wrote:
> looks good, but you've got some mostly unrelated changes in here. this should
> be separate, but ok for the rest.

It started with a "check argv" code review and ended up with __progname
adjustments, so I agree here and removed the newfs parts for a separate
commit. Also the gomoku KNF is a bit distracting, so it's gone now, too.

Theo pointed out that tradcpp and lex should stay as they are, because
they are upstream projects.

Joerg mentioned the getprogname function previously, so I left id and nl
alone, too.

The patch touches these programs now:
mt, pax, gomoku, telnet, crunchgen, pppd, and pdisk

As I have removed the concerning parts and got OKs for the rest,
I will commit it later today if nobody objects.

Index: bin/mt/mt.c
===
RCS file: /cvs/src/bin/mt/mt.c,v
retrieving revision 1.36
diff -u -p -u -p -r1.36 mt.c
--- bin/mt/mt.c 12 Nov 2013 04:36:02 -  1.36
+++ bin/mt/mt.c 5 Dec 2015 10:01:55 -
@@ -88,6 +88,8 @@ int   _rmtmtioctop(int fd, struct mtop *c
 struct mtget   *_rmtstatus(int fd);
 void   _rmtclose(void);
 
+extern char*__progname;
+
 char   *host = NULL;   /* remote host (if any) */
 
 int
@@ -133,7 +135,6 @@ _rmtclose(void)
 #endif
 }
 
-char   *progname;
 inteject = 0;
 
 int
@@ -145,12 +146,7 @@ main(int argc, char *argv[])
char *p, *tape, *realtape, *opts;
size_t len;
 
-   if ((progname = strrchr(argv[0], '/')))
-   progname++;
-   else
-   progname = argv[0];
-
-   if (strcmp(progname, "eject") == 0) {
+   if (strcmp(__progname, "eject") == 0) {
opts = "t";
eject = 1;
tape = NULL;
@@ -320,9 +316,9 @@ void
 usage(void)
 {
if (eject)
-   (void)fprintf(stderr, "usage: %s [-t] device\n", progname);
+   (void)fprintf(stderr, "usage: %s [-t] device\n", __progname);
else
(void)fprintf(stderr,
-   "usage: %s [-f device] command [count]\n", progname);
+   "usage: %s [-f device] command [count]\n", __progname);
exit(X_USAGE);
 }
Index: bin/pax/options.c
===
RCS file: /cvs/src/bin/pax/options.c,v
retrieving revision 1.91
diff -u -p -u -p -r1.91 options.c
--- bin/pax/options.c   18 May 2015 20:26:16 -  1.91
+++ bin/pax/options.c   5 Dec 2015 10:01:56 -
@@ -184,14 +184,12 @@ char *chdname = NULL;
 void
 options(int argc, char **argv)
 {
+   extern char *__progname;
 
/*
 * Are we acting like pax, tar or cpio (based on argv[0])
 */
-   if ((argv0 = strrchr(argv[0], '/')) != NULL)
-   argv0++;
-   else
-   argv0 = argv[0];
+   argv0 = __progname;
 
if (strcmp(NM_TAR, argv0) == 0) {
tar_options(argc, argv);
Index: games/gomoku/main.c
===
RCS file: /cvs/src/games/gomoku/main.c,v
retrieving revision 1.29
diff -u -p -u -p -r1.29 main.c
--- games/gomoku/main.c 30 Nov 2015 08:44:51 -  1.29
+++ games/gomoku/main.c 5 Dec 2015 10:01:56 -
@@ -45,10 +45,11 @@
 #define PROGRAM1   /* get input from program */
 #define INPUTF 2   /* get input from a file */
 
+extern char *__progname;   /* name of program */
+
 intinteractive = 1;/* true if interactive */
 intdebug;  /* true if debugging */
 inttest;   /* both moves come from 1: input, 2: computer */
-char   *prog;  /* name of program */
 FILE   *debugfp;   /* file for debug output */
 FILE   *inputfp;   /* file for debug input */
 
@@ -84,12 +85,6 @@ main(argc, argv)
if (pledge("stdio rpath wpath cpath tty", NULL) == -1)
err(1, "pledge");
 
-   prog = strrchr(argv[0], '/');
-   if (prog)
-   prog++;
-   else
-   prog = argv[0];
-
if ((tmpname = getlogin()) != NULL)
strlcpy(you, tmpname, sizeof(you));
else
@@ -117,7 +112,7 @@ main(argc, argv)
default:
fprintf(stderr,
"usage: %s [-bcdu] [-D debugfile] [inputfile]\n",
-   prog);
+   __progname);
exit(1);
}
}
@@ -194,8 +189,8 @@ again:
}
}
if (interactive) {
-   plyr[BLACK] = input[BLACK] == USER ? you : prog;
-   plyr[WHITE] = input[WHITE] == USER ? you : prog;
+   plyr[BLACK] = input[BLACK] == USER ? you : __progname;
+   plyr[WHITE] = input[WHITE] == USER ? you : __progname;
bdwho(1);
}
 
@@ -222,8 +217,8 @@ again:
  

removing expired once rules in pf_purge_thread()

2015-12-05 Thread Alexandr Nedvedicky
Hello,

henning@ and mikeb@ showed some interest to change handling of once rules to
the same way as PF has it on Solaris. Just to refresh the audience on once
option offered by PF:

 once    Creates a one shot rule that will remove itself from an active
 ruleset after the first match.  In case this is the only rule in
 the anchor, the anchor will be destroyed automatically after the
 rule is matched.
   -- pf.conf(5)

Currently the once rules are removed by the matching packet. The patch makes
life a bit easier for packets which match once rules. Instead of removing the
rule from the ruleset, packets just mark the rule as expired and put it on a
garbage collector list. The list is processed by pf_purge_thread(), which just
removes and deletes those expired rules. To get there we need to simplify the
pf_purge_rule() signature, which currently looks as follows:

void
pf_purge_rule(struct pf_ruleset *ruleset, struct pf_rule *rule,
struct pf_ruleset *aruleset, struct pf_rule *arule)

- ruleset is the ruleset, where once rule is being removed from

- rule is a once rule to remove

- aruleset holds an anchor rule with once-rule we remove

- arule an anchor which holds a once rule

To make pf_purge_rule() suitable for pf_purge_thread() it has to be changed to:

void
pf_purge_rule(struct pf_rule *once_rule)

To get there, the ruleset and arule have to be carried by once_rule itself.
Therefore the patch adds these members to pf_rule:
struct pf_ruleset   *myruleset
struct pf_rule  *myarule
SLIST_ENTRY(pf_rule) gcle
(gcle is the garbage collector list link).

Patch sets myruleset as soon as rule gets inserted to ruleset in SIOCADDRULE
ioctl. The myarule is set in pf_test_rule(), when once rule is marked as
expired.

Don't forget to recompile all user-land bits (pfctl, proxies et al.) when
testing the patch, since the pf_rule structure changes.

regards
sasha

8<---8<---8<--8<

Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.958
diff -u -p -r1.958 pf.c
--- net/pf.c5 Dec 2015 14:58:06 -   1.958
+++ net/pf.c5 Dec 2015 22:09:42 -
@@ -298,6 +298,9 @@ RB_GENERATE(pf_state_tree, pf_state_key,
 RB_GENERATE(pf_state_tree_id, pf_state,
 entry_id, pf_state_compare_id);
 
+SLIST_HEAD(pf_rule_gcl, pf_rule)   pf_rule_gcl =
+   SLIST_HEAD_INITIALIZER(pf_rule_gcl);
+
 __inline int
 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
 {
@@ -1140,6 +1143,24 @@ pf_state_export(struct pfsync_state *sp,
 /* END state table stuff */
 
 void
+pf_purge_expired_rules(void)
+{
+   struct pf_rule  *r;
+
+   if (SLIST_EMPTY(&pf_rule_gcl)) {
+   return;
+   }
+
+   rw_enter_write(&pf_consistency_lock);
+   while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
+   SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
+   KASSERT(r->rule_flag & PFRULE_EXPIRED);
+   pf_purge_rule(r);
+   }
+   rw_exit_write(&pf_consistency_lock);
+}
+
+void
 pf_purge_thread(void *v)
 {
int nloops = 0, s;
@@ -1157,6 +1178,7 @@ pf_purge_thread(void *v)
if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
pf_purge_expired_fragments();
pf_purge_expired_src_nodes(0);
+   pf_purge_expired_rules();
nloops = 0;
}
 
@@ -3149,6 +3171,10 @@ pf_test_rule(struct pf_pdesc *pd, struct
ruleset = &pf_main_ruleset;
r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr);
while (r != NULL) {
+   if (r->rule_flag & PFRULE_EXPIRED) {
+   r = TAILQ_NEXT(r, entries);
+   goto nextrule;
+   }
r->evaluations++;
PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot),
r->skip[PF_SKIP_IFP].ptr);
@@ -3447,8 +3473,11 @@ pf_test_rule(struct pf_pdesc *pd, struct
}
 #endif /* NPFSYNC > 0 */
 
-   if (r->rule_flag & PFRULE_ONCE)
-   pf_purge_rule(ruleset, r, aruleset, a);
+   if (r->rule_flag & PFRULE_ONCE) {
+   r->rule_flag |= PFRULE_EXPIRED;
+   r->myarule = a;
+   SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
+   }
 
 #ifdef INET6
if (rewrite && skw->af != sks->af)
Index: net/pf_ioctl.c
===
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.297
diff -u -p -r1.297 pf_ioctl.c
--- net/pf_ioctl.c  3 Dec 2015 13:30:18 -   1.297
+++ net/pf_ioctl.c  5 Dec 2015 22:09:42 -
@@ -301,12 +301,14 @@ pf_rm_rule(struct pf_rulequeue *rulequeu
 }
 
 void

3rd party Xbox 360 USB controller support

2015-12-05 Thread Christian Heckendorf
The previous thread[1] discussing these controllers includes two
patches but they seem to have been merged for the commit in a way
that limits support to only Microsoft controllers. 3rd party Xbox 360
controllers have their own vendor and product IDs but use the same
subclass and protocol as the Microsoft controllers.

Here's a diff based on the first patch that will match controllers
and assign the report descriptor more generally using subclass/protocol
rather than vendor/product. Is it more correct to create an array
of known vendors/products and match against a call to usb_lookup()?

[1] http://marc.info/?l=openbsd-tech&m=138229619410284&w=2

Thanks,
Christian


Index: uhidev.c
===
RCS file: /cvs/src/sys/dev/usb/uhidev.c,v
retrieving revision 1.70
diff -u -p -r1.70 uhidev.c
--- uhidev.c28 Feb 2015 08:42:41 -  1.70
+++ uhidev.c5 Dec 2015 20:14:49 -
@@ -62,7 +62,10 @@
 #ifndef SMALL_KERNEL
 /* Replacement report descriptors for devices shipped with broken ones */
 #include <dev/usb/uhid_rdesc.h>
-int uhidev_use_rdesc(struct uhidev_softc *, int, int, void **, int *);
+int uhidev_use_rdesc(struct uhidev_softc *, usb_interface_descriptor_t *,
+   int, int, void **, int *);
+#define UISUBCLASS_XBOX360_CONTROLLER 0x5d
+#define UIPROTO_XBOX360_GAMEPAD 0x01
 #endif /* !SMALL_KERNEL */
 
 #define DEVNAME(sc)((sc)->sc_dev.dv_xname)
@@ -118,10 +121,10 @@ uhidev_match(struct device *parent, void
if (id == NULL)
return (UMATCH_NONE);
 #ifndef SMALL_KERNEL
-   if (uaa->vendor == USB_VENDOR_MICROSOFT &&
-   uaa->product == USB_PRODUCT_MICROSOFT_XBOX360_CONTROLLER &&
-   id->bInterfaceNumber == 0)
-   return (UMATCH_VENDOR_PRODUCT);
+   if (id->bInterfaceClass == UICLASS_VENDOR &&
+   id->bInterfaceSubClass == UISUBCLASS_XBOX360_CONTROLLER &&
+   id->bInterfaceProtocol == UIPROTO_XBOX360_GAMEPAD)
+   return (UMATCH_IFACECLASS_IFACESUBCLASS_IFACEPROTO);
 #endif /* !SMALL_KERNEL */
if (id->bInterfaceClass != UICLASS_HID)
return (UMATCH_NONE);
@@ -191,7 +194,7 @@ uhidev_attach(struct device *parent, str
}
 
 #ifndef SMALL_KERNEL
-   if (uhidev_use_rdesc(sc, uaa->vendor, uaa->product, &desc, &size))
+   if (uhidev_use_rdesc(sc, id, uaa->vendor, uaa->product, &desc, &size))
return;
 #endif /* !SMALL_KERNEL */
 
@@ -275,8 +278,8 @@ uhidev_attach(struct device *parent, str
 
 #ifndef SMALL_KERNEL
 int
-uhidev_use_rdesc(struct uhidev_softc *sc, int vendor, int product,
-void **descp, int *sizep)
+uhidev_use_rdesc(struct uhidev_softc *sc, usb_interface_descriptor_t *id,
+   int vendor, int product, void **descp, int *sizep)
 {
static uByte reportbuf[] = {2, 2};
const void *descptr = NULL;
@@ -300,8 +303,9 @@ uhidev_use_rdesc(struct uhidev_softc *sc
default:
break;
}
-   } else if (vendor == USB_VENDOR_MICROSOFT &&
-   product == USB_PRODUCT_MICROSOFT_XBOX360_CONTROLLER) {
+   } else if ((id->bInterfaceClass == UICLASS_VENDOR &&
+  id->bInterfaceSubClass == UISUBCLASS_XBOX360_CONTROLLER &&
+  id->bInterfaceProtocol == UIPROTO_XBOX360_GAMEPAD)) {
/* The Xbox 360 gamepad has no report descriptor. */
size = sizeof(uhid_xb360gp_report_descr);
descptr = uhid_xb360gp_report_descr;



Re: newfs: avoid oob read on command line argument

2015-12-05 Thread Michael McConville
Tobias Stoeckmann wrote:
> On Sat, Dec 05, 2015 at 06:26:35AM -0500, Ted Unangst wrote:
> > may i suggest strlen(s) instead of strchr(s, 0)?
> 
> There's actually one part in newfs' code that uses this. And in theory
> it has the same issue, not checking if s (which is special, which might
> be argv[0]) is empty. I highly doubt this could be reached there, but
> I fixed it anyway. Until now it uses strncpy, and with the switch to
> strlcpy this is just another additional boundary check in place.

ok mmcc@

It'd be nice to have a macro for specname's size, IMO. Using sizeof() in
strlcpy makes me uneasy and is less readable. That's a separate
improvement, though.

> Index: sbin/newfs/newfs.c
> ===
> RCS file: /cvs/src/sbin/newfs/newfs.c,v
> retrieving revision 1.103
> diff -u -p -u -p -r1.103 newfs.c
> --- sbin/newfs/newfs.c25 Nov 2015 19:45:21 -  1.103
> +++ sbin/newfs/newfs.c5 Dec 2015 12:32:07 -
> @@ -423,10 +423,11 @@ main(int argc, char *argv[])
>   warnx("%s: not a character-special device",
>   special);
>   }
> - cp = strchr(argv[0], '\0') - 1;
> - if (cp == NULL ||
> - ((*cp < 'a' || *cp > ('a' + maxpartitions - 1))
> - && !isdigit((unsigned char)*cp)))
> + if (*argv[0] == '\0')
> + fatal("empty partition name supplied");
> + cp = argv[0] + strlen(argv[0]) - 1;
> + if ((*cp < 'a' || *cp > ('a' + maxpartitions - 1))
> + && !isdigit((unsigned char)*cp))
>   fatal("%s: can't figure out file system partition",
>   argv[0]);
>   lp = getdisklabel(special, fsi);
> @@ -631,8 +632,9 @@ rewritelabel(char *s, int fd, struct dis
>   /*
>* Make name for 'c' partition.
>*/
> - strncpy(specname, s, sizeof(specname) - 1);
> - specname[sizeof(specname) - 1] = '\0';
> + if (*s == '\0' ||
> + strlcpy(specname, s, sizeof(specname)) >= sizeof(specname))
> + fatal("%s: invalid partition name supplied", s);
>   cp = specname + strlen(specname) - 1;
>   if (!isdigit((unsigned char)*cp))
>   *cp = 'c';
> Index: sbin/newfs_ext2fs/newfs_ext2fs.c
> ===
> RCS file: /cvs/src/sbin/newfs_ext2fs/newfs_ext2fs.c,v
> retrieving revision 1.21
> diff -u -p -u -p -r1.21 newfs_ext2fs.c
> --- sbin/newfs_ext2fs/newfs_ext2fs.c  28 Nov 2015 06:12:09 -  1.21
> +++ sbin/newfs_ext2fs/newfs_ext2fs.c  5 Dec 2015 12:32:07 -
> @@ -529,9 +529,11 @@ getpartition(int fsi, const char *specia
>   errx(EXIT_FAILURE, "%s: block device", special);
>   if (!S_ISCHR(st.st_mode))
>   warnx("%s: not a character-special device", special);
> - cp = strchr(argv[0], '\0') - 1;
> - if (cp == NULL || ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
> - && !isdigit((unsigned char)*cp)))
> + if (*argv[0] == '\0')
> + errx(EXIT_FAILURE, "empty partition name supplied");
> + cp = argv[0] + strlen(argv[0]) - 1;
> + if ((*cp < 'a' || *cp > ('a' + getmaxpartitions() - 1))
> + && !isdigit((unsigned char)*cp))
>   errx(EXIT_FAILURE, "%s: can't figure out file system 
> partition", argv[0]);
>   lp = getdisklabel(special, fsi);
>   if (isdigit((unsigned char)*cp))
> 



taskctx and revisiting if_start serialisation

2015-12-05 Thread David Gwynne
the current code for serialising if_start calls for mpsafe nics does what it 
says.

however, kettenis realised it doesnt help us much when we're trying
to coordinate between the start and txeof side of a driver when
setting or clearing oactive. in particular, a start routine can
figure out there's no more space, and then set oactive. txeof could
be running on another cpu emptying the ring and clearing it. if
that clear runs in between the other cpus space check and
ifq_set_oactive, then the nic will be marked full and the stack
wont ever call start on it again.

so it can be argued that start and txeof should be serialised.
indeed, other platforms do exactly that.

the least worst mechanism we have for doing that is taskqs. however,
all my experiments deferring start to a taskq end up significantly
hurting performance.

dragonfly appears to have some of the semantics we want. according
to sephe, start and txeof are serialised, but they can be directly
called from anywhere. however, if one cpu is trying to run start
while the other is in txeof, it figures it out and makes the other
cpu run txeof on the first cpus behalf. the first cpu then simply
returns cos it knows the other cpu will end up doing the work.

the implementation is tied very much to that specific situation,
and its hard for me to grok cos im not familiar with their locking
infrastructure.

the dfly code has the (slight) caveat that you cant run txeof and
start concurrently, it forces them to be serialised.

while toying with ideas on how to solve kettenis' oactive problem,
i came up with the following.

it combines tasks with direct dispatch, and borrows the current
ifq_serialiser/pool/scsi serialisation algorithm.

the idea is you have a taskctx, which represents a serialising
context for tasks. tasks are submitted to the taskctx, and the code
will try to run the tasks immediately rather than defer them to a
thread. if there is contention on the context, the contending cpu
yields after queueing the task because the other cpu is responsible
for running all pending tasks to completion.

it also simplifies the barrier operations a lot.

the diff below implements a generic taskctx framework, and cuts the
mpsafe if_start() implementation over to it.

myx is also changed to only clr oactive from within the taskctx
serialiser, thereby avoiding the race, but keeps the bulk of txeof
outside the serialiser so it can run concurrently with start.

other nics are free to serialise start and txeof within the
ifq_serializer if they want, or not, it is up to them.

thoughts? tests? opinions on messy .h files?

Index: share/man/man9/task_add.9
===
RCS file: /cvs/src/share/man/man9/task_add.9,v
retrieving revision 1.16
diff -u -p -r1.16 task_add.9
--- share/man/man9/task_add.9   14 Sep 2015 15:14:55 -  1.16
+++ share/man/man9/task_add.9   6 Dec 2015 03:39:03 -
@@ -18,15 +18,23 @@
 .Dt TASK_ADD 9
 .Os
 .Sh NAME
+.Nm task_set ,
+.Nm TASK_INITIALIZER ,
 .Nm taskq_create ,
 .Nm taskq_destroy ,
-.Nm task_set ,
+.Nm taskq_barrier ,
 .Nm task_add ,
 .Nm task_del ,
-.Nm TASK_INITIALIZER
-.Nd task queues
+.Nm taskctx_init ,
+.Nm TASKCTX_INITIALIZER ,
+.Nm taskctx_barrier ,
+.Nm task_dispatch
+.Nd task queues and contexts
 .Sh SYNOPSIS
 .In sys/task.h
+.Ft void
+.Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
+.Fn TASK_INITIALIZER "void (*fn)(void *)" "void *arg"
 .Ft struct taskq *
 .Fo taskq_create
 .Fa "const char *name"
@@ -37,19 +45,74 @@
 .Ft void
 .Fn taskq_destroy "struct taskq *tq"
 .Ft void
-.Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg"
+.Fn taskq_barrier "struct taskq *tq"
 .Ft int
 .Fn task_add "struct taskq *tq" "struct task *t"
 .Ft int
 .Fn task_del "struct taskq *tq" "struct task *t"
+.Ft void
+.Fn taskctx_init "struct taskctx *tc" "int ipl"
+.Fn TASKCTX_INITIALIZER "struct taskctx self" "int ipl"
+.Ft void
+.Fn taskctx_barrier "struct taskctx *tc"
+.Ft void
+.Fn task_dispatch "struct taskctx *tc" "struct task *t"
 .Vt extern struct taskq *const systq;
 .Vt extern struct taskq *const systqmp;
-.Fn TASK_INITIALIZER "void (*fn)(void *)" "void *arg"
 .Sh DESCRIPTION
 The
 taskq
 API provides a mechanism to defer work to a process context.
 .Pp
+The
+taskctx
+API provides a mechanism to serialise work in a single context.
+A taskctx guarantees that all work submitted to it will not run
+concurrently and can therefore provide exclusive access to a resource.
+It attempts to run the submitted work immediately, unless another
+CPU is already running work in the taskctx.
+The other CPU will then run all the submitted work on behalf of the
+rest of the system.
+.Ss TASKS
+Work is represented in both the
+taskq
+and
+taskctx
+APIs by task structures.
+It is up to the user of the APIs to allocate the task structures.
+.Pp
+.Fn task_set
+prepares the task structure
+.Fa t
+to be used in a
+taskq
+with
+.Fn task_add
+and
+.Fn task_del ,
+or for use