On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote: > Updated diff below.
OK bluhm@ > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.502 > diff -u -p -r1.502 if.c > --- net/if.c 30 May 2017 07:50:37 -0000 1.502 > +++ net/if.c 30 May 2017 08:24:30 -0000 > @@ -874,7 +874,10 @@ if_input_process(void *xifidx) > struct ifnet *ifp; > struct ifih *ifih; > struct srp_ref sr; > - int s; > + int s, s2; > +#ifdef IPSEC > + int locked = 0; > +#endif /* IPSEC */ > > ifp = if_get(ifidx); > if (ifp == NULL) > @@ -887,6 +890,32 @@ if_input_process(void *xifidx) > if (!ISSET(ifp->if_xflags, IFXF_CLONED)) > add_net_randomness(ml_len(&ml)); > > +#ifdef IPSEC > + /* > + * IPsec is not ready to run without KERNEL_LOCK(). So all > + * the traffic on your machine is punished if you have IPsec > + * enabled. > + */ > + extern int ipsec_in_use; > + if (ipsec_in_use) { > + KERNEL_LOCK(); > + locked = 1; > + } > +#endif /* IPSEC */ > + > + /* > + * We grab the NET_LOCK() before processing any packet to > + * ensure there's no contention on the routing table lock. > + * > + * Without it we could race with a userland thread to insert > + * a L2 entry in ip{6,}_output(). Such race would result in > + * one of the threads sleeping *inside* the IP output path. > + * > + * Since we have a NET_LOCK() we also use it to serialize access > + * to PF globals, pipex globals, unicast and multicast addresses > + * lists. > + */ > + NET_LOCK(s2); > s = splnet(); > while ((m = ml_dequeue(&ml)) != NULL) { > /* > @@ -903,7 +932,12 @@ if_input_process(void *xifidx) > m_freem(m); > } > splx(s); > + NET_UNLOCK(s2); > > +#ifdef IPSEC > + if (locked) > + KERNEL_UNLOCK(); > +#endif /* IPSEC */ > out: > if_put(ifp); > } > Index: net/if_ethersubr.c > =================================================================== > RCS file: /cvs/src/sys/net/if_ethersubr.c,v > retrieving revision 1.245 > diff -u -p -r1.245 if_ethersubr.c > --- net/if_ethersubr.c 30 May 2017 07:50:37 -0000 1.245 > +++ net/if_ethersubr.c 30 May 2017 08:02:13 -0000 > @@ -416,15 +416,11 @@ decapsulate: > #ifdef PIPEX > if (pipex_enable) { > struct pipex_session *session; > - int s; > > - NET_LOCK(s); > if ((session = pipex_pppoe_lookup_session(m)) != NULL) { > pipex_pppoe_input(m, session); > - NET_UNLOCK(s); > return (1); > } > - NET_UNLOCK(s); > } > #endif > if (etype == ETHERTYPE_PPPOEDISC) > Index: netinet/ip_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_input.c,v > retrieving revision 1.308 > diff -u -p -r1.308 ip_input.c > --- netinet/ip_input.c 30 May 2017 07:50:37 -0000 1.308 > +++ netinet/ip_input.c 30 May 2017 09:44:53 -0000 > @@ -127,6 +127,7 @@ int ip_sysctl_ipstat(void *, size_t *, v > static struct mbuf_queue ipsend_mq; > > void ip_ours(struct mbuf *); > +void ip_local(struct mbuf *); > int ip_dooptions(struct mbuf *, struct ifnet *); > int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **); > > @@ -207,27 +208,31 @@ ip_init(void) > mq_init(&ipsend_mq, 64, IPL_SOFTNET); > } > > +/* > + * Enqueue packet for local delivery. Queuing is used as a boundary > + * between the network layer (input/forward path) running without > + * KERNEL_LOCK() and the transport layer still needing it. > + */ > void > -ipv4_input(struct ifnet *ifp, struct mbuf *m) > +ip_ours(struct mbuf *m) > { > niq_enqueue(&ipintrq, m); > } > > +/* > + * Dequeue and process locally delivered packets. > + */ > void > ipintr(void) > { > struct mbuf *m; > > - /* > - * Get next datagram off input queue and get IP header > - * in first mbuf. > - */ > while ((m = niq_dequeue(&ipintrq)) != NULL) { > -#ifdef DIAGNOSTIC > +#ifdef DIAGNOSTIC > if ((m->m_flags & M_PKTHDR) == 0) > panic("ipintr no HDR"); > #endif > - ip_input(m); > + ip_local(m); > } > } > > @@ -237,18 +242,13 @@ ipintr(void) > * Checksum and byte swap header. Process options. Forward or deliver. > */ > void > -ip_input(struct mbuf *m) > +ipv4_input(struct ifnet *ifp, struct mbuf *m) > { > - struct ifnet *ifp; > struct rtentry *rt = NULL; > struct ip *ip; > int hlen, len; > in_addr_t pfrdr = 0; > > - ifp = if_get(m->m_pkthdr.ph_ifidx); > - if (ifp == NULL) > - goto bad; > - > ipstat_inc(ips_total); > if (m->m_len < sizeof (struct ip) && > (m = m_pullup(m, sizeof (struct ip))) == NULL) { > @@ -462,13 +462,11 @@ ip_input(struct mbuf *m) > #endif /* IPSEC */ > > ip_forward(m, ifp, rt, pfrdr); > - if_put(ifp); > return; > bad: > m_freem(m); > out: > rtfree(rt); > - if_put(ifp); > } > > /* > @@ -477,13 +475,15 @@ out: > * If fragmented try to reassemble. Pass to next level. > */ > void > -ip_ours(struct mbuf *m) > +ip_local(struct mbuf *m) > { > struct ip *ip = mtod(m, struct ip *); > struct ipq *fp; > struct ipqent *ipqe; > int mff, hlen; > > + KERNEL_ASSERT_LOCKED(); > + > hlen = ip->ip_hl << 2; > > /* > @@ -1680,18 +1680,37 @@ ip_send_dispatch(void *xmq) > struct mbuf *m; > struct mbuf_list ml; > int s; > +#ifdef IPSEC > + int locked = 0; > +#endif /* IPSEC */ > > mq_delist(mq, &ml); > if (ml_empty(&ml)) > return; > > - KERNEL_LOCK(); > +#ifdef IPSEC > + /* > + * IPsec is not ready to run without KERNEL_LOCK(). So all > + * the traffic on your machine is punished if you have IPsec > + * enabled. > + */ > + extern int ipsec_in_use; > + if (ipsec_in_use) { > + KERNEL_LOCK(); > + locked = 1; > + } > +#endif /* IPSEC */ > + > NET_LOCK(s); > while ((m = ml_dequeue(&ml)) != NULL) { > ip_output(m, NULL, NULL, 0, NULL, NULL, 0); > } > NET_UNLOCK(s); > - KERNEL_UNLOCK(); > + > +#ifdef IPSEC > + if (locked) > + KERNEL_UNLOCK(); > +#endif /* IPSEC */ > } > > void > Index: netinet/ip_var.h > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_var.h,v > retrieving revision 1.77 > diff -u -p -r1.77 ip_var.h > --- netinet/ip_var.h 30 May 2017 07:50:37 -0000 1.77 > +++ netinet/ip_var.h 30 May 2017 07:57:31 -0000 > @@ -248,7 +248,6 @@ int ip_sysctl(int *, u_int, void *, siz > void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, > struct mbuf *); > void ipintr(void); > -void ip_input(struct mbuf *); > void ip_deliver(struct mbuf **, int *, int, int); > void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); > int rip_ctloutput(int, struct socket *, int, int, struct mbuf *); > Index: netinet6/ip6_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v > retrieving revision 1.192 > diff -u -p -r1.192 ip6_input.c > --- netinet6/ip6_input.c 30 May 2017 07:50:37 -0000 1.192 > +++ netinet6/ip6_input.c 30 May 2017 09:45:25 -0000 > @@ -119,6 +119,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA > struct cpumem *ip6counters; > > void ip6_ours(struct mbuf *); > +void ip6_local(struct mbuf *); > int ip6_check_rh0hdr(struct mbuf *, int *); > int ip6_hbhchcheck(struct mbuf *, int *, int *, int *); > int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); > @@ -160,28 +161,37 @@ ip6_init(void) > ip6counters = counters_alloc(ip6s_ncounters); > } > > +/* > + * Enqueue packet for local delivery. Queuing is used as a boundary > + * between the network layer (input/forward path) running without > + * KERNEL_LOCK() and the transport layer still needing it. > + */ > void > -ipv6_input(struct ifnet *ifp, struct mbuf *m) > +ip6_ours(struct mbuf *m) > { > niq_enqueue(&ip6intrq, m); > } > > /* > - * IP6 input interrupt handling. Just pass the packet to ip6_input. > + * Dequeue and process locally delivered packets. > */ > void > ip6intr(void) > { > struct mbuf *m; > > - while ((m = niq_dequeue(&ip6intrq)) != NULL) > - ip6_input(m); > + while ((m = niq_dequeue(&ip6intrq)) != NULL) { > +#ifdef DIAGNOSTIC > + if ((m->m_flags & M_PKTHDR) == 0) > + panic("ipintr no HDR"); > +#endif > + ip6_local(m); > + } > } > > void > -ip6_input(struct mbuf *m) > +ipv6_input(struct ifnet *ifp, struct mbuf *m) > { > - struct ifnet *ifp; > struct ip6_hdr *ip6; > struct sockaddr_in6 sin6; > struct rtentry *rt = NULL; > @@ -192,10 +202,6 @@ ip6_input(struct mbuf *m) > #endif > int srcrt = 0; > > - ifp = if_get(m->m_pkthdr.ph_ifidx); > - if (ifp == NULL) > - goto bad; > - > ip6stat_inc(ip6s_total); > > if (m->m_len < sizeof(struct ip6_hdr)) { > @@ -441,8 +447,8 @@ ip6_input(struct mbuf *m) > inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)); > /* address is not ready, so discard the packet. */ > nd6log((LOG_INFO, > - "ip6_input: packet to an unready address %s->%s\n", > - src, dst)); > + "%s: packet to an unready address %s->%s\n", > + __func__, src, dst)); > > goto bad; > } else { > @@ -500,11 +506,10 @@ ip6_input(struct mbuf *m) > m_freem(m); > out: > rtfree(rt); > - if_put(ifp); > } > > void > -ip6_ours(struct mbuf *m) > +ip6_local(struct mbuf *m) > { > int off, nxt; > > @@ -1456,18 +1461,37 @@ ip6_send_dispatch(void *xmq) > struct mbuf *m; > struct mbuf_list ml; > int s; > +#ifdef IPSEC > + int locked = 0; > +#endif /* IPSEC */ > > mq_delist(mq, &ml); > if (ml_empty(&ml)) > return; > > - KERNEL_LOCK(); > +#ifdef IPSEC > + /* > + * IPsec is not ready to run without KERNEL_LOCK(). So all > + * the traffic on your machine is punished if you have IPsec > + * enabled. > + */ > + extern int ipsec_in_use; > + if (ipsec_in_use) { > + KERNEL_LOCK(); > + locked = 1; > + } > +#endif /* IPSEC */ > + > NET_LOCK(s); > while ((m = ml_dequeue(&ml)) != NULL) { > ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); > } > NET_UNLOCK(s); > - KERNEL_UNLOCK(); > + > +#ifdef IPSEC > + if (locked) > + KERNEL_UNLOCK(); > +#endif /* IPSEC */ > } > > void > Index: netinet6/ip6_var.h > =================================================================== > RCS file: /cvs/src/sys/netinet6/ip6_var.h,v > retrieving revision 1.74 > diff -u -p -r1.74 ip6_var.h > --- netinet6/ip6_var.h 28 May 2017 09:25:51 -0000 1.74 > +++ netinet6/ip6_var.h 30 May 2017 08:06:20 -0000 > @@ -303,7 +303,6 @@ int icmp6_ctloutput(int, struct socket * > > void ip6_init(void); > void ip6intr(void); > -void ip6_input(struct mbuf *); > void ip6_deliver(struct mbuf **, int *, int, int); > void ip6_freepcbopts(struct ip6_pktopts *); > void ip6_freemoptions(struct ip6_moptions *);