On Tue, May 30, 2017 at 11:48:50AM +0200, Martin Pieuchot wrote:
> Updated diff below.

OK bluhm@

> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.502
> diff -u -p -r1.502 if.c
> --- net/if.c  30 May 2017 07:50:37 -0000      1.502
> +++ net/if.c  30 May 2017 08:24:30 -0000
> @@ -874,7 +874,10 @@ if_input_process(void *xifidx)
>       struct ifnet *ifp;
>       struct ifih *ifih;
>       struct srp_ref sr;
> -     int s;
> +     int s, s2;
> +#ifdef IPSEC
> +     int locked = 0;
> +#endif /* IPSEC */
>  
>       ifp = if_get(ifidx);
>       if (ifp == NULL)
> @@ -887,6 +890,32 @@ if_input_process(void *xifidx)
>       if (!ISSET(ifp->if_xflags, IFXF_CLONED))
>               add_net_randomness(ml_len(&ml));
>  
> +#ifdef IPSEC
> +     /*
> +      * IPsec is not ready to run without KERNEL_LOCK().  So all
> +      * the traffic on your machine is punished if you have IPsec
> +      * enabled.
> +      */
> +     extern int ipsec_in_use;
> +     if (ipsec_in_use) {
> +             KERNEL_LOCK();
> +             locked = 1;
> +     }
> +#endif /* IPSEC */
> +
> +     /*
> +      * We grab the NET_LOCK() before processing any packet to
> +      * ensure there's no contention on the routing table lock.
> +      *
> +      * Without it we could race with a userland thread to insert
> +      * an L2 entry in ip{6,}_output().  Such a race would result in
> +      * one of the threads sleeping *inside* the IP output path.
> +      *
> +      * Since we have a NET_LOCK() we also use it to serialize access
> +      * to PF globals, pipex globals, unicast and multicast address
> +      * lists.
> +      */
> +     NET_LOCK(s2);
>       s = splnet();
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               /*
> @@ -903,7 +932,12 @@ if_input_process(void *xifidx)
>                       m_freem(m);
>       }
>       splx(s);
> +     NET_UNLOCK(s2);
>  
> +#ifdef IPSEC
> +     if (locked)
> +             KERNEL_UNLOCK();
> +#endif /* IPSEC */
>  out:
>       if_put(ifp);
>  }
> Index: net/if_ethersubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.245
> diff -u -p -r1.245 if_ethersubr.c
> --- net/if_ethersubr.c        30 May 2017 07:50:37 -0000      1.245
> +++ net/if_ethersubr.c        30 May 2017 08:02:13 -0000
> @@ -416,15 +416,11 @@ decapsulate:
>  #ifdef PIPEX
>               if (pipex_enable) {
>                       struct pipex_session *session;
> -                     int s;
>  
> -                     NET_LOCK(s);
>                       if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
>                               pipex_pppoe_input(m, session);
> -                             NET_UNLOCK(s);
>                               return (1);
>                       }
> -                     NET_UNLOCK(s);
>               }
>  #endif
>               if (etype == ETHERTYPE_PPPOEDISC)
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.308
> diff -u -p -r1.308 ip_input.c
> --- netinet/ip_input.c        30 May 2017 07:50:37 -0000      1.308
> +++ netinet/ip_input.c        30 May 2017 09:44:53 -0000
> @@ -127,6 +127,7 @@ int ip_sysctl_ipstat(void *, size_t *, v
>  static struct mbuf_queue     ipsend_mq;
>  
>  void ip_ours(struct mbuf *);
> +void ip_local(struct mbuf *);
>  int  ip_dooptions(struct mbuf *, struct ifnet *);
>  int  in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
>  
> @@ -207,27 +208,31 @@ ip_init(void)
>       mq_init(&ipsend_mq, 64, IPL_SOFTNET);
>  }
>  
> +/*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running without
> + * KERNEL_LOCK() and the transport layer still needing it.
> + */
>  void
> -ipv4_input(struct ifnet *ifp, struct mbuf *m)
> +ip_ours(struct mbuf *m)
>  {
>       niq_enqueue(&ipintrq, m);
>  }
>  
> +/*
> + * Dequeue and process locally delivered packets.
> + */
>  void
>  ipintr(void)
>  {
>       struct mbuf *m;
>  
> -     /*
> -      * Get next datagram off input queue and get IP header
> -      * in first mbuf.
> -      */
>       while ((m = niq_dequeue(&ipintrq)) != NULL) {
> -#ifdef       DIAGNOSTIC
> +#ifdef DIAGNOSTIC
>               if ((m->m_flags & M_PKTHDR) == 0)
>                       panic("ipintr no HDR");
>  #endif
> -             ip_input(m);
> +             ip_local(m);
>       }
>  }
>  
> @@ -237,18 +242,13 @@ ipintr(void)
>   * Checksum and byte swap header.  Process options. Forward or deliver.
>   */
>  void
> -ip_input(struct mbuf *m)
> +ipv4_input(struct ifnet *ifp, struct mbuf *m)
>  {
> -     struct ifnet    *ifp;
>       struct rtentry  *rt = NULL;
>       struct ip       *ip;
>       int hlen, len;
>       in_addr_t pfrdr = 0;
>  
> -     ifp = if_get(m->m_pkthdr.ph_ifidx);
> -     if (ifp == NULL)
> -             goto bad;
> -
>       ipstat_inc(ips_total);
>       if (m->m_len < sizeof (struct ip) &&
>           (m = m_pullup(m, sizeof (struct ip))) == NULL) {
> @@ -462,13 +462,11 @@ ip_input(struct mbuf *m)
>  #endif /* IPSEC */
>  
>       ip_forward(m, ifp, rt, pfrdr);
> -     if_put(ifp);
>       return;
>  bad:
>       m_freem(m);
>  out:
>       rtfree(rt);
> -     if_put(ifp);
>  }
>  
>  /*
> @@ -477,13 +475,15 @@ out:
>   * If fragmented try to reassemble.  Pass to next level.
>   */
>  void
> -ip_ours(struct mbuf *m)
> +ip_local(struct mbuf *m)
>  {
>       struct ip *ip = mtod(m, struct ip *);
>       struct ipq *fp;
>       struct ipqent *ipqe;
>       int mff, hlen;
>  
> +     KERNEL_ASSERT_LOCKED();
> +
>       hlen = ip->ip_hl << 2;
>  
>       /*
> @@ -1680,18 +1680,37 @@ ip_send_dispatch(void *xmq)
>       struct mbuf *m;
>       struct mbuf_list ml;
>       int s;
> +#ifdef IPSEC
> +     int locked = 0;
> +#endif /* IPSEC */
>  
>       mq_delist(mq, &ml);
>       if (ml_empty(&ml))
>               return;
>  
> -     KERNEL_LOCK();
> +#ifdef IPSEC
> +     /*
> +      * IPsec is not ready to run without KERNEL_LOCK().  So all
> +      * the traffic on your machine is punished if you have IPsec
> +      * enabled.
> +      */
> +     extern int ipsec_in_use;
> +     if (ipsec_in_use) {
> +             KERNEL_LOCK();
> +             locked = 1;
> +     }
> +#endif /* IPSEC */
> +
>       NET_LOCK(s);
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
>       }
>       NET_UNLOCK(s);
> -     KERNEL_UNLOCK();
> +
> +#ifdef IPSEC
> +     if (locked)
> +             KERNEL_UNLOCK();
> +#endif /* IPSEC */
>  }
>  
>  void
> Index: netinet/ip_var.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_var.h,v
> retrieving revision 1.77
> diff -u -p -r1.77 ip_var.h
> --- netinet/ip_var.h  30 May 2017 07:50:37 -0000      1.77
> +++ netinet/ip_var.h  30 May 2017 07:57:31 -0000
> @@ -248,7 +248,6 @@ int        ip_sysctl(int *, u_int, void *, siz
>  void  ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
>           struct mbuf *);
>  void  ipintr(void);
> -void  ip_input(struct mbuf *);
>  void  ip_deliver(struct mbuf **, int *, int, int);
>  void  ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
>  int   rip_ctloutput(int, struct socket *, int, int, struct mbuf *);
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.192
> diff -u -p -r1.192 ip6_input.c
> --- netinet6/ip6_input.c      30 May 2017 07:50:37 -0000      1.192
> +++ netinet6/ip6_input.c      30 May 2017 09:45:25 -0000
> @@ -119,6 +119,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIA
>  struct cpumem *ip6counters;
>  
>  void ip6_ours(struct mbuf *);
> +void ip6_local(struct mbuf *);
>  int ip6_check_rh0hdr(struct mbuf *, int *);
>  int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
>  int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
> @@ -160,28 +161,37 @@ ip6_init(void)
>       ip6counters = counters_alloc(ip6s_ncounters);
>  }
>  
> +/*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running without
> + * KERNEL_LOCK() and the transport layer still needing it.
> + */
>  void
> -ipv6_input(struct ifnet *ifp, struct mbuf *m)
> +ip6_ours(struct mbuf *m)
>  {
>       niq_enqueue(&ip6intrq, m);
>  }
>  
>  /*
> - * IP6 input interrupt handling. Just pass the packet to ip6_input.
> + * Dequeue and process locally delivered packets.
>   */
>  void
>  ip6intr(void)
>  {
>       struct mbuf *m;
>  
> -     while ((m = niq_dequeue(&ip6intrq)) != NULL)
> -             ip6_input(m);
> +     while ((m = niq_dequeue(&ip6intrq)) != NULL) {
> +#ifdef DIAGNOSTIC
> +             if ((m->m_flags & M_PKTHDR) == 0)
> +                     panic("ipintr no HDR");
> +#endif
> +             ip6_local(m);
> +     }
>  }
>  
>  void
> -ip6_input(struct mbuf *m)
> +ipv6_input(struct ifnet *ifp, struct mbuf *m)
>  {
> -     struct ifnet *ifp;
>       struct ip6_hdr *ip6;
>       struct sockaddr_in6 sin6;
>       struct rtentry *rt = NULL;
> @@ -192,10 +202,6 @@ ip6_input(struct mbuf *m)
>  #endif
>       int srcrt = 0;
>  
> -     ifp = if_get(m->m_pkthdr.ph_ifidx);
> -     if (ifp == NULL)
> -             goto bad;
> -
>       ip6stat_inc(ip6s_total);
>  
>       if (m->m_len < sizeof(struct ip6_hdr)) {
> @@ -441,8 +447,8 @@ ip6_input(struct mbuf *m)
>                       inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst));
>                       /* address is not ready, so discard the packet. */
>                       nd6log((LOG_INFO,
> -                         "ip6_input: packet to an unready address %s->%s\n",
> -                         src, dst));
> +                         "%s: packet to an unready address %s->%s\n",
> +                         __func__, src, dst));
>  
>                       goto bad;
>               } else {
> @@ -500,11 +506,10 @@ ip6_input(struct mbuf *m)
>       m_freem(m);
>   out:
>       rtfree(rt);
> -     if_put(ifp);
>  }
>  
>  void
> -ip6_ours(struct mbuf *m)
> +ip6_local(struct mbuf *m)
>  {
>       int off, nxt;
>  
> @@ -1456,18 +1461,37 @@ ip6_send_dispatch(void *xmq)
>       struct mbuf *m;
>       struct mbuf_list ml;
>       int s;
> +#ifdef IPSEC
> +     int locked = 0;
> +#endif /* IPSEC */
>  
>       mq_delist(mq, &ml);
>       if (ml_empty(&ml))
>               return;
>  
> -     KERNEL_LOCK();
> +#ifdef IPSEC
> +     /*
> +      * IPsec is not ready to run without KERNEL_LOCK().  So all
> +      * the traffic on your machine is punished if you have IPsec
> +      * enabled.
> +      */
> +     extern int ipsec_in_use;
> +     if (ipsec_in_use) {
> +             KERNEL_LOCK();
> +             locked = 1;
> +     }
> +#endif /* IPSEC */
> +
>       NET_LOCK(s);
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
>       }
>       NET_UNLOCK(s);
> -     KERNEL_UNLOCK();
> +
> +#ifdef IPSEC
> +     if (locked)
> +             KERNEL_UNLOCK();
> +#endif /* IPSEC */
>  }
>  
>  void
> Index: netinet6/ip6_var.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
> retrieving revision 1.74
> diff -u -p -r1.74 ip6_var.h
> --- netinet6/ip6_var.h        28 May 2017 09:25:51 -0000      1.74
> +++ netinet6/ip6_var.h        30 May 2017 08:06:20 -0000
> @@ -303,7 +303,6 @@ int       icmp6_ctloutput(int, struct socket *
>  
>  void ip6_init(void);
>  void ip6intr(void);
> -void ip6_input(struct mbuf *);
>  void ip6_deliver(struct mbuf **, int *, int, int);
>  void ip6_freepcbopts(struct ip6_pktopts *);
>  void ip6_freemoptions(struct ip6_moptions *);

Reply via email to