On 13/11/14(Thu) 16:41, Stuart Henderson wrote: > This changes behaviour of ping6 ff02::1%pppoe0 for me, previously I saw > a response to each icmp message in the sequence, now I just see the > first response. I am not using "set skip" on that machine. > > $ ping6 ff02::1%pppoe0 > PING6(56=40+8+8 bytes) fe80::225:90ff:fec0:77b4%pppoe0 --> ff02::1%pppoe0 > 16 bytes from fe80:15::225:90ff:fec0:77b4, icmp_seq=0 hlim=64 time=0.271 ms > ^C > --- ff02::1%pppoe0 ping6 statistics --- > 6 packets transmitted, 1 packets received, 83.3% packet loss > round-trip min/avg/max/std-dev = 0.271/0.271/0.271/0.000 ms
Thanks for spotting this Stuart, diff below fixes this issue. It was another funky situation to debug. Problem was that 2 new states were created on loopback for the echo reply instead of matching the corresponding echo request states. But according to mikeb@ new states should never be created for replies. So the diff below changes the icmp multicast check in pf to take into consideration the scope of the sender, and in a more general way no longer clear the scopes of addresses for packets sent through loopback when calling pf_test(). Index: net/pf.c =================================================================== RCS file: /home/ncvs/src/sys/net/pf.c,v retrieving revision 1.895 diff -u -p -r1.895 pf.c --- net/pf.c 18 Nov 2014 02:37:31 -0000 1.895 +++ net/pf.c 18 Nov 2014 13:58:37 -0000 @@ -824,6 +824,11 @@ pf_state_key_addr_setup(struct pf_pdesc default: if (multi == PF_ICMP_MULTI_LINK) { key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL; + + if (IN6_IS_SCOPE_EMBED(&key->addr[didx].v6)) + key->addr[sidx].addr16[1] = + key->addr[didx].addr16[1]; + key->addr[sidx].addr32[1] = 0; key->addr[sidx].addr32[2] = 0; key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE; @@ -5796,7 +5801,7 @@ pf_route6(struct mbuf **m, struct pf_rul if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { - nd6_output(ifp, ifp, m0, dst, NULL); + nd6_output(ifp, m0, dst, NULL); } else { in6_ifstat_inc(ifp, ifs6_in_toobig); if (r->rt != PF_DUPTO) Index: netinet6/ip6_forward.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ip6_forward.c,v retrieving revision 1.69 diff -u -p -r1.69 ip6_forward.c --- netinet6/ip6_forward.c 14 Oct 2014 09:52:26 -0000 1.69 +++ netinet6/ip6_forward.c 18 Nov 2014 13:58:37 -0000 @@ -91,7 +91,6 @@ ip6_forward(struct mbuf *m, int srcrt) struct rtentry *rt; int error = 0, type = 0, code = 0; struct mbuf *mcopy = NULL; - struct ifnet *origifp; /* maybe unnecessary */ #ifdef IPSEC u_int8_t sproto = 0; struct m_tag *mtag; @@ -422,36 +421,6 @@ reroute: * link identifiers, we can do this stuff after making a copy for * returning an error. */ - if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { - /* - * See corresponding comments in ip6_output. - * XXX: but is it possible that ip6_forward() sends a packet - * to a loopback interface? I don't think so, and thus - * I bark here. (jin...@kame.net) - * XXX: it is common to route invalid packets to loopback. - * also, the codepath will be visited on use of ::1 in - * rthdr. (itojun) - */ -#if 1 - if (0) -#else - if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) -#endif - { - inet_ntop(AF_INET6, &ip6->ip6_src, src6, sizeof(src6)); - inet_ntop(AF_INET6, &ip6->ip6_dst, dst6, sizeof(dst6)); - printf("ip6_forward: outgoing interface is loopback. " - "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", - src6, dst6, - ip6->ip6_nxt, m->m_pkthdr.rcvif->if_xname, - rt->rt_ifp->if_xname); - } - - /* we can just use rcvif in forwarding. */ - origifp = m->m_pkthdr.rcvif; - } - else - origifp = rt->rt_ifp; if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) @@ -492,7 +461,7 @@ reroute: goto freert; } - error = nd6_output(rt->rt_ifp, origifp, m, dst, rt); + error = nd6_output(rt->rt_ifp, m, dst, rt); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); ip6stat.ip6s_cantforward++; Index: netinet6/ip6_input.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.130 diff -u -p -r1.130 ip6_input.c --- netinet6/ip6_input.c 14 Oct 2014 09:52:26 -0000 1.130 +++ netinet6/ip6_input.c 18 Nov 2014 13:58:37 -0000 @@ -267,6 +267,13 @@ ip6_input(struct mbuf *m) in6_ifstat_inc(ifp, ifs6_in_addrerr); goto bad; } + /* Drop packets if interface ID portion is already filled. */ + if (((IN6_IS_SCOPE_EMBED(&ip6->ip6_src) && ip6->ip6_src.s6_addr16[1]) || + (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst) && ip6->ip6_dst.s6_addr16[1])) && + (ifp->if_flags & IFF_LOOPBACK) == 0) { + ip6stat.ip6s_badscope++; + goto bad; + } if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && !(m->m_flags & M_LOOP)) { /* @@ -314,6 +321,18 @@ ip6_input(struct mbuf *m) } #endif + /* + * If the packet has been received on a loopback interface it + * can be destinated to any local address, not necessarily to + * an address configured on `ifp'. + */ + if ((ifp->if_flags & IFF_LOOPBACK) == 0) { + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = htons(ifp->if_index); + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index); + } + #if NPF > 0 /* * Packet filter @@ -349,43 +368,6 @@ ip6_input(struct mbuf *m) goto hbhcheck; } - /* drop packets if interface ID portion is already filled */ - if ((IN6_IS_SCOPE_EMBED(&ip6->ip6_src) && ip6->ip6_src.s6_addr16[1]) || - (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst) && ip6->ip6_dst.s6_addr16[1])) { - if ((ifp->if_flags & IFF_LOOPBACK) == 0) { - ip6stat.ip6s_badscope++; - goto bad; - } - } - - if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) - ip6->ip6_src.s6_addr16[1] = htons(ifp->if_index); - if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) - ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index); - - /* - * We use rt->rt_ifp to determine if the address is ours or not. - * If rt_ifp is lo0, the address is ours. - * The problem here is, rt->rt_ifp for fe80::%lo0/64 is set to lo0, - * so any address under fe80::%lo0/64 will be mistakenly considered - * local. The special case is supplied to handle the case properly - * by actually looking at interface addresses - * (using in6ifa_ifpwithaddr). - */ - if ((ifp->if_flags & IFF_LOOPBACK) != 0 && - IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { - if (!in6ifa_ifpwithaddr(ifp, &ip6->ip6_dst)) { - icmp6_error(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0); - /* m is already freed */ - return; - } - - ours = 1; - deliverifp = ifp; - goto hbhcheck; - } - if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { ours = 1; deliverifp = ifp; @@ -456,18 +438,11 @@ ip6_input(struct mbuf *m) } /* - * Accept the packet if the forwarding interface to the destination - * according to the routing table is the loopback interface, - * unless the associated route has a gateway. - * Note that this approach causes to accept a packet if there is a - * route to the loopback interface for the destination of the packet. - * But we think it's even useful in some situations, e.g. when using - * a special daemon which wants to intercept the packet. + * Accept the packet if the route to the destination is marked + * as local. */ if (ip6_forward_rt.ro_rt && - (ip6_forward_rt.ro_rt->rt_flags & - (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && - ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) { + ISSET(ip6_forward_rt.ro_rt->rt_flags, RTF_LOCAL)) { struct in6_ifaddr *ia6 = ifatoia6(ip6_forward_rt.ro_rt->rt_ifa); if (ia6->ia6_flags & IN6_IFF_ANYCAST) Index: netinet6/ip6_output.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.161 diff -u -p -r1.161 ip6_output.c --- netinet6/ip6_output.c 1 Nov 2014 21:40:39 -0000 1.161 +++ netinet6/ip6_output.c 18 Nov 2014 13:58:37 -0000 @@ -158,14 +158,13 @@ ip6_output(struct mbuf *m0, struct ip6_p struct inpcb *inp) { struct ip6_hdr *ip6; - struct ifnet *ifp, *origifp = NULL; + struct ifnet *ifp; struct mbuf *m = m0; int hlen, tlen; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, dstsock; int error = 0; - struct in6_ifaddr *ia6 = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; @@ -572,26 +571,14 @@ reroute: /* * then rt (for unicast) and ifp must be non-NULL valid values. */ - if (rt) { - ia6 = ifatoia6(rt->rt_ifa); + if (rt) rt->rt_use++; - } if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } - /* - * The outgoing interface must be in the zone of source and - * destination addresses. We should use ia_ifp to support the - * case of sending packets to an address of our own. - */ - if (ia6 != NULL && ia6->ia_ifp) - origifp = ia6->ia_ifp; - else - origifp = ifp; - if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* @@ -708,49 +695,6 @@ reroute: } } - /* Fake scoped addresses */ - if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - /* - * If source or destination address is a scoped address, and - * the packet is going to be sent to a loopback interface, - * we should keep the original interface. - */ - - /* - * XXX: this is a very experimental and temporary solution. - * We eventually have sockaddr_in6 and use the sin6_scope_id - * field of the structure here. - * We rely on the consistency between two scope zone ids - * of source add destination, which should already be assured - * Larger scopes than link will be supported in the near - * future. - */ - origifp = NULL; - if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) - origifp = if_get(ntohs(ip6->ip6_src.s6_addr16[1])); - else if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) - origifp = if_get(ntohs(ip6->ip6_dst.s6_addr16[1])); - /* - * XXX: origifp can be NULL even in those two cases above. - * For example, if we remove the (only) link-local address - * from the loopback interface, and try to send a link-local - * address without link-id information. Then the source - * address is ::1, and the destination address is the - * link-local address with its s6_addr16[1] being zero. - * What is worse, if the packet goes to the loopback interface - * by a default rejected route, the null pointer would be - * passed to looutput, and the kernel would hang. - * The following last resort would prevent such disaster. - */ - if (origifp == NULL) - origifp = ifp; - } else - origifp = ifp; - if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) - ip6->ip6_src.s6_addr16[1] = 0; - if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) - ip6->ip6_dst.s6_addr16[1] = 0; - /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. @@ -801,6 +745,19 @@ reroute: goto reroute; } #endif + + /* + * If the packet is not going on the wire it can be destinated + * to any local address. In this case do not clear its scopes + * to let ip6_input() find a matching local route. + */ + if ((ifp->if_flags & IFF_LOOPBACK) == 0) { + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = 0; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = 0; + } + in6_proto_cksum_out(m, ifp); /* @@ -861,7 +818,7 @@ reroute: * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + error = nd6_output(ifp, m, dst, ro->ro_rt); goto done; } @@ -949,7 +906,7 @@ reroute: if (error == 0) { ip6stat.ip6s_ofragments++; in6_ifstat_inc(ifp, ifs6_out_fragcreat); - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + error = nd6_output(ifp, m, dst, ro->ro_rt); } else m_freem(m); } Index: netinet6/nd6.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/nd6.c,v retrieving revision 1.125 diff -u -p -r1.125 nd6.c --- netinet6/nd6.c 18 Nov 2014 02:37:31 -0000 1.125 +++ netinet6/nd6.c 18 Nov 2014 13:58:37 -0000 @@ -1488,8 +1488,7 @@ fail: * we assume ifp is not a p2p here, so just * set the 2nd argument as the 1st one. */ - nd6_output(ifp, ifp, n, - satosin6(rt_key(rt)), rt); + nd6_output(ifp, n, satosin6(rt_key(rt)), rt); if (ln->ln_hold == n) { /* n is back in ln_hold. Discard. */ m_freem(ln->ln_hold); @@ -1650,8 +1649,8 @@ nd6_rs_output_timo(void *ignored_arg) #define senderr(e) { error = (e); goto bad;} int -nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, - struct sockaddr_in6 *dst, struct rtentry *rt0) +nd6_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr_in6 *dst, + struct rtentry *rt0) { struct mbuf *m = m0; struct rtentry *rt = rt0; @@ -1826,17 +1825,6 @@ nd6_output(struct ifnet *ifp, struct ifn mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); #endif /* IPSEC */ - if ((ifp->if_flags & IFF_LOOPBACK) != 0) { -#ifdef IPSEC - if (mtag != NULL) { - /* Tell IPsec to do its own crypto. */ - ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); - error = EACCES; - goto bad; - } -#endif /* IPSEC */ - return ((*ifp->if_output)(origifp, m, sin6tosa(dst), rt)); - } #ifdef IPSEC if (mtag != NULL) { /* Tell IPsec to do its own crypto. */ Index: netinet6/nd6.h =================================================================== RCS file: /home/ncvs/src/sys/netinet6/nd6.h,v retrieving revision 1.40 diff -u -p -r1.40 nd6.h --- netinet6/nd6.h 10 Nov 2014 10:46:10 -0000 1.40 +++ netinet6/nd6.h 18 Nov 2014 13:58:37 -0000 @@ -283,8 +283,8 @@ void nd6_rtrequest(int, struct rtentry * int nd6_ioctl(u_long, caddr_t, struct ifnet *); struct rtentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *, char *, int, int, int); -int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct rtentry *); +int nd6_output(struct ifnet *, struct mbuf *, struct sockaddr_in6 *, + struct rtentry *); int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *); int nd6_sysctl(int, void *, size_t *, void *, size_t); Index: netinet6/nd6_nbr.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/nd6_nbr.c,v retrieving revision 1.84 diff -u -p -r1.84 nd6_nbr.c --- netinet6/nd6_nbr.c 18 Nov 2014 02:37:31 -0000 1.84 +++ netinet6/nd6_nbr.c 18 Nov 2014 13:58:37 -0000 @@ -870,7 +870,7 @@ nd6_na_input(struct mbuf *m, int off, in * we assume ifp is not a loopback here, so just set the 2nd * argument as the 1st one. */ - nd6_output(ifp, ifp, n, satosin6(rt_key(rt)), rt); + nd6_output(ifp, n, satosin6(rt_key(rt)), rt); if (ln->ln_hold == n) { /* n is back in ln_hold. Discard. */ m_freem(ln->ln_hold);