Hello,
I'm sending finalized patch. The final shape has been discussed with bluhm@,
who was kind enough to do thorough testing on OpenBSD.
The patch solves the problem for deployment as follows:
Client <-- MTU_1 --> PF <-- MTU_1 --> Router <-- MTU_2 --> Server
MTU_2 = MTU_1/2
PF does IPv6 reassembly for PBRed packets (packets forwarded on behalf of
route-to action). Let's assume client sends packet of MTU_1 size. PF forwards
packet to destination. Router discards packet, because it does not
fit to wire. It sends packet too big suggesting MTU_2.
Client retries with fragmented packet this time. In our scenario it uses
MTU_1/2. PF reassembles the packet and is going to forwarded to destination
as ordered by route-to action. Snippet below comes from pf_route6():
5710 /*
5711 * If the packet is too large for the outgoing interface,
5712 * send back an icmp6 error.
5713 */
5714 if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
5715 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5716 if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5717 nd6_output(ifp, m0, dst, NULL);
5718 } else {
5719 in6_ifstat_inc(ifp, ifs6_in_toobig);
5720 if (r->rt != PF_DUPTO)
5721 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0,
ifp->if_mtu);
5722 else
5723 goto bad;
5724 }
test at line 5716 is invalid for our scenario. In our case condition
is met, so PF puts IPv6 reassembled packet to wire, while it should turn
the packet back to fragments as sent by client.
The fix is straightforward. Whenever pf_route6() deals with reassembled
packet it must use pf_refragment6() to turn it back to fragments.
pf_refragment6() must use nd6_output() to put fragment to wire, if it is
invoked on behalf of pf_route6().
bluhm@ exchanged couple of emails with me to finalize the patch I intend to
commit. These are the two points I'd like to highlight:
- PF should report ICMP packet too big for every fragment
if it fails to put to wire (1)
- the dup-to actions should not be treated as special case
curent version does not send ICMP pakcet too big for duped
packets. proposed patch changes that (2)
Ad (1) the earlier patch version I've sent in June sends only one ICMP packet
too big message as it hits the first for the first fragment, which exceeds MTU,
all other fragments are discarded only. This would be a deviation to regular
IPv6 router, which sends MTU exceeded to every packet, which exceeds MTU. The
patch proposed here makes PF to act as regular router.
Ad (2) dup-to actions: we assume the dup-to is used to pass duped packets to
IDS deployed somewhere on network. So if duped packet exceeds MTU we let PF to
send ICMP packet too big back to packet source, so it will somewhat enforce
correct fragment size to keep IDS in picture. Still the dup-to usecase is kind
of blurry to me, if someone can point me to yet another use case for dup-to
I'll be glad.
I believe the what we have is optimal fix.
regards
sasha
--------8<---------------8<---------------8<------------------8<--------
Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.935
diff -u -p -r1.935 pf.c
--- pf.c 21 Jul 2015 02:32:04 -0000 1.935
+++ pf.c 17 Aug 2015 17:58:55 -0000
@@ -5633,6 +5633,7 @@ pf_route6(struct mbuf **m, struct pf_rul
struct ifnet *ifp = NULL;
struct pf_addr naddr;
struct pf_src_node *sns[PF_SN_MAX];
+ struct m_tag *mtag;
if (m == NULL || *m == NULL || r == NULL ||
(dir != PF_IN && dir != PF_OUT) || oifp == NULL)
@@ -5707,20 +5708,20 @@ pf_route6(struct mbuf **m, struct pf_rul
in6_proto_cksum_out(m0, ifp);
- /*
- * If the packet is too large for the outgoing interface,
- * send back an icmp6 error.
- */
if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
- if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
+
+ /*
+ * If packet has been reassembled by PF earlier, we have to
+ * use pf_refragment6() here to turn it back to fragments.
+ */
+ if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
+ (void) pf_refragment6(&m0, mtag, dst, ifp);
+ } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
nd6_output(ifp, m0, dst, NULL);
} else {
in6_ifstat_inc(ifp, ifs6_in_toobig);
- if (r->rt != PF_DUPTO)
- icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
- else
- goto bad;
+ icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
}
done:
@@ -6644,7 +6645,7 @@ done:
struct m_tag *mtag;
if ((mtag = m_tag_find(*m0, PACKET_TAG_PF_REASSEMBLED, NULL)))
- action = pf_refragment6(m0, mtag);
+ action = pf_refragment6(m0, mtag, NULL, NULL);
}
#endif /* INET6 */
if (s && action != PF_DROP) {
Index: pf_norm.c
===================================================================
RCS file: /cvs/src/sys/net/pf_norm.c,v
retrieving revision 1.180
diff -u -p -r1.180 pf_norm.c
--- pf_norm.c 19 Jul 2015 01:58:19 -0000 1.180
+++ pf_norm.c 17 Aug 2015 17:58:56 -0000
@@ -57,6 +57,9 @@
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet/icmp6.h>
#endif /* INET6 */
#include <net/pfvar.h>
@@ -680,7 +683,8 @@ fail:
}
int
-pf_refragment6(struct mbuf **m0, struct m_tag *mtag)
+pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst,
+ struct ifnet *ifp)
{
struct mbuf *m = *m0, *t;
struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1);
@@ -743,10 +747,18 @@ pf_refragment6(struct mbuf **m0, struct
t = m->m_nextpkt;
m->m_nextpkt = NULL;
m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED;
- if (error == 0)
- ip6_forward(m, 0);
- else
+ if (error == 0) {
+ if (ifp == NULL) {
+ ip6_forward(m, 0);
+ } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) {
+ nd6_output(ifp, m, dst, NULL);
+ } else {
+ in6_ifstat_inc(ifp, ifs6_in_toobig);
+ icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0,
ifp->if_mtu);
+ }
+ } else {
m_freem(m);
+ }
}
return (action);
Index: pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.419
diff -u -p -r1.419 pfvar.h
--- pfvar.h 20 Jul 2015 01:18:33 -0000 1.419
+++ pfvar.h 17 Aug 2015 17:58:58 -0000
@@ -1734,7 +1734,8 @@ int pf_match_port(u_int8_t, u_int16_t, u
int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t);
int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t);
-int pf_refragment6(struct mbuf **, struct m_tag *mtag);
+int pf_refragment6(struct mbuf **, struct m_tag *mtag,
+ struct sockaddr_in6 *, struct ifnet *);
void pf_normalize_init(void);
int pf_normalize_ip(struct pf_pdesc *, u_short *);
int pf_normalize_ip6(struct pf_pdesc *, u_short *);