The following diff fixes an issue where frag list sorting was not applied in all cases, as it should have been. The change just moves the line
ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx); with some associated indentation changes. I have some additional private tests that found this, but I need to adapt them and will add them later. diff --git a/lib/ipf.c b/lib/ipf.c index 2963dd5..9cdc130 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -541,7 +541,6 @@ ipf_list_state_transition(struct ipf_list *ipf_list, bool ff, bool lf, break; case IPF_LIST_STATE_FIRST_LAST_SEEN: next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; - ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx); break; case IPF_LIST_STATE_COMPLETED: next_state = curr_state; @@ -552,23 +551,25 @@ ipf_list_state_transition(struct ipf_list *ipf_list, bool ff, bool lf, OVS_NOT_REACHED(); } - if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN && - ipf_list_complete(ipf_list)) { - struct dp_packet *reass_pkt = NULL; - if (v4) { - reass_pkt = ipf_reassemble_v4_frags(ipf_list); - } else { - reass_pkt = ipf_reassemble_v6_frags(ipf_list); - } - if (reass_pkt) { - struct reassembled_pkt *rp = xzalloc(sizeof *rp); - rp->pkt = reass_pkt; - rp->list = ipf_list; - ipf_reassembled_list_add(rp); - ipf_expiry_list_remove(ipf_list); - next_state = IPF_LIST_STATE_COMPLETED; - } else { - next_state = IPF_LIST_STATE_REASS_FAIL; + if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN) { + ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx); + if (ipf_list_complete(ipf_list)) { + struct dp_packet *reass_pkt = NULL; + if (v4) { + reass_pkt = ipf_reassemble_v4_frags(ipf_list); + } else { + reass_pkt = ipf_reassemble_v6_frags(ipf_list); + } + if (reass_pkt) { + struct reassembled_pkt *rp = xzalloc(sizeof *rp); + rp->pkt = reass_pkt; + rp->list = ipf_list; + ipf_reassembled_list_add(rp); + ipf_expiry_list_remove(ipf_list); + next_state = IPF_LIST_STATE_COMPLETED; + } else { + next_state = IPF_LIST_STATE_REASS_FAIL; + } } } ipf_list->state = next_state; Darrell On Sun, Apr 8, 2018 at 7:53 PM, Darrell Ball <dlu...@gmail.com> wrote: > Fragmentation handling is added for 
supporting conntrack. > Both v4 and v6 are supported. > > After discussion with several people, I decided to not store > configuration state in the database to be more consistent with > the kernel in future, similarity with other conntrack configuration > which will not be in the database as well and overall simplicity. > Accordingly, fragmentation handling is enabled by default. > > This patch enables fragmentation tests for the userspace datapath. > > Signed-off-by: Darrell Ball <dlu...@gmail.com> > --- > NEWS | 2 + > include/sparse/netinet/ip6.h | 1 + > lib/automake.mk | 2 + > lib/conntrack.c | 7 + > lib/ipf.c | 1238 ++++++++++++++++++++++++++++++ > ++++++++++++ > lib/ipf.h | 63 +++ > tests/system-traffic.at | 10 - > 7 files changed, 1313 insertions(+), 10 deletions(-) > create mode 100644 lib/ipf.c > create mode 100644 lib/ipf.h > > diff --git a/NEWS b/NEWS > index 0cfcac5..2f31680 100644 > --- a/NEWS > +++ b/NEWS > @@ -10,6 +10,8 @@ Post-v2.9.0 > * ovs-ofctl now accepts and display table names in place of > numbers. By > default it always accepts names and in interactive use it displays > them; > use --names or --no-names to override. See ovs-ofctl(8) for > details. > + - Userspace datapath: > + * Add v4/v6 fragmentation support for conntrack. > - ovs-vsctl: New commands "add-bond-iface" and "del-bond-iface". > - OpenFlow: > * OFPT_ROLE_STATUS is now available in OpenFlow 1.3. 
> diff --git a/include/sparse/netinet/ip6.h b/include/sparse/netinet/ip6.h > index d2a54de..bfa637a 100644 > --- a/include/sparse/netinet/ip6.h > +++ b/include/sparse/netinet/ip6.h > @@ -64,5 +64,6 @@ struct ip6_frag { > }; > > #define IP6F_OFF_MASK ((OVS_FORCE ovs_be16) 0xfff8) > +#define IP6F_MORE_FRAG ((OVS_FORCE ovs_be16) 0x0001) > > #endif /* netinet/ip6.h sparse */ > diff --git a/lib/automake.mk b/lib/automake.mk > index 915a33b..04163b3 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -107,6 +107,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/hmapx.h \ > lib/id-pool.c \ > lib/id-pool.h \ > + lib/ipf.c \ > + lib/ipf.h \ > lib/jhash.c \ > lib/jhash.h \ > lib/json.c \ > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 2b20e93..987c034 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -30,6 +30,7 @@ > #include "ct-dpif.h" > #include "dp-packet.h" > #include "flow.h" > +#include "ipf.h" > #include "netdev.h" > #include "odp-netlink.h" > #include "openvswitch/hmap.h" > @@ -340,6 +341,7 @@ conntrack_init(struct conntrack *ct) > atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT); > latch_init(&ct->clean_thread_exit); > ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, > ct); > + ipf_init(); > } > > /* Destroys the connection tracker 'ct' and frees all the allocated > memory. 
*/ > @@ -382,6 +384,7 @@ conntrack_destroy(struct conntrack *ct) > hindex_destroy(&ct->alg_expectation_refs); > ct_rwlock_unlock(&ct->resources_lock); > ct_rwlock_destroy(&ct->resources_lock); > + ipf_destroy(); > } > > static unsigned hash_to_bucket(uint32_t hash) > @@ -1308,6 +1311,8 @@ conntrack_execute(struct conntrack *ct, struct > dp_packet_batch *pkt_batch, > const struct nat_action_info_t *nat_action_info, > long long now) > { > + ipf_preprocess_conntrack(pkt_batch, now, dl_type, zone, > ct->hash_basis); > + > struct dp_packet *packet; > struct conn_lookup_ctx ctx; > > @@ -1321,6 +1326,8 @@ conntrack_execute(struct conntrack *ct, struct > dp_packet_batch *pkt_batch, > setlabel, nat_action_info, tp_src, tp_dst, helper); > } > > + ipf_postprocess_conntrack(pkt_batch, now, dl_type); > + > return 0; > } > > diff --git a/lib/ipf.c b/lib/ipf.c > new file mode 100644 > index 0000000..3837c60 > --- /dev/null > +++ b/lib/ipf.c > @@ -0,0 +1,1238 @@ > +/* > + * Copyright (c) 2018 Nicira, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#include <config.h> > +#include <ctype.h> > +#include <errno.h> > +#include <sys/types.h> > +#include <netinet/in.h> > +#include <netinet/ip6.h> > +#include <netinet/icmp6.h> > +#include <string.h> > + > +#include "coverage.h" > +#include "csum.h" > +#include "ipf.h" > +#include "openvswitch/hmap.h" > +#include "openvswitch/vlog.h" > +#include "ovs-atomic.h" > +#include "packets.h" > +#include "util.h" > + > +VLOG_DEFINE_THIS_MODULE(ipf); > +COVERAGE_DEFINE(ipf_stuck_frag_list_purged); > + > +enum { > + IPV4_PACKET_MAX_HDR_SIZE = 60, > + IPV4_PACKET_MAX_SIZE = 65535, > + IPV6_PACKET_MAX_DATA = 65535, > +}; > + > +enum ipf_list_state { > + IPF_LIST_STATE_UNUSED, > + IPF_LIST_STATE_REASS_FAIL, > + IPF_LIST_STATE_OTHER_SEEN, > + IPF_LIST_STATE_FIRST_SEEN, > + IPF_LIST_STATE_LAST_SEEN, > + IPF_LIST_STATE_FIRST_LAST_SEEN, > + IPF_LIST_STATE_COMPLETED, > + IPF_LIST_STATE_NUM, > +}; > + > +enum ipf_list_type { > + IPF_FRAG_COMPLETED_LIST, > + IPF_FRAG_EXPIRY_LIST, > +}; > + > +enum { > + IPF_INVALID_IDX = -1, > + IPF_V4_FRAG_SIZE_LBOUND = 400, > + IPF_V4_FRAG_SIZE_MIN_DEF = 1200, > + IPF_V6_FRAG_SIZE_LBOUND = 1280, > + IPF_V6_FRAG_SIZE_MIN_DEF = 1280, > + IPF_MAX_FRAGS_DEFAULT = 1000, > + IPF_NFRAG_UBOUND = 5000, > +}; > + > +enum ipf_counter_type { > + IPF_COUNTER_NFRAGS, > + IPF_COUNTER_NFRAGS_ACCEPTED, > + IPF_COUNTER_NFRAGS_COMPL_SENT, > + IPF_COUNTER_NFRAGS_EXPD_SENT, > + IPF_COUNTER_NFRAGS_TOO_SMALL, > + IPF_COUNTER_NFRAGS_OVERLAP, > +}; > + > +struct ipf_addr { > + union { > + ovs_16aligned_be32 ipv4; > + union ovs_16aligned_in6_addr ipv6; > + ovs_be32 ipv4_aligned; > + struct in6_addr ipv6_aligned; > + }; > +}; > + > +struct ipf_frag { > + struct dp_packet *pkt; > + uint16_t start_data_byte; > + uint16_t end_data_byte; > +}; > + > +struct ipf_list_key { > + struct ipf_addr src_addr; > + struct ipf_addr dst_addr; > + uint32_t recirc_id; > + ovs_be32 ip_id; /* V6 is 32 bits. 
*/ > + ovs_be16 dl_type; > + uint16_t zone; > + uint8_t nw_proto; > +}; > + > +struct ipf_list { > + struct hmap_node node; > + struct ovs_list exp_node; > + struct ovs_list complete_node; > + struct ipf_frag *frag_list; > + struct ipf_list_key key; > + struct dp_packet *reass_execute_ctx; > + long long expiration; > + int last_sent_idx; > + int last_inuse_idx; > + int size; > + uint8_t state; > +}; > + > +struct reassembled_pkt { > + struct ovs_list rp_list_node; > + struct dp_packet *pkt; > + struct ipf_list *list; > +}; > + > +struct OVS_LOCKABLE ipf_lock { > + struct ovs_mutex lock; > +}; > + > +static int max_v4_frag_list_size; > + > +static struct hmap frag_lists OVS_GUARDED; > +static struct ovs_list frag_exp_list OVS_GUARDED; > +static struct ovs_list frag_complete_list OVS_GUARDED; > +static struct ovs_list reassembled_pkt_list OVS_GUARDED; > + > +static atomic_bool ifp_v4_enabled; > +static atomic_bool ifp_v6_enabled; > +static atomic_uint nfrag_max; > +/* Will be clamped above 400 bytes; the value chosen should handle > + * alg control packets of interest that use string encoding of mutable > + * IP fields; meaning, the control packets should not be fragmented. 
*/ > +static atomic_uint min_v4_frag_size; > +static atomic_uint min_v6_frag_size; > + > +static atomic_count nfrag; > +static atomic_count n4frag_accepted; > +static atomic_count n4frag_completed_sent; > +static atomic_count n4frag_expired_sent; > +static atomic_count n4frag_too_small; > +static atomic_count n4frag_overlap; > +static atomic_count n6frag_accepted; > +static atomic_count n6frag_completed_sent; > +static atomic_count n6frag_expired_sent; > +static atomic_count n6frag_too_small; > +static atomic_count n6frag_overlap; > + > +static struct ipf_lock ipf_lock; > + > +static void ipf_lock_init(struct ipf_lock *lock) > +{ > + ovs_mutex_init_adaptive(&lock->lock); > +} > + > +static void ipf_lock_lock(struct ipf_lock *lock) > + OVS_ACQUIRES(lock) > + OVS_NO_THREAD_SAFETY_ANALYSIS > +{ > + ovs_mutex_lock(&lock->lock); > +} > + > +static void ipf_lock_unlock(struct ipf_lock *lock) > + OVS_RELEASES(lock) > + OVS_NO_THREAD_SAFETY_ANALYSIS > +{ > + ovs_mutex_unlock(&lock->lock); > +} > + > +static void ipf_lock_destroy(struct ipf_lock *lock) > +{ > + ovs_mutex_destroy(&lock->lock); > +} > + > +static void > +ipf_count(bool v4, enum ipf_counter_type cntr) > +{ > + if (v4) { > + switch (cntr) { > + case IPF_COUNTER_NFRAGS_ACCEPTED: > + atomic_count_inc(&n4frag_accepted); > + break; > + case IPF_COUNTER_NFRAGS_COMPL_SENT: > + atomic_count_inc(&n4frag_completed_sent); > + break; > + case IPF_COUNTER_NFRAGS_EXPD_SENT: > + atomic_count_inc(&n4frag_expired_sent); > + break; > + case IPF_COUNTER_NFRAGS_TOO_SMALL: > + atomic_count_inc(&n4frag_too_small); > + break; > + case IPF_COUNTER_NFRAGS_OVERLAP: > + atomic_count_inc(&n4frag_overlap); > + break; > + case IPF_COUNTER_NFRAGS: > + default: > + OVS_NOT_REACHED(); > + } > + } else { > + switch (cntr) { > + case IPF_COUNTER_NFRAGS_ACCEPTED: > + atomic_count_inc(&n6frag_accepted); > + break; > + case IPF_COUNTER_NFRAGS_COMPL_SENT: > + atomic_count_inc(&n6frag_completed_sent); > + break; > + case 
IPF_COUNTER_NFRAGS_EXPD_SENT: > + atomic_count_inc(&n6frag_expired_sent); > + break; > + case IPF_COUNTER_NFRAGS_TOO_SMALL: > + atomic_count_inc(&n6frag_too_small); > + break; > + case IPF_COUNTER_NFRAGS_OVERLAP: > + atomic_count_inc(&n6frag_overlap); > + break; > + case IPF_COUNTER_NFRAGS: > + default: > + OVS_NOT_REACHED(); > + } > + } > +} > + > +static bool > +ipf_get_enabled(void) > +{ > + bool ifp_v4_enabled_; > + bool ifp_v6_enabled_; > + atomic_read_relaxed(&ifp_v4_enabled, &ifp_v4_enabled_); > + atomic_read_relaxed(&ifp_v6_enabled, &ifp_v6_enabled_); > + return ifp_v4_enabled_ || ifp_v6_enabled_; > +} > + > +static bool > +ipf_get_v4_enabled(void) > +{ > + bool ifp_v4_enabled_; > + atomic_read_relaxed(&ifp_v4_enabled, &ifp_v4_enabled_); > + return ifp_v4_enabled_; > +} > + > +static bool > +ipf_get_v6_enabled(void) > +{ > + bool ifp_v6_enabled_; > + atomic_read_relaxed(&ifp_v6_enabled, &ifp_v6_enabled_); > + return ifp_v6_enabled_; > +} > + > +static uint32_t > +ipf_addr_hash_add(uint32_t hash, const struct ipf_addr *addr) > +{ > + BUILD_ASSERT_DECL(sizeof *addr % 4 == 0); > + return hash_add_bytes32(hash, (const uint32_t *) addr, sizeof *addr); > +} > + > +static void > +ipf_expiry_list_add(struct ipf_list *ipf_list, long long now) > + OVS_REQUIRES(ipf_lock) > +{ > + enum { > + IPF_FRAG_LIST_TIMEOUT_DEFAULT = 15000, > + }; > + > + ipf_list->expiration = now + IPF_FRAG_LIST_TIMEOUT_DEFAULT; > + ovs_list_push_back(&frag_exp_list, &ipf_list->exp_node); > +} > + > +static void > +ipf_completed_list_add(struct ipf_list *ipf_list) > + OVS_REQUIRES(ipf_lock) > +{ > + ovs_list_push_back(&frag_complete_list, &ipf_list->complete_node); > +} > + > +static void > +ipf_reassembled_list_add(struct reassembled_pkt *rp) > + OVS_REQUIRES(ipf_lock) > +{ > + ovs_list_push_back(&reassembled_pkt_list, &rp->rp_list_node); > +} > + > +static void > +ipf_expiry_list_remove(struct ipf_list *ipf_list) > + OVS_REQUIRES(ipf_lock) > +{ > + ovs_list_remove(&ipf_list->exp_node); > +} > 
+ > +static void > +ipf_completed_list_remove(struct ipf_list *ipf_list) > + OVS_REQUIRES(ipf_lock) > +{ > + ovs_list_remove(&ipf_list->complete_node); > +} > + > +static void > +ipf_reassembled_list_remove(struct reassembled_pkt *rp) > + OVS_REQUIRES(ipf_lock) > +{ > + ovs_list_remove(&rp->rp_list_node); > +} > + > +/* Symmetric */ > +static uint32_t > +ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis) > +{ > + uint32_t hsrc, hdst, hash; > + hsrc = hdst = basis; > + hsrc = ipf_addr_hash_add(hsrc, &key->src_addr); > + hdst = ipf_addr_hash_add(hdst, &key->dst_addr); > + hash = hsrc ^ hdst; > + > + /* Hash the rest of the key. */ > + hash = hash_words((uint32_t *) (&key->dst_addr + 1), > + (uint32_t *) (key + 1) - > + (uint32_t *) (&key->dst_addr + 1), > + hash); > + > + return hash_finish(hash, 0); > +} > + > +static bool > +ipf_is_first_v4_frag(const struct dp_packet *pkt) > +{ > + const struct ip_header *l3 = dp_packet_l3(pkt); > + if (!(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) && > + l3->ip_frag_off & htons(IP_MORE_FRAGMENTS)) { > + return true; > + } > + return false; > +} > + > +static bool > +ipf_is_last_v4_frag(const struct dp_packet *pkt) > +{ > + const struct ip_header *l3 = dp_packet_l3(pkt); > + if (l3->ip_frag_off & htons(IP_FRAG_OFF_MASK) && > + !(l3->ip_frag_off & htons(IP_MORE_FRAGMENTS))) { > + return true; > + } > + return false; > +} > + > +static bool > +ipf_is_v6_frag(ovs_be16 ip6f_offlg) > +{ > + if (ip6f_offlg & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) { > + return true; > + } > + return false; > +} > + > +static bool > +ipf_is_first_v6_frag(ovs_be16 ip6f_offlg) > +{ > + if (!(ip6f_offlg & IP6F_OFF_MASK) && > + ip6f_offlg & IP6F_MORE_FRAG) { > + return true; > + } > + return false; > +} > + > +static bool > +ipf_is_last_v6_frag(ovs_be16 ip6f_offlg) > +{ > + if ((ip6f_offlg & IP6F_OFF_MASK) && > + !(ip6f_offlg & IP6F_MORE_FRAG)) { > + return true; > + } > + return false; > +} > + > +static bool > +ipf_list_complete(const struct 
ipf_list *ipf_list) > + OVS_REQUIRES(ipf_lock) > +{ > + for (int i = 0; i < ipf_list->last_inuse_idx; i++) { > + if (ipf_list->frag_list[i].end_data_byte + 1 > + != ipf_list->frag_list[i + 1].start_data_byte) { > + return false; > + } > + } > + return true; > +} > + > +/* Runs O(n) for a sorted or almost sorted list. */ > +static void > +ipf_sort(struct ipf_frag *frag_list, size_t last_idx) > + OVS_REQUIRES(ipf_lock) > +{ > + int running_last_idx = 1; > + struct ipf_frag ipf_frag; > + while (running_last_idx <= last_idx) { > + ipf_frag = frag_list[running_last_idx]; > + int frag_list_idx = running_last_idx - 1; > + while (frag_list_idx >= 0 && > + frag_list[frag_list_idx].start_data_byte > > + ipf_frag.start_data_byte) { > + frag_list[frag_list_idx + 1] = frag_list[frag_list_idx]; > + frag_list_idx -= 1; > + } > + frag_list[frag_list_idx + 1] = ipf_frag; > + running_last_idx++; > + } > +} > + > +/* Called on a sorted complete list of fragments. */ > +static struct dp_packet * > +ipf_reassemble_v4_frags(struct ipf_list *ipf_list) > + OVS_REQUIRES(ipf_lock) > +{ > + struct ipf_frag *frag_list = ipf_list->frag_list; > + struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt); > + struct ip_header *l3 = dp_packet_l3(pkt); > + int len = ntohs(l3->ip_tot_len); > + size_t add_len; > + size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4; > + > + for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { > + add_len = frag_list[i].end_data_byte - > + frag_list[i].start_data_byte + 1; > + len += add_len; > + if (len > IPV4_PACKET_MAX_SIZE) { > + dp_packet_delete(pkt); > + return NULL; > + } > + l3 = dp_packet_l3(frag_list[i].pkt); > + dp_packet_put(pkt, (char *)l3 + ip_hdr_len, add_len); > + } > + l3 = dp_packet_l3(pkt); > + ovs_be16 new_ip_frag_off = l3->ip_frag_off & > ~htons(IP_MORE_FRAGMENTS); > + l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off, > + new_ip_frag_off); > + l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len)); > + l3->ip_tot_len = 
htons(len); > + l3->ip_frag_off = new_ip_frag_off; > + > + return pkt; > +} > + > +/* Called on a sorted complete list of fragments. */ > +static struct dp_packet * > +ipf_reassemble_v6_frags(struct ipf_list *ipf_list) > + OVS_REQUIRES(ipf_lock) > +{ > + struct ipf_frag *frag_list = ipf_list->frag_list; > + struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt); > + struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); > + int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag); > + const char *tail = dp_packet_tail(pkt); > + uint8_t pad = dp_packet_l2_pad_size(pkt); > + const char *l4 = dp_packet_l4(pkt); > + size_t l3_size = tail - (char *)l3 -pad; > + size_t l4_size = tail - (char *)l4 -pad; > + size_t l3_hlen = l3_size - l4_size; > + size_t add_len; > + > + for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { > + add_len = frag_list[i].end_data_byte - > + frag_list[i].start_data_byte + 1; > + pl += add_len; > + if (pl > IPV6_PACKET_MAX_DATA) { > + dp_packet_delete(pkt); > + return NULL; > + } > + l3 = dp_packet_l3(frag_list[i].pkt); > + dp_packet_put(pkt, (char *)l3 + l3_hlen, add_len); > + } > + l3 = dp_packet_l3(pkt); > + l4 = dp_packet_l4(pkt); > + tail = dp_packet_tail(pkt); > + pad = dp_packet_l2_pad_size(pkt); > + l3_size = tail - (char *)l3 -pad; > + > + uint8_t nw_proto = l3->ip6_nxt; > + uint8_t nw_frag = 0; > + const void *data = l3 + 1; > + size_t datasize = l3_size - sizeof *l3; > + > + const struct ovs_16aligned_ip6_frag *frag_hdr = NULL; > + if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, > &frag_hdr) > + || !nw_frag || !frag_hdr) { > + return NULL; > + } > + > + struct ovs_16aligned_ip6_frag *fh = > + CONST_CAST(struct ovs_16aligned_ip6_frag *, frag_hdr); > + fh->ip6f_offlg = 0;; > + l3->ip6_plen = htons(pl); > + l3->ip6_ctlun.ip6_un1.ip6_un1_nxt = nw_proto; > + return pkt; > +} > + > +/* Called when a valid fragment is added. 
*/ > +static void > +ipf_list_state_transition(struct ipf_list *ipf_list, bool ff, bool lf, > + bool v4) > + OVS_REQUIRES(ipf_lock) > +{ > + enum ipf_list_state curr_state = ipf_list->state; > + enum ipf_list_state next_state; > + switch (curr_state) { > + case IPF_LIST_STATE_UNUSED: > + case IPF_LIST_STATE_OTHER_SEEN: > + if (ff) { > + next_state = IPF_LIST_STATE_FIRST_SEEN; > + } else if (lf) { > + next_state = IPF_LIST_STATE_LAST_SEEN; > + } else { > + next_state = IPF_LIST_STATE_OTHER_SEEN; > + } > + break; > + case IPF_LIST_STATE_FIRST_SEEN: > + if (ff) { > + next_state = IPF_LIST_STATE_FIRST_SEEN; > + } else if (lf) { > + next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; > + } else { > + next_state = IPF_LIST_STATE_FIRST_SEEN; > + } > + break; > + case IPF_LIST_STATE_LAST_SEEN: > + if (ff) { > + next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; > + } else if (lf) { > + next_state = IPF_LIST_STATE_LAST_SEEN; > + } else { > + next_state = IPF_LIST_STATE_LAST_SEEN; > + } > + break; > + case IPF_LIST_STATE_FIRST_LAST_SEEN: > + next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; > + ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx); > + break; > + case IPF_LIST_STATE_COMPLETED: > + next_state = curr_state; > + break; > + case IPF_LIST_STATE_REASS_FAIL: > + case IPF_LIST_STATE_NUM: > + default: > + OVS_NOT_REACHED(); > + } > + > + if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN && > + ipf_list_complete(ipf_list)) { > + struct dp_packet *reass_pkt = NULL; > + if (v4) { > + reass_pkt = ipf_reassemble_v4_frags(ipf_list); > + } else { > + reass_pkt = ipf_reassemble_v6_frags(ipf_list); > + } > + if (reass_pkt) { > + struct reassembled_pkt *rp = xzalloc(sizeof *rp); > + rp->pkt = reass_pkt; > + rp->list = ipf_list; > + ipf_reassembled_list_add(rp); > + ipf_expiry_list_remove(ipf_list); > + next_state = IPF_LIST_STATE_COMPLETED; > + } else { > + next_state = IPF_LIST_STATE_REASS_FAIL; > + } > + } > + ipf_list->state = next_state; > +} > + > +static bool > +ipf_v4_key_extract(const 
struct dp_packet *pkt, ovs_be16 dl_type, > + uint16_t zone, struct ipf_list_key *key, > + uint16_t *start_data_byte, uint16_t *end_data_byte, > + bool *ff, bool *lf) > +{ > + if (dp_packet_ip_checksum_bad(pkt)) { > + return false; > + } > + > + const struct eth_header *l2 = dp_packet_eth(pkt); > + const struct ip_header *l3 = dp_packet_l3(pkt); > + > + if (!l2 || !l3) { > + return false; > + } > + > + const char *tail = dp_packet_tail(pkt); > + uint8_t pad = dp_packet_l2_pad_size(pkt); > + size_t size = tail - (char *)l3 -pad; > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) { > + return false; > + } > + > + uint16_t ip_tot_len = ntohs(l3->ip_tot_len); > + if (ip_tot_len != size) { > + return false; > + } > + > + if (!(IP_IS_FRAGMENT(l3->ip_frag_off))) { > + return false; > + } > + > + size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4; > + if (OVS_UNLIKELY(ip_hdr_len < IP_HEADER_LEN)) { > + return false; > + } > + if (OVS_UNLIKELY(size < ip_hdr_len)) { > + return false; > + } > + > + if (!dp_packet_ip_checksum_valid(pkt) && csum(l3, ip_hdr_len) != 0) { > + return false; > + } > + > + uint32_t min_v4_frag_size_; > + atomic_read_relaxed(&min_v4_frag_size, &min_v4_frag_size_); > + *lf = ipf_is_last_v4_frag(pkt); > + if (!*lf && dp_packet_size(pkt) <= min_v4_frag_size_) { > + ipf_count(true, IPF_COUNTER_NFRAGS_TOO_SMALL); > + return false; > + } > + > + *start_data_byte = ntohs(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) * > 8; > + *end_data_byte = *start_data_byte + ip_tot_len - ip_hdr_len - 1; > + *ff = ipf_is_first_v4_frag(pkt); > + memset(key, 0, sizeof *key); > + key->ip_id = be16_to_be32(l3->ip_id); > + key->dl_type = dl_type; > + key->src_addr.ipv4 = l3->ip_src; > + key->dst_addr.ipv4 = l3->ip_dst; > + key->nw_proto = l3->ip_proto; > + key->zone = zone; > + key->recirc_id = pkt->md.recirc_id; > + return true; > +} > + > +static bool > +ipf_v6_key_extract(const struct dp_packet *pkt, ovs_be16 dl_type, > + uint16_t zone, struct ipf_list_key *key, > + uint16_t 
*start_data_byte, uint16_t *end_data_byte, > + bool *ff, bool *lf) > +{ > + const struct eth_header *l2 = dp_packet_eth(pkt); > + const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); > + const char *l4 = dp_packet_l4(pkt); > + > + if (!l2 || !l3 || !l4) { > + return false; > + } > + > + const char *tail = dp_packet_tail(pkt); > + uint8_t pad = dp_packet_l2_pad_size(pkt); > + size_t l3_size = tail - (char *)l3 -pad; > + size_t l4_size = tail - (char *)l4 -pad; > + size_t l3_hdr_size = sizeof *l3; > + > + if (OVS_UNLIKELY(l3_size < l3_hdr_size)) { > + return false; > + } > + > + int pl = ntohs(l3->ip6_plen); > + if (pl + l3_hdr_size != l3_size) { > + return false; > + } > + > + uint8_t nw_frag = 0; > + uint8_t nw_proto = l3->ip6_nxt; > + const void *data = l3 + 1; > + size_t datasize = l3_size - l3_hdr_size; > + const struct ovs_16aligned_ip6_frag *frag_hdr = NULL; > + if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, > + &frag_hdr) || !nw_frag || !frag_hdr) { > + return false; > + } > + > + ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg; > + > + if (!(ipf_is_v6_frag(ip6f_offlg))) { > + return false; > + } > + > + uint32_t min_v6_frag_size_; > + atomic_read_relaxed(&min_v6_frag_size, &min_v6_frag_size_); > + *lf = ipf_is_last_v6_frag(ip6f_offlg); > + > + if (!(*lf) && dp_packet_size(pkt) <= min_v6_frag_size_) { > + ipf_count(false, IPF_COUNTER_NFRAGS_TOO_SMALL); > + return false; > + } > + > + *start_data_byte = ntohs(ip6f_offlg & IP6F_OFF_MASK) + > + sizeof (struct ovs_16aligned_ip6_frag); > + *end_data_byte = *start_data_byte + l4_size - 1; > + *ff = ipf_is_first_v6_frag(ip6f_offlg); > + memset(key, 0, sizeof *key); > + key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident); > + key->dl_type = dl_type; > + key->src_addr.ipv6 = l3->ip6_src; > + /* We are not supporting parsing of the routing header > + * to use as the dst address part of the key. */ > + key->dst_addr.ipv6 = l3->ip6_dst; > + /* Not used for key for V6. 
*/ > + key->nw_proto = 0; > + key->zone = zone; > + key->recirc_id = pkt->md.recirc_id; > + return true; > +} > + > +static int > +ipf_list_key_cmp(const struct ipf_list_key *key1, > + const struct ipf_list_key *key2) > + OVS_REQUIRES(ipf_lock) > +{ > + if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) > && > + !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) > && > + (key1->dl_type == key2->dl_type) && > + (key1->ip_id == key2->ip_id) && > + (key1->zone == key2->zone) && > + (key1->nw_proto == key2->nw_proto) && > + (key1->recirc_id == key2->recirc_id)) { > + return 0; > + } > + return 1; > +} > + > +static struct ipf_list * > +ipf_list_key_lookup(const struct ipf_list_key *key, > + uint32_t hash) > + OVS_REQUIRES(ipf_lock) > +{ > + struct ipf_list *ipf_list; > + HMAP_FOR_EACH_WITH_HASH (ipf_list, node, hash, &frag_lists) { > + if (!ipf_list_key_cmp(&ipf_list->key, key)) { > + return ipf_list; > + } > + } > + return NULL; > +} > + > +static bool > +ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, > + size_t start_data_byte, size_t end_data_byte) > + OVS_REQUIRES(ipf_lock) > +{ > + for (int i = 0; i <= last_inuse_idx; i++) { > + if (((start_data_byte >= frag_list[i].start_data_byte) && > + (start_data_byte <= frag_list[i].end_data_byte)) || > + ((end_data_byte >= frag_list[i].start_data_byte) && > + (end_data_byte <= frag_list[i].end_data_byte))) { > + return true; > + } > + } > + return false; > +} > + > +static bool > +ipf_process_frag(struct ipf_list *ipf_list, struct dp_packet *pkt, > + uint16_t start_data_byte, uint16_t end_data_byte, > + bool ff, bool lf, bool v4) > + OVS_REQUIRES(ipf_lock) > +{ > + bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, > + ipf_list->last_inuse_idx, start_data_byte, end_data_byte); > + int last_inuse_idx = ipf_list->last_inuse_idx; > + > + if (!duped_frag) { > + if (last_inuse_idx < ipf_list->size - 1) { > + /* In the case of dpdk, it would be unfortunate if we had > 
+ * to create a clone fragment outside the dpdk mp due to the > + * mempool size being too limited. We will otherwise need to > + * recommend not setting the mempool number of buffers too low > + * and also clamp the number of fragments. */ > + ipf_list->frag_list[last_inuse_idx + 1].pkt = pkt; > + ipf_list->frag_list[last_inuse_idx + 1].start_data_byte = > + start_data_byte; > + ipf_list->frag_list[last_inuse_idx + 1].end_data_byte = > + end_data_byte; > + ipf_list->last_inuse_idx++; > + atomic_count_inc(&nfrag); > + ipf_count(v4, IPF_COUNTER_NFRAGS_ACCEPTED); > + ipf_list_state_transition(ipf_list, ff, lf, v4); > + } else { > + OVS_NOT_REACHED(); > + } > + } else { > + ipf_count(v4, IPF_COUNTER_NFRAGS_OVERLAP); > + pkt->md.ct_state = CS_INVALID; > + return false; > + } > + return true; > +} > + > +static bool > +ipf_handle_frag(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone, > + long long now, uint32_t hash_basis) > + OVS_REQUIRES(ipf_lock) > +{ > + struct ipf_list_key key; > + uint16_t start_data_byte; > + uint16_t end_data_byte; > + bool ff; > + bool lf; > + bool v4; > + > + if (dl_type == htons(ETH_TYPE_IP) && ipf_get_v4_enabled()) { > + if (!ipf_v4_key_extract(pkt, dl_type, zone, &key, > &start_data_byte, > + &end_data_byte, &ff, &lf)) { > + return false; > + } > + v4 = true; > + } else if (dl_type == htons(ETH_TYPE_IPV6) && ipf_get_v6_enabled()) { > + if (!ipf_v6_key_extract(pkt, dl_type, zone, &key, > &start_data_byte, > + &end_data_byte, &ff, &lf)) { > + return false; > + } > + v4 = false; > + } else { > + return false; > + } > + > + unsigned int nfrag_max_; > + atomic_read_relaxed(&nfrag_max, &nfrag_max_); > + if (atomic_count_get(&nfrag) >= nfrag_max_) { > + return false; > + } > + > + uint32_t hash = ipf_list_key_hash(&key, hash_basis); > + struct ipf_list *ipf_list = > + ipf_list_key_lookup(&key, hash); > + enum { > + IPF_FRAG_LIST_MIN_INCREMENT = 4, > + IPF_UNBOUNDED_FRAG_LIST_SIZE = 65535, > + }; > + > + int max_frag_list_size; > + if (v4) { 
> + max_frag_list_size = max_v4_frag_list_size; > + } else { > + max_frag_list_size = IPF_UNBOUNDED_FRAG_LIST_SIZE; > + } > + > + if (!ipf_list) { > + ipf_list = xzalloc(sizeof *ipf_list); > + ipf_list->key = key; > + ipf_list->last_inuse_idx = IPF_INVALID_IDX; > + ipf_list->last_sent_idx = IPF_INVALID_IDX; > + ipf_list->size = > + MIN(max_frag_list_size, IPF_FRAG_LIST_MIN_INCREMENT); > + ipf_list->frag_list = > + xzalloc(ipf_list->size * sizeof *ipf_list->frag_list); > + hmap_insert(&frag_lists, &ipf_list->node, hash); > + ipf_expiry_list_add(ipf_list, now); > + } else if (ipf_list->state == IPF_LIST_STATE_REASS_FAIL) { > + /* Bail out as early as possible. */ > + return false; > + } else if (ipf_list->last_inuse_idx + 1 >= ipf_list->size) { > + int increment = MIN(IPF_FRAG_LIST_MIN_INCREMENT, > + max_frag_list_size - ipf_list->size); > + /* Enforce limit. */ > + if (increment > 0) { > + ipf_list->frag_list = > + xrealloc(ipf_list->frag_list, (ipf_list->size + > increment) * > + sizeof *ipf_list->frag_list); > + ipf_list->size += increment; > + } else { > + return false; > + } > + } > + > + return ipf_process_frag(ipf_list, pkt, start_data_byte, > end_data_byte, ff, > + lf, v4); > +} > + > +/* Handles V4 fragments right now. */ > +static void > +ipf_extract_frags_from_batch(struct dp_packet_batch *pb, ovs_be16 > dl_type, > + uint16_t zone, long long now, uint32_t > hash_basis) > +{ > + const size_t pb_cnt = dp_packet_batch_size(pb); > + int pb_idx; /* Index in a packet batch. */ > + struct dp_packet *pkt; > + > + DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { > + ipf_lock_lock(&ipf_lock); > + > + if (!ipf_handle_frag(pkt, dl_type, zone, now, hash_basis)) { > + dp_packet_batch_refill(pb, pkt, pb_idx); > + } > + > + ipf_lock_unlock(&ipf_lock); > + } > +} > + > +/* In case of DPDK, a memory source check is done, as DPDK memory pool > + * management has trouble dealing with multiple source types. 
The > + * check_source parameter is used to indicate when this check is needed. > */ > +static bool > +ipf_dp_packet_batch_add(struct dp_packet_batch *pb , struct dp_packet > *pkt, > + bool check_source OVS_UNUSED) > + OVS_REQUIRES(ipf_lock) > +{ > +#ifdef DPDK_NETDEV > + if ((pb->count >= NETDEV_MAX_BURST) || > + /* DPDK cannot handle multiple sources in a batch. */ > + (check_source && pb->count && pb->packets[0]->source != > pkt->source)) { > +#else > + if (pb->count >= NETDEV_MAX_BURST) { > +#endif > + return false; > + } > + > + dp_packet_batch_add(pb, pkt); > + return true; > +} > + > +/* This would be used in a rare case where a list cannot be sent. The only > + * reason known right now is a mempool source check, which exists due to > DPDK > + * support, where packets are no longer being received on any port with a > + * source matching the fragment. > + * Returns true if the list was purged. */ > +static bool > +ipf_purge_list_check(struct ipf_list *ipf_list, long long now) > + OVS_REQUIRES(ipf_lock) > +{ > + enum { > + /* 10 minutes. 
*/ > + IPF_FRAG_LIST_TIMEOUT_PURGE = 600000, > + }; > + > + if (now < ipf_list->expiration + IPF_FRAG_LIST_TIMEOUT_PURGE) { > + return false; > + } > + > + struct dp_packet *pkt; > + while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { > + pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; > + dp_packet_delete(pkt); > + atomic_count_dec(&nfrag); > + ipf_list->last_sent_idx++; > + } > + > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); > + VLOG_WARN_RL(&rl, "Fragments dropped due to stuck fragment list > purge."); > + COVERAGE_INC(ipf_stuck_frag_list_purged); > + return true; > +} > + > +static bool > +ipf_send_frags_in_list(struct ipf_list *ipf_list, struct dp_packet_batch > *pb, > + enum ipf_list_type list_type, bool v4, long long > now) > + OVS_REQUIRES(ipf_lock) > +{ > + if (ipf_purge_list_check(ipf_list, now)) { > + return true; > + } > + > + struct dp_packet *pkt; > + while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { > + pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; > + if (ipf_dp_packet_batch_add(pb, pkt, true)) { > + > + ipf_list->last_sent_idx++; > + atomic_count_dec(&nfrag); > + > + if (list_type == IPF_FRAG_COMPLETED_LIST) { > + ipf_count(v4, IPF_COUNTER_NFRAGS_COMPL_SENT); > + } else { > + ipf_count(v4, IPF_COUNTER_NFRAGS_EXPD_SENT); > + pkt->md.ct_state = CS_INVALID; > + } > + > + if (ipf_list->last_sent_idx == ipf_list->last_inuse_idx) { > + return true; > + } > + } else { > + return false; > + } > + } > + OVS_NOT_REACHED(); > +} > + > +static void > +ipf_list_remove(struct ipf_list *ipf_list, enum ipf_list_type list_type) > + OVS_REQUIRES(ipf_lock) > +{ > + if (list_type == IPF_FRAG_COMPLETED_LIST) { > + ipf_completed_list_remove(ipf_list); > + } else { > + ipf_expiry_list_remove(ipf_list); > + } > + hmap_remove(&frag_lists, &ipf_list->node); > + free(ipf_list->frag_list); > + free(ipf_list); > +} > + > +static void > +ipf_send_completed_frags(struct dp_packet_batch *pb, long long now, bool > 
v4) > +{ > + if (ovs_list_is_empty(&frag_complete_list)) { > + return; > + } > + > + ipf_lock_lock(&ipf_lock); > + struct ipf_list *ipf_list, *next; > + > + LIST_FOR_EACH_SAFE (ipf_list, next, complete_node, > &frag_complete_list) { > + if (ipf_send_frags_in_list(ipf_list, pb, IPF_FRAG_COMPLETED_LIST, > + v4, now)) { > + ipf_list_remove(ipf_list, IPF_FRAG_COMPLETED_LIST); > + } else { > + break; > + } > + } > + ipf_lock_unlock(&ipf_lock); > +} > + > +static void > +ipf_send_expired_frags(struct dp_packet_batch *pb, long long now, bool v4) > +{ > + enum { > + /* Very conservative, due to DOS probability. */ > + IPF_FRAG_LIST_MAX_EXPIRED = 1, > + }; > + > + > + if (ovs_list_is_empty(&frag_exp_list)) { > + return; > + } > + > + ipf_lock_lock(&ipf_lock); > + struct ipf_list *ipf_list, *next; > + size_t lists_removed = 0; > + > + LIST_FOR_EACH_SAFE (ipf_list, next, exp_node, &frag_exp_list) { > + if (!(now > ipf_list->expiration) || > + lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) { > + break; > + } > + > + if (ipf_send_frags_in_list(ipf_list, pb, IPF_FRAG_EXPIRY_LIST, > v4, > + now)) { > + ipf_list_remove(ipf_list, IPF_FRAG_EXPIRY_LIST); > + lists_removed++; > + } else { > + break; > + } > + } > + ipf_lock_unlock(&ipf_lock); > +} > + > +static void > +ipf_execute_reass_pkts(struct dp_packet_batch *pb) > +{ > + if (ovs_list_is_empty(&reassembled_pkt_list)) { > + return; > + } > + > + ipf_lock_lock(&ipf_lock); > + struct reassembled_pkt *rp, *next; > + > + LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &reassembled_pkt_list) { > + if (!rp->list->reass_execute_ctx && > + ipf_dp_packet_batch_add(pb, rp->pkt, false)) { > + rp->list->reass_execute_ctx = rp->pkt; > + } > + } > + ipf_lock_unlock(&ipf_lock); > +} > + > +static void > +ipf_post_execute_reass_pkts(struct dp_packet_batch *pb, bool v4) > +{ > + if (ovs_list_is_empty(&reassembled_pkt_list)) { > + return; > + } > + > + ipf_lock_lock(&ipf_lock); > + struct reassembled_pkt *rp, *next; > + > + LIST_FOR_EACH_SAFE (rp, next, 
rp_list_node, &reassembled_pkt_list) { > + const size_t pb_cnt = dp_packet_batch_size(pb); > + int pb_idx; > + struct dp_packet *pkt; > + /* Inner batch loop is constant time since batch size is <= > + * NETDEV_MAX_BURST. */ > + DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { > + if (pkt == rp->list->reass_execute_ctx) { > + for (int i = 0; i <= rp->list->last_inuse_idx; i++) { > + rp->list->frag_list[i].pkt->md.ct_label = > pkt->md.ct_label; > + rp->list->frag_list[i].pkt->md.ct_mark = > pkt->md.ct_mark; > + rp->list->frag_list[i].pkt->md.ct_state = > pkt->md.ct_state; > + rp->list->frag_list[i].pkt->md.ct_zone = > pkt->md.ct_zone; > + rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 = > + pkt->md.ct_orig_tuple_ipv6; > + if (pkt->md.ct_orig_tuple_ipv6) { > + rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 > = > + pkt->md.ct_orig_tuple.ipv6; > + } else { > + rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 > = > + pkt->md.ct_orig_tuple.ipv4; > + } > + } > + > + const char *tail_frag = > + dp_packet_tail(rp->list->frag_list[0].pkt); > + uint8_t pad_frag = > + dp_packet_l2_pad_size(rp->list->frag_list[0].pkt); > + > + void *l4_frag = dp_packet_l4(rp->list->frag_list[0].pkt); > + void *l4_reass = dp_packet_l4(pkt); > + memcpy(l4_frag, l4_reass, > + tail_frag - (char *) l4_frag - pad_frag); > + > + if (v4) { > + struct ip_header *l3_frag = > + dp_packet_l3(rp->list->frag_list[0].pkt); > + struct ip_header *l3_reass = dp_packet_l3(pkt); > + ovs_be32 reass_ip = get_16aligned_be32(&l3_reass-> > ip_src); > + ovs_be32 frag_ip = get_16aligned_be32(&l3_frag-> > ip_src); > + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, > + frag_ip, reass_ip); > + l3_frag->ip_src = l3_reass->ip_src; > + > + reass_ip = get_16aligned_be32(&l3_reass->ip_dst); > + frag_ip = get_16aligned_be32(&l3_frag->ip_dst); > + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, > + frag_ip, reass_ip); > + l3_frag->ip_dst = l3_reass->ip_dst; > + } else { > + struct ovs_16aligned_ip6_hdr 
*l3_frag = > + dp_packet_l3(rp->list->frag_list[0].pkt); > + struct ovs_16aligned_ip6_hdr *l3_reass = > + dp_packet_l3(pkt); > + l3_frag->ip6_src = l3_reass->ip6_src; > + l3_frag->ip6_dst = l3_reass->ip6_dst; > + } > + > + ipf_completed_list_add(rp->list); > + ipf_reassembled_list_remove(rp); > + dp_packet_delete(rp->pkt); > + free(rp); > + } else { > + dp_packet_batch_refill(pb, pkt, pb_idx); > + } > + } > + } > + ipf_lock_unlock(&ipf_lock); > +} > + > +void > +ipf_preprocess_conntrack(struct dp_packet_batch *pb, long long now, > + ovs_be16 dl_type, uint16_t zone, uint32_t > hash_basis) > +{ > + if (ipf_get_enabled()) { > + ipf_extract_frags_from_batch(pb, dl_type, zone, now, hash_basis); > + } > + > + if (ipf_get_enabled() || atomic_count_get(&nfrag)) { > + ipf_execute_reass_pkts(pb); > + } > +} > + > +void > +ipf_postprocess_conntrack(struct dp_packet_batch *pb, long long now, > + ovs_be16 dl_type) > +{ > + if (ipf_get_enabled() || atomic_count_get(&nfrag)) { > + ipf_post_execute_reass_pkts(pb, dl_type == htons(ETH_TYPE_IP)); > + ipf_send_completed_frags(pb, dl_type == htons(ETH_TYPE_IP), now); > + ipf_send_expired_frags(pb, now, dl_type == htons(ETH_TYPE_IP)); > + } > +} > + > +void > +ipf_init(void) > +{ > + ipf_lock_init(&ipf_lock); > + ipf_lock_lock(&ipf_lock); > + hmap_init(&frag_lists); > + ovs_list_init(&frag_exp_list); > + ovs_list_init(&frag_complete_list); > + ovs_list_init(&reassembled_pkt_list); > + atomic_init(&min_v4_frag_size, IPF_V4_FRAG_SIZE_MIN_DEF); > + atomic_init(&min_v6_frag_size, IPF_V6_FRAG_SIZE_MIN_DEF); > + max_v4_frag_list_size = DIV_ROUND_UP( > + IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE, > + min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE); > + ipf_lock_unlock(&ipf_lock); > + atomic_count_init(&nfrag, 0); > + atomic_count_init(&n4frag_accepted, 0); > + atomic_count_init(&n4frag_completed_sent, 0); > + atomic_count_init(&n4frag_expired_sent, 0); > + atomic_count_init(&n4frag_too_small, 0); > + atomic_count_init(&n4frag_overlap, 0); 
> + atomic_count_init(&n6frag_accepted, 0); > + atomic_count_init(&n6frag_completed_sent, 0); > + atomic_count_init(&n6frag_expired_sent, 0); > + atomic_count_init(&n6frag_too_small, 0); > + atomic_count_init(&n6frag_overlap, 0); > + atomic_init(&nfrag_max, IPF_MAX_FRAGS_DEFAULT); > + atomic_init(&ifp_v4_enabled, true); > + atomic_init(&ifp_v6_enabled, true); > +} > + > +void > +ipf_destroy(void) > +{ > + ipf_lock_lock(&ipf_lock); > + > + struct ipf_list *ipf_list; > + HMAP_FOR_EACH_POP (ipf_list, node, &frag_lists) { > + struct dp_packet *pkt; > + while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { > + pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; > + dp_packet_delete(pkt); > + atomic_count_dec(&nfrag); > + ipf_list->last_sent_idx++; > + } > + free(ipf_list->frag_list); > + free(ipf_list); > + } > + > + struct reassembled_pkt * rp; > + LIST_FOR_EACH_POP (rp, rp_list_node, &reassembled_pkt_list) { > + dp_packet_delete(rp->pkt); > + free(rp); > + } > + > + hmap_destroy(&frag_lists); > + ovs_list_poison(&frag_exp_list); > + ovs_list_poison(&frag_complete_list); > + ovs_list_poison(&reassembled_pkt_list); > + ipf_lock_unlock(&ipf_lock); > + ipf_lock_destroy(&ipf_lock); > +} > diff --git a/lib/ipf.h b/lib/ipf.h > new file mode 100644 > index 0000000..5861e96 > --- /dev/null > +++ b/lib/ipf.h > @@ -0,0 +1,63 @@ > +/* > + * Copyright (c) 2018 Nicira, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#ifndef IPF_H > +#define IPF_H 1 > + > +#include "dp-packet.h" > +#include "openvswitch/types.h" > + > +struct ipf_status { > + bool ifp_v4_enabled; > + unsigned int min_v4_frag_size; > + unsigned int nfrag_max; > + unsigned int nfrag; > + unsigned int n4frag_accepted; > + unsigned int n4frag_completed_sent; > + unsigned int n4frag_expired_sent; > + unsigned int n4frag_too_small; > + unsigned int n4frag_overlap; > + bool ifp_v6_enabled; > + unsigned int min_v6_frag_size; > + unsigned int n6frag_accepted; > + unsigned int n6frag_completed_sent; > + unsigned int n6frag_expired_sent; > + unsigned int n6frag_too_small; > + unsigned int n6frag_overlap; > +}; > + > +/* Collects and reassembles fragments which are to be sent through > + * conntrack, if fragment processing is enabled or fragments are > + * in flight. */ > +void > +ipf_preprocess_conntrack(struct dp_packet_batch *pb, long long now, > + ovs_be16 dl_type, uint16_t zone, uint32_t > hash_basis); > + > +/* Updates the state of fragments associated with reassembled packets and > + * sends out fragments that are either associated with completed > + * packets or expired, if fragment processing is enabled or fragments are > + * in flight. 
*/ > +void > +ipf_postprocess_conntrack(struct dp_packet_batch *pb, long long now, > + ovs_be16 dl_type); > + > +void > +ipf_init(void); > + > +void > +ipf_destroy(void); > + > +#endif /* ipf.h */ > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 33a89c8..115bca9 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -1758,7 +1758,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv4 fragmentation]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START() > > ADD_NAMESPACES(at_ns0, at_ns1) > @@ -1792,7 +1791,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv4 fragmentation expiry]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START() > > ADD_NAMESPACES(at_ns0, at_ns1) > @@ -1823,7 +1821,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv4 fragmentation + vlan]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START() > > ADD_NAMESPACES(at_ns0, at_ns1) > @@ -1859,7 +1856,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv4 fragmentation + cvlan]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START([set Open_vSwitch . > other_config:vlan-limit=0]) > OVS_CHECK_8021AD() > > @@ -1912,7 +1908,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv6 fragmentation]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START() > > ADD_NAMESPACES(at_ns0, at_ns1) > @@ -1952,7 +1947,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv6 fragmentation expiry]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START() > > ADD_NAMESPACES(at_ns0, at_ns1) > @@ -1993,7 +1987,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv6 fragmentation + vlan]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START() > > ADD_NAMESPACES(at_ns0, at_ns1) > @@ -2036,7 +2029,6 @@ AT_CLEANUP > > AT_SETUP([conntrack - IPv6 fragmentation + cvlan]) > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > OVS_TRAFFIC_VSWITCHD_START([set Open_vSwitch . 
> other_config:vlan-limit=0]) > OVS_CHECK_8021AD() > > @@ -2091,7 +2083,6 @@ AT_CLEANUP > AT_SETUP([conntrack - Fragmentation over vxlan]) > OVS_CHECK_VXLAN() > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > CHECK_CONNTRACK_LOCAL_STACK() > > OVS_TRAFFIC_VSWITCHD_START() > @@ -2144,7 +2135,6 @@ AT_CLEANUP > AT_SETUP([conntrack - IPv6 Fragmentation over vxlan]) > OVS_CHECK_VXLAN() > CHECK_CONNTRACK() > -CHECK_CONNTRACK_FRAG() > CHECK_CONNTRACK_LOCAL_STACK() > > OVS_TRAFFIC_VSWITCHD_START() > -- > 1.9.1 > > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev