Thanks William for the review. We need to use ovsrcu_postpone(tun_metadata_free, tab) because tun_table is protected by RCU and we can only free tun_table when all threads quiesce.
It turns out that this fix based on reference count is not beautiful, it doesn't fit well with current RCU mechanism. A previous commit seems fixing the samiliar issue: 254878c18874f6 (ofproto-dpif-xlate: Fix segmentation fault caused by tun_table) It is unsafe for frozen_state to reference tun_table because tun_table is protected by RCU while the lifecycle of frozen_state can span several RCU quiesce states. As we discussed offline, we can simply nullify tun_table in frozen_state. If frozen_state needs tun_table, the latest valid tun_table can be found by ofproto_get_tun_tab() efficiently. I will submit a new version. Yifeng On Mon, Apr 6, 2020 at 8:47 AM William Tu <u9012...@gmail.com> wrote: > > On Thu, Mar 26, 2020 at 12:58:21PM -0700, Yifeng Sun wrote: > > Tun_metadata can be referened by flow and frozen_state at the same > > time. When ovs-vswitchd handles TLV table mod message, the involved > > tun_metadata gets freed. The call trace to free tun_metadata is > > shown as below: > > > > ofproto_run > > - handle_openflow > > - handle_single_part_openflow > > - handle_tlv_table_mod > > - tun_metadata_table_mod > > - tun_metadata_postpone_free > > > > Unfortunately, this tun_metadata can be still used by some frozen_state, > > and later on when frozen_state tries to access its tun_metadata table, > > ovs-vswitchd crashes. The call trace to access tun_metadata from > > frozen_state is shown as below: > > > > udpif_upcall_handler > > - recv_upcalls > > - process_upcall > > - frozen_metadata_to_flow > > > > This patch fixes it by introducing a reference count to tun_metadata. > > Whenever a pointer of tun_metadata is passed between flow and > > frozen_state, we increase its reference count. Reference count > > is decreased at deallocation. > > > > In present code, pointer of tun_metadata can be passed between flows. > > It is safe because of RCU mechanism. > > > > VMware-BZ: #2526222 > > Signed-off-by: Yifeng Sun <pkusunyif...@gmail.com> > > --- > > lib/tun-metadata.c | 29 ++++++++++++++++++++++++++++- > > lib/tun-metadata.h | 2 ++ > > ofproto/ofproto-dpif-rid.c | 8 ++++++++ > > ofproto/ofproto-dpif-rid.h | 2 ++ > > 4 files changed, 40 insertions(+), 1 deletion(-) > > > > diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c > > index f8a0e19524e9..c4218a034a92 100644 > > --- a/lib/tun-metadata.c > > +++ b/lib/tun-metadata.c > > @@ -25,6 +25,7 @@ > > #include "nx-match.h" > > #include "odp-netlink.h" > > #include "openvswitch/ofp-match.h" > > +#include "ovs-atomic.h" > > #include "ovs-rcu.h" > > #include "packets.h" > > #include "tun-metadata.h" > > @@ -40,6 +41,11 @@ struct tun_meta_entry { > > /* Maps from TLV option class+type to positions in a struct tun_metadata's > > * 'opts' array. */ > > struct tun_table { > > + /* Struct tun_table can be referenced by struct frozen_state for a long > > + * time. This ref_cnt protects tun_table from being freed if it is still > > + * being used somewhere. */ > > + struct ovs_refcount ref_cnt; > > + > > /* TUN_METADATA<i> is stored in element <i>. */ > > struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS]; > > > > @@ -79,6 +85,24 @@ tun_key_type(uint32_t key) > > return key & 0xff; > > } > > > > +void > > +tun_metadata_ref(const struct tun_table *tab) > > +{ > > + if (tab) { > > + ovs_refcount_ref(&CONST_CAST(struct tun_table *, tab)->ref_cnt); > > + } > > +} > > + > > +unsigned int > > +tun_metadata_unref(const struct tun_table *tab) > > +{ > > + if (tab) { > > + return ovs_refcount_unref_relaxed( > > + &CONST_CAST(struct tun_table *, tab)->ref_cnt); > > + } > > + return -1; > return -1 looks weird since it's unsigned int. > > > +} > > + > > /* Returns a newly allocated tun_table. If 'old_map' is nonnull then the new > > * tun_table is a deep copy of the old one. */ > > struct tun_table * > > @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map) > > hmap_init(&new_map->key_hmap); > > } > > > > + ovs_refcount_init(&new_map->ref_cnt); > > return new_map; > > } > > > > @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map) > > void > > tun_metadata_postpone_free(struct tun_table *tab) > > { > > - ovsrcu_postpone(tun_metadata_free, tab); > > + if (tun_metadata_unref(tab) == 1) { > > + ovsrcu_postpone(tun_metadata_free, tab); > > If this is the last ref and "tun_metadata_unref(tab)== 1" > why not just call tun_metadata_free(tab)? > > Regards, > William > > > + } > > } > > > > enum ofperr > > diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h > > index 7dad9504b8da..933021a0f679 100644 > > --- a/lib/tun-metadata.h > > +++ b/lib/tun-metadata.h > > @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod; > > struct ofputil_tlv_table_reply; > > struct tun_table; > > > > +void tun_metadata_ref(const struct tun_table *tab); > > +unsigned int tun_metadata_unref(const struct tun_table *tab); > > struct tun_table *tun_metadata_alloc(const struct tun_table *old_map); > > void tun_metadata_free(struct tun_table *); > > void tun_metadata_postpone_free(struct tun_table *); > > diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c > > index 29aafc2c0b40..d479e53d9b2d 100644 > > --- a/ofproto/ofproto-dpif-rid.c > > +++ b/ofproto/ofproto-dpif-rid.c > > @@ -201,6 +201,7 @@ static void > > frozen_state_clone(struct frozen_state *new, const struct frozen_state *old) > > { > > *new = *old; > > + tun_metadata_ref(old->metadata.tunnel.metadata.tab); > > new->stack = (new->stack_size > > ? xmemdup(new->stack, new->stack_size) > > : NULL); > > @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new, const struct frozen_state *old) > > static void > > frozen_state_free(struct frozen_state *state) > > { > > + struct tun_table *tab; > > + > > free(state->stack); > > free(state->ofpacts); > > free(state->action_set); > > free(state->userdata); > > + > > + tab = CONST_CAST(struct tun_table *, state->metadata.tunnel.metadata.tab); > > + if (tun_metadata_unref(tab) == 1) { > > + tun_metadata_free(tab); > > + } > > } > > > > /* Allocate a unique recirculation id for the given set of flow metadata. > > diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h > > index e5d02caf28a3..1fefbf53b94a 100644 > > --- a/ofproto/ofproto-dpif-rid.h > > +++ b/ofproto/ofproto-dpif-rid.h > > @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata *md, > > { > > memset(md, 0, sizeof *md); > > md->tunnel = flow->tunnel; > > + tun_metadata_ref(flow->tunnel.metadata.tab); > > md->metadata = flow->metadata; > > memcpy(md->regs, flow->regs, sizeof md->regs); > > md->in_port = flow->in_port.ofp_port; > > @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct frozen_metadata *md, > > struct flow *flow) > > { > > flow->tunnel = md->tunnel; > > + tun_metadata_ref(md->tunnel.metadata.tab); > > flow->metadata = md->metadata; > > memcpy(flow->regs, md->regs, sizeof flow->regs); > > flow->in_port.ofp_port = md->in_port; > > -- > > 2.7.4 > > > > _______________________________________________ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev