On Fri, Aug 7, 2020 at 8:33 AM Johan Knöös <jkn...@google.com> wrote:
>
> On Tue, Aug 4, 2020 at 8:52 AM Gregory Rose <gvrose8...@gmail.com> wrote:
> >
> >
> >
> > On 8/3/2020 12:01 PM, Johan Knöös via discuss wrote:
> > > Hi Open vSwitch contributors,
> > >
> > > We have found openvswitch is causing double-freeing of memory. The
> > > issue was not present in kernel version 5.5.17 but is present in
> > > 5.6.14 and newer kernels.
> > >
> > > After reverting the RCU commits below for debugging, enabling
> > > slub_debug, lockdep, and KASAN, we see the warnings at the end of this
> > > email in the kernel log (the last one shows the double-free). When I
> > > revert 50b0e61b32ee890a75b4377d5fbe770a86d6a4c1 ("net: openvswitch:
> > > fix possible memleak on destroy flow-table"), the symptoms disappear.
> > > While I have a reliable way to reproduce the issue, I unfortunately
> > > don't yet have a process that's amenable to sharing. Please take a
> > > look.
> > >
> > > 189a6883dcf7 rcu: Remove kfree_call_rcu_nobatch()
> > > 77a40f97030b rcu: Remove kfree_rcu() special casing and lazy-callback 
> > > handling
> > > e99637becb2e rcu: Add support for debug_objects debugging for kfree_rcu()
> > > 0392bebebf26 rcu: Add multiple in-flight batches of kfree_rcu() work
> > > 569d767087ef rcu: Make kfree_rcu() use a non-atomic ->monitor_todo
> > > a35d16905efc rcu: Add basic support for kfree_rcu() batching
> > >
> > > Thanks,
> > > Johan Knöös
> >
> > Let's add the author of the patch you reverted and the Linux netdev
> > mailing list.
> >
> > - Greg
>
> I found we also sometimes get warnings from
> https://elixir.bootlin.com/linux/v5.5.17/source/kernel/rcu/tree.c#L2239
> under similar conditions even on kernel 5.5.17, which I believe may be
> related. However, it's much rarer and I don't have a reliable way of
> reproducing it. Perhaps 50b0e61b32ee890a75b4377d5fbe770a86d6a4c1 only
> increases the frequency of a pre-existing bug.

It seems clear we have a double free on table->mask_array when
the reallocation is triggered on the destroy path.

Are you able to test the attached patch (compile tested only)?
Also note: it is generated against the latest net tree, it may not be
applied cleanly to any earlier stable release.

Thanks!
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 8c12675cbb67..cc7859db445a 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -294,7 +294,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
 }
 
 static void tbl_mask_array_del_mask(struct flow_table *tbl,
-				    struct sw_flow_mask *mask)
+				    struct sw_flow_mask *mask, bool destroy)
 {
 	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
 	int i, ma_count = READ_ONCE(ma->count);
@@ -314,6 +314,11 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,
 	rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
 	RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
 
+	if (destroy) {
+		kfree(mask);
+		return;
+	}
+
 	kfree_rcu(mask, rcu);
 
 	/* Shrink the mask array if necessary. */
@@ -326,7 +331,8 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,
 }
 
 /* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask,
+			     bool destroy)
 {
 	if (mask) {
 		/* ovs-lock is required to protect mask-refcount and
@@ -337,7 +343,7 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 		mask->ref_count--;
 
 		if (!mask->ref_count)
-			tbl_mask_array_del_mask(tbl, mask);
+			tbl_mask_array_del_mask(tbl, mask, destroy);
 	}
 }
 
@@ -470,7 +476,7 @@ static void table_instance_flow_free(struct flow_table *table,
 			table->ufid_count--;
 	}
 
-	flow_mask_remove(table, flow->mask);
+	flow_mask_remove(table, flow->mask, !count);
 }
 
 static void table_instance_destroy(struct flow_table *table,
@@ -521,9 +527,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 	struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
 	struct mask_array *ma = rcu_dereference_raw(table->mask_array);
 
+	table_instance_destroy(table, ti, ufid_ti, false);
 	call_rcu(&mc->rcu, mask_cache_rcu_cb);
 	call_rcu(&ma->rcu, mask_array_rcu_cb);
-	table_instance_destroy(table, ti, ufid_ti, false);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
_______________________________________________
discuss mailing list
disc...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to