On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote: > Currently, the per cpu upcall counters are allocated after the vport is > created and inserted into the system. This could lead to the datapath > accessing the counters before they are allocated resulting in a kernel > Oops. > > Here is an example: > > PID: 59693 TASK: ffff0005f4f51500 CPU: 0 COMMAND: "ovs-vswitchd" > #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4 > #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc > #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60 > #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58 > #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388 > #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c > #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68 > #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch] > ... > > PID: 58682 TASK: ffff0005b2f0bf00 CPU: 0 COMMAND: "kworker/0:3" > #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758 > #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994 > #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8 > #3 [ffff80000a5d3120] die at ffffb70f0628234c > #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8 > #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4 > #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4 > #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710 > #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74 > #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac > #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24 > #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc > #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch] > #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc > [openvswitch] > #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch] > #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch] > #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch] > #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at > ffffb70f06079f90 > > We moved the per cpu upcall counter allocation to the existing vport > alloc and free functions to solve this. > > Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure") > Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets") > Signed-off-by: Eelco Chaudron <echau...@redhat.com> > --- > net/openvswitch/datapath.c | 19 ------------------- > net/openvswitch/vport.c | 8 ++++++++ > 2 files changed, 8 insertions(+), 19 deletions(-) > > diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c > index fcee6012293b..58f530f60172 100644 > --- a/net/openvswitch/datapath.c > +++ b/net/openvswitch/datapath.c > @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p) > /* First drop references to device. */ > hlist_del_rcu(&p->dp_hash_node); > > - /* Free percpu memory */ > - free_percpu(p->upcall_stats); > - > /* Then destroy it. */ > ovs_vport_del(p); > } > @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct > genl_info *info) > goto err_destroy_portids; > } > > - vport->upcall_stats = netdev_alloc_pcpu_stats(struct > vport_upcall_stats_percpu); > - if (!vport->upcall_stats) { > - err = -ENOMEM; > - goto err_destroy_vport; > - } > - > err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, > info->snd_seq, 0, OVS_DP_CMD_NEW); > BUG_ON(err < 0); > @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct > genl_info *info) > ovs_notify(&dp_datapath_genl_family, reply, info); > return 0; > > -err_destroy_vport: > - ovs_dp_detach_port(vport); > err_destroy_portids: > kfree(rcu_dereference_raw(dp->upcall_portids)); > err_unlock_and_destroy_meters: > @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, > struct genl_info *info) > goto exit_unlock_free; > } > > - vport->upcall_stats = netdev_alloc_pcpu_stats(struct > vport_upcall_stats_percpu); > - if (!vport->upcall_stats) { > - err = -ENOMEM; > - goto exit_unlock_free_vport; > - } > - > err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), > info->snd_portid, info->snd_seq, 0, > OVS_VPORT_CMD_NEW, GFP_KERNEL); > @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, > struct genl_info *info) > ovs_notify(&dp_vport_genl_family, reply, info); > return 0; > > -exit_unlock_free_vport: > - ovs_dp_detach_port(vport); > exit_unlock_free: > ovs_unlock(); > kfree_skb(reply); > diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c > index 7e0f5c45b512..e91ae5dd7d22 100644 > --- a/net/openvswitch/vport.c > +++ b/net/openvswitch/vport.c
Hi Eelco, could we move to a more idiomatic implementation of the error path in ovs_vport_alloc() ? I know it's not strictly related to this change, but OTOH, it is. > @@ -135,12 +135,19 @@ struct vport *ovs_vport_alloc(int priv_size, const > struct vport_ops *ops, > if (!vport) > return ERR_PTR(-ENOMEM); > > + vport->upcall_stats = netdev_alloc_pcpu_stats(struct > vport_upcall_stats_percpu); > + if (!vport->upcall_stats) { err = -ENOMEM; goto err_kfree_vport; > + kfree(vport); > + return ERR_PTR(-ENOMEM); > + } > + > vport->dp = parms->dp; > vport->port_no = parms->port_no; > vport->ops = ops; > INIT_HLIST_NODE(&vport->dp_hash_node); > > if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { err = -EINVAL; goto err_free_percpu; > + free_percpu(vport->upcall_stats); > kfree(vport); > return ERR_PTR(-EINVAL); > } ... return vport; err_kfree_vport: kfree(vport); err_free_percpu: free_percpu(vport->upcall_stats); return(ERR_PTR(err)); > @@ -165,6 +172,7 @@ void ovs_vport_free(struct vport *vport) > * it is safe to use raw dereference. > */ > kfree(rcu_dereference_raw(vport->upcall_portids)); > + free_percpu(vport->upcall_stats); > kfree(vport); > } > EXPORT_SYMBOL_GPL(ovs_vport_free); > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev