On Tue, Sep 03, 2019 at 04:45:41PM -0400, Lyude Paul wrote:
> When reprobing an MST topology during resume, we have to account for the
> fact that while we were suspended it's possible that mstbs may have been
> removed from any ports in the topology. Since iterating downwards in the
> topology requires that we hold &mgr->lock, destroying MSTBs from this
> context would result in attempting to lock &mgr->lock a second time and
> deadlocking.
> 
> So, fix this by first moving destruction of MSTBs into
> destroy_connector_work, then rename destroy_connector_work and friends
> to reflect that they now destroy both ports and mstbs.
> 
> Changes since v1:
> * s/destroy_connector_list/destroy_port_list/
>   s/connector_destroy_lock/delayed_destroy_lock/
>   s/connector_destroy_work/delayed_destroy_work/
>   s/drm_dp_finish_destroy_branch_device/drm_dp_delayed_destroy_mstb/
>   s/drm_dp_finish_destroy_port/drm_dp_delayed_destroy_port/
>   - danvet
> * Use two loops in drm_dp_delayed_destroy_work() - danvet
> * Better explain why we need to do this - danvet
> * Use cancel_work_sync() instead of flush_work() - flush_work() doesn't
>   account for work requeing
> 
> Cc: Juston Li <juston...@intel.com>
> Cc: Imre Deak <imre.d...@intel.com>
> Cc: Ville Syrjälä <ville.syrj...@linux.intel.com>
> Cc: Harry Wentland <hwent...@amd.com>
> Cc: Daniel Vetter <dan...@ffwll.ch>
> Signed-off-by: Lyude Paul <ly...@redhat.com>

Took me a while to grok this, and I'm still not 100% confident my mental model
is correct, so please bear with me while I ask silly questions :)

Now that the destroy is delayed, and the port remains in the topology, is it
possible we will underflow the topology kref by calling put_mstb multiple times?
It looks like that would result in a WARN from refcount.c, and wouldn't call the
destroy function multiple times, so that's nice :)

Similarly, is there any defense against calling get_mstb() between destroy() and
the delayed destroy worker running?

Sean

> ---
>  drivers/gpu/drm/drm_dp_mst_topology.c | 162 +++++++++++++++++---------
>  include/drm/drm_dp_mst_helper.h       |  26 +++--
>  2 files changed, 127 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
> b/drivers/gpu/drm/drm_dp_mst_topology.c
> index 3054ec622506..738f260d4b15 100644
> --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> @@ -1113,34 +1113,17 @@ static void drm_dp_destroy_mst_branch_device(struct 
> kref *kref)
>       struct drm_dp_mst_branch *mstb =
>               container_of(kref, struct drm_dp_mst_branch, topology_kref);
>       struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
> -     struct drm_dp_mst_port *port, *tmp;
> -     bool wake_tx = false;
>  
> -     mutex_lock(&mgr->lock);
> -     list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
> -             list_del(&port->next);
> -             drm_dp_mst_topology_put_port(port);
> -     }
> -     mutex_unlock(&mgr->lock);
> -
> -     /* drop any tx slots msg */
> -     mutex_lock(&mstb->mgr->qlock);
> -     if (mstb->tx_slots[0]) {
> -             mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
> -             mstb->tx_slots[0] = NULL;
> -             wake_tx = true;
> -     }
> -     if (mstb->tx_slots[1]) {
> -             mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
> -             mstb->tx_slots[1] = NULL;
> -             wake_tx = true;
> -     }
> -     mutex_unlock(&mstb->mgr->qlock);
> +     INIT_LIST_HEAD(&mstb->destroy_next);
>  
> -     if (wake_tx)
> -             wake_up_all(&mstb->mgr->tx_waitq);
> -
> -     drm_dp_mst_put_mstb_malloc(mstb);
> +     /*
> +      * This can get called under mgr->mutex, so we need to perform the
> +      * actual destruction of the mstb in another worker
> +      */
> +     mutex_lock(&mgr->delayed_destroy_lock);
> +     list_add(&mstb->destroy_next, &mgr->destroy_branch_device_list);
> +     mutex_unlock(&mgr->delayed_destroy_lock);
> +     schedule_work(&mgr->delayed_destroy_work);
>  }
>  
>  /**
> @@ -1255,10 +1238,10 @@ static void drm_dp_destroy_port(struct kref *kref)
>                        * we might be holding the mode_config.mutex
>                        * from an EDID retrieval */
>  
> -                     mutex_lock(&mgr->destroy_connector_lock);
> -                     list_add(&port->next, &mgr->destroy_connector_list);
> -                     mutex_unlock(&mgr->destroy_connector_lock);
> -                     schedule_work(&mgr->destroy_connector_work);
> +                     mutex_lock(&mgr->delayed_destroy_lock);
> +                     list_add(&port->next, &mgr->destroy_port_list);
> +                     mutex_unlock(&mgr->delayed_destroy_lock);
> +                     schedule_work(&mgr->delayed_destroy_work);
>                       return;
>               }
>               /* no need to clean up vcpi
> @@ -2792,7 +2775,7 @@ void drm_dp_mst_topology_mgr_suspend(struct 
> drm_dp_mst_topology_mgr *mgr)
>                          DP_MST_EN | DP_UPSTREAM_IS_SRC);
>       mutex_unlock(&mgr->lock);
>       flush_work(&mgr->work);
> -     flush_work(&mgr->destroy_connector_work);
> +     flush_work(&mgr->delayed_destroy_work);
>  }
>  EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend);
>  
> @@ -3740,34 +3723,104 @@ static void drm_dp_tx_work(struct work_struct *work)
>       mutex_unlock(&mgr->qlock);
>  }
>  
> -static void drm_dp_destroy_connector_work(struct work_struct *work)
> +static inline void
> +drm_dp_delayed_destroy_port(struct drm_dp_mst_port *port)
>  {
> -     struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct 
> drm_dp_mst_topology_mgr, destroy_connector_work);
> -     struct drm_dp_mst_port *port;
> -     bool send_hotplug = false;
> +     port->mgr->cbs->destroy_connector(port->mgr, port->connector);
> +
> +     drm_dp_port_teardown_pdt(port, port->pdt);
> +     port->pdt = DP_PEER_DEVICE_NONE;
> +
> +     drm_dp_mst_put_port_malloc(port);
> +}
> +
> +static inline void
> +drm_dp_delayed_destroy_mstb(struct drm_dp_mst_branch *mstb)
> +{
> +     struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
> +     struct drm_dp_mst_port *port, *tmp;
> +     bool wake_tx = false;
> +
> +     mutex_lock(&mgr->lock);
> +     list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
> +             list_del(&port->next);
> +             drm_dp_mst_topology_put_port(port);
> +     }
> +     mutex_unlock(&mgr->lock);
> +
> +     /* drop any tx slots msg */
> +     mutex_lock(&mstb->mgr->qlock);
> +     if (mstb->tx_slots[0]) {
> +             mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
> +             mstb->tx_slots[0] = NULL;
> +             wake_tx = true;
> +     }
> +     if (mstb->tx_slots[1]) {
> +             mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
> +             mstb->tx_slots[1] = NULL;
> +             wake_tx = true;
> +     }
> +     mutex_unlock(&mstb->mgr->qlock);
> +
> +     if (wake_tx)
> +             wake_up_all(&mstb->mgr->tx_waitq);
> +
> +     drm_dp_mst_put_mstb_malloc(mstb);
> +}
> +
> +static void drm_dp_delayed_destroy_work(struct work_struct *work)
> +{
> +     struct drm_dp_mst_topology_mgr *mgr =
> +             container_of(work, struct drm_dp_mst_topology_mgr,
> +                          delayed_destroy_work);
> +     bool send_hotplug = false, go_again;
> +
>       /*
>        * Not a regular list traverse as we have to drop the destroy
> -      * connector lock before destroying the connector, to avoid AB->BA
> +      * connector lock before destroying the mstb/port, to avoid AB->BA
>        * ordering between this lock and the config mutex.
>        */
> -     for (;;) {
> -             mutex_lock(&mgr->destroy_connector_lock);
> -             port = list_first_entry_or_null(&mgr->destroy_connector_list, 
> struct drm_dp_mst_port, next);
> -             if (!port) {
> -                     mutex_unlock(&mgr->destroy_connector_lock);
> -                     break;
> +     do {
> +             go_again = false;
> +
> +             for (;;) {
> +                     struct drm_dp_mst_branch *mstb;
> +
> +                     mutex_lock(&mgr->delayed_destroy_lock);
> +                     mstb = 
> list_first_entry_or_null(&mgr->destroy_branch_device_list,
> +                                                     struct 
> drm_dp_mst_branch,
> +                                                     destroy_next);
> +                     if (mstb)
> +                             list_del(&mstb->destroy_next);
> +                     mutex_unlock(&mgr->delayed_destroy_lock);
> +
> +                     if (!mstb)
> +                             break;
> +
> +                     drm_dp_delayed_destroy_mstb(mstb);
> +                     go_again = true;
>               }
> -             list_del(&port->next);
> -             mutex_unlock(&mgr->destroy_connector_lock);
>  
> -             mgr->cbs->destroy_connector(mgr, port->connector);
> +             for (;;) {
> +                     struct drm_dp_mst_port *port;
>  
> -             drm_dp_port_teardown_pdt(port, port->pdt);
> -             port->pdt = DP_PEER_DEVICE_NONE;
> +                     mutex_lock(&mgr->delayed_destroy_lock);
> +                     port = list_first_entry_or_null(&mgr->destroy_port_list,
> +                                                     struct drm_dp_mst_port,
> +                                                     next);
> +                     if (port)
> +                             list_del(&port->next);
> +                     mutex_unlock(&mgr->delayed_destroy_lock);
> +
> +                     if (!port)
> +                             break;
> +
> +                     drm_dp_delayed_destroy_port(port);
> +                     send_hotplug = true;
> +                     go_again = true;
> +             }
> +     } while (go_again);
>  
> -             drm_dp_mst_put_port_malloc(port);
> -             send_hotplug = true;
> -     }
>       if (send_hotplug)
>               drm_kms_helper_hotplug_event(mgr->dev);
>  }
> @@ -3957,12 +4010,13 @@ int drm_dp_mst_topology_mgr_init(struct 
> drm_dp_mst_topology_mgr *mgr,
>       mutex_init(&mgr->lock);
>       mutex_init(&mgr->qlock);
>       mutex_init(&mgr->payload_lock);
> -     mutex_init(&mgr->destroy_connector_lock);
> +     mutex_init(&mgr->delayed_destroy_lock);
>       INIT_LIST_HEAD(&mgr->tx_msg_downq);
> -     INIT_LIST_HEAD(&mgr->destroy_connector_list);
> +     INIT_LIST_HEAD(&mgr->destroy_port_list);
> +     INIT_LIST_HEAD(&mgr->destroy_branch_device_list);
>       INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work);
>       INIT_WORK(&mgr->tx_work, drm_dp_tx_work);
> -     INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work);
> +     INIT_WORK(&mgr->delayed_destroy_work, drm_dp_delayed_destroy_work);
>       init_waitqueue_head(&mgr->tx_waitq);
>       mgr->dev = dev;
>       mgr->aux = aux;
> @@ -4005,7 +4059,7 @@ void drm_dp_mst_topology_mgr_destroy(struct 
> drm_dp_mst_topology_mgr *mgr)
>  {
>       drm_dp_mst_topology_mgr_set_mst(mgr, false);
>       flush_work(&mgr->work);
> -     flush_work(&mgr->destroy_connector_work);
> +     cancel_work_sync(&mgr->delayed_destroy_work);
>       mutex_lock(&mgr->payload_lock);
>       kfree(mgr->payloads);
>       mgr->payloads = NULL;
> diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
> index fc349204a71b..4a4507fe928d 100644
> --- a/include/drm/drm_dp_mst_helper.h
> +++ b/include/drm/drm_dp_mst_helper.h
> @@ -143,6 +143,12 @@ struct drm_dp_mst_branch {
>        */
>       struct kref malloc_kref;
>  
> +     /**
> +      * @destroy_next: linked-list entry used by
> +      * drm_dp_delayed_destroy_work()
> +      */
> +     struct list_head destroy_next;
> +
>       u8 rad[8];
>       u8 lct;
>       int num_ports;
> @@ -575,18 +581,24 @@ struct drm_dp_mst_topology_mgr {
>       struct work_struct tx_work;
>  
>       /**
> -      * @destroy_connector_list: List of to be destroyed connectors.
> +      * @destroy_port_list: List of to be destroyed connectors.
> +      */
> +     struct list_head destroy_port_list;
> +     /**
> +      * @destroy_branch_device_list: List of to be destroyed branch
> +      * devices.
>        */
> -     struct list_head destroy_connector_list;
> +     struct list_head destroy_branch_device_list;
>       /**
> -      * @destroy_connector_lock: Protects @connector_list.
> +      * @delayed_destroy_lock: Protects @destroy_port_list and
> +      * @destroy_branch_device_list.
>        */
> -     struct mutex destroy_connector_lock;
> +     struct mutex delayed_destroy_lock;
>       /**
> -      * @destroy_connector_work: Work item to destroy connectors. Needed to
> -      * avoid locking inversion.
> +      * @delayed_destroy_work: Work item to destroy MST port and branch
> +      * devices, needed to avoid locking inversion.
>        */
> -     struct work_struct destroy_connector_work;
> +     struct work_struct delayed_destroy_work;
>  };
>  
>  int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
> -- 
> 2.21.0
> 

-- 
Sean Paul, Software Engineer, Google / Chromium OS
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau

Reply via email to