From: Wei Hu <w...@microsoft.com> Sent: Thursday, April 30, 2020 10:37 PM
> 
> In the case of kdump, the PCI device is not cleanly shut down
> before the kdump kernel starts. This causes the initial
> attempt to enter the D0 state in the kdump kernel to fail, with
> the Hyper-V host returning an invalid device state.
> When this happens, explicitly call PCI bus exit and retry
> entering the D0 state.
> 
> Signed-off-by: Wei Hu <w...@microsoft.com>
> ---
>    v2: Incorporate review comments from Michael Kelley, Dexuan Cui and
>    Bjorn Helgaas
> 
>  drivers/pci/controller/pci-hyperv.c | 40 +++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index e6fac0187722..92092a47d3af 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -2739,6 +2739,8 @@ static void hv_free_config_window(struct hv_pcibus_device *hbus)
>       vmbus_free_mmio(hbus->mem_config->start, PCI_CONFIG_MMIO_LENGTH);
>  }
> 
> +static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs);
> +
>  /**
>   * hv_pci_enter_d0() - Bring the "bus" into the D0 power state
>   * @hdev:    VMBus's tracking struct for this root PCI bus
> @@ -2751,8 +2753,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>       struct pci_bus_d0_entry *d0_entry;
>       struct hv_pci_compl comp_pkt;
>       struct pci_packet *pkt;
> +     bool retry = true;
>       int ret;
> 
> +enter_d0_retry:
>       /*
>        * Tell the host that the bus is ready to use, and moved into the
>        * powered-on state.  This includes telling the host which region
> @@ -2779,6 +2783,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>       if (ret)
>               goto exit;
> 
> +     /*
> +      * In certain cases (kdump) the PCI device of interest was
> +      * not cleanly shut down and its resources are still held on the
> +      * host side, so the host could return an invalid device status.
> +      * We need to explicitly request the host to release the resources
> +      * and try to enter D0 again.
> +      */
> +     if (comp_pkt.completion_status < 0 && retry) {
> +             retry = false;
> +
> +             dev_err(&hdev->device, "Retrying D0 Entry\n");
> +
> +             /*
> +              * hv_pci_bus_exit() calls hv_send_resource_released()
> +              * to free up the resources of its child devices.
> +              * In the kdump kernel we need to set
> +              * wslot_res_allocated to 255 so that it scans all child
> +              * devices to release the resources allocated in the
> +              * normal kernel before the panic happened.
> +              */
> +             hbus->wslot_res_allocated = 255;
> +
> +             ret = hv_pci_bus_exit(hdev, true);
> +
> +             if (ret == 0) {
> +                     kfree(pkt);
> +                     goto enter_d0_retry;
> +             }
> +             dev_err(&hdev->device,
> +                     "Retrying D0 failed with ret %d\n", ret);
> +     }
> +
>       if (comp_pkt.completion_status < 0) {
>               dev_err(&hdev->device,
>                       "PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -3185,7 +3221,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>       return ret;
>  }
> 
> -static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating)
> +static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
>  {
>       struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
>       struct {
> @@ -3203,7 +3239,7 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating)
>       if (hdev->channel->rescind)
>               return 0;
> 
> -     if (!hibernating) {
> +     if (!keep_devs) {
>               /* Delete any children which might still exist. */
>               dr = kzalloc(sizeof(*dr), GFP_KERNEL);
>               if (dr && hv_pci_start_relations_work(hbus, dr))
> --
> 2.20.1

Reviewed-by: Michael Kelley <mikel...@microsoft.com>

Reply via email to