Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-09-10 Thread Mike Qiu

On 08/28/2014 02:34 AM, James Smart wrote:

Mike,

Can you confirm - the "nulls" this patch correct are because the 
probe_one and error_detect threads are running concurrently, thus 
battling ?


If so - this fix looks insufficient and we should rework it.


Yes, it is. My patch is just a workaround for this bug.



Q: why are they allowed to run concurrently ?  I could see this solved 
at the platform level to let probe_one finish before error_detect is 
called (and therefore stating error_detect only makes sense to call if 
probe_one was successful). It's also a much driver-friendly solution. 
I could see other drivers have much the same issue with concurrency 
and data structure teardown - and if locks aren't allowed in the 
error-detect path... it's not good.




I agree with you on this point, platform solution is much better. So 
maybe use a lock or a flag to show it is in such stat, this maybe  also 
happens when driver is in  remove stat.


Thanks,
Mike

-- james s



On 7/31/2014 10:16 PM, Mike Qiu wrote:

On 07/17/2014 02:32 PM, Mike Qiu wrote:


Hi, all

How about this patch ?

Any idea ?


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW 
3.10.42-2002.pkvm2_1_1.6.ppc64 #1

Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032   CR: 28b52b44  XER: 
2000

CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu 
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 
+++

  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */

  uint8_t soft_wwn_enable;
+uint8_t probe_done;

  struct timer_list fcp_poll_timer;
  struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c 
b/drivers/scsi/lpfc/lpfc_init.c

index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  }
  }

+/* Set the probe flag */
+phba->probe_done = 1;
+
  /* Perform post initialization setup */
  lpfc_post_init_setup(phba);

@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba 
*phba)

  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  "2710 PCI channel disable preparing for reset\n");

@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba 
*phba)


  /* Disable interrupt and pci device */
  lpfc_sli_disable_intr(phba);
-pci_disable_device(phba->pcidev);
+if (phba->probe_done && phba->pcidev)
+pci_disable_device(phba->pcidev);
  }

  /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  goto out_disable_intr;
  }

+/* Set probe_done flag */
+phba->probe_done = 1;
+
  /* Log the current active interrupt mode */
  phba->intr_mode = intr_mode;
  lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct 
lpfc_hba *phba)

  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (!phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  "2826 PCI channel disable preparing for reset\n");

@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba 
*phba)

  /* Disable interrupt and pci device */
  lpfc_sli4_disable_intr(phba);
  lpfc_sli4_queue_destroy(phba);
-pci_disable_device(phba->pcidev);
+
+if (phba->probe_done && phba->pcidev)
+pci_disable_device(phba->pcidev);
  }

  /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-09-10 Thread Mike Qiu

On 08/28/2014 02:34 AM, James Smart wrote:

Mike,

Can you confirm - the nulls this patch correct are because the 
probe_one and error_detect threads are running concurrently, thus 
battling ?


If so - this fix looks insufficient and we should rework it.


Yes, it is. My patch is just a workaround for this bug.



Q: why are they allowed to run concurrently ?  I could see this solved 
at the platform level to let probe_one finish before error_detect is 
called (and therefore stating error_detect only makes sense to call if 
probe_one was successful). It's also a much driver-friendly solution. 
I could see other drivers have much the same issue with concurrency 
and data structure teardown - and if locks aren't allowed in the 
error-detect path... it's not good.




I agree with you on this point, platform solution is much better. So 
maybe use a lock or a flag to show it is in such stat, this maybe  also 
happens when driver is in  remove stat.


Thanks,
Mike

-- james s



On 7/31/2014 10:16 PM, Mike Qiu wrote:

On 07/17/2014 02:32 PM, Mike Qiu wrote:


Hi, all

How about this patch ?

Any idea ?


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW 
3.10.42-2002.pkvm2_1_1.6.ppc64 #1

Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032 SF,HV,EE,ME,IR,DR,RI  CR: 28b52b44  XER: 
2000

CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu qiud...@linux.vnet.ibm.com
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 
+++

  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */

  uint8_t soft_wwn_enable;
+uint8_t probe_done;

  struct timer_list fcp_poll_timer;
  struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c 
b/drivers/scsi/lpfc/lpfc_init.c

index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  }
  }

+/* Set the probe flag */
+phba-probe_done = 1;
+
  /* Perform post initialization setup */
  lpfc_post_init_setup(phba);

@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba 
*phba)

  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  2710 PCI channel disable preparing for reset\n);

@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba 
*phba)


  /* Disable interrupt and pci device */
  lpfc_sli_disable_intr(phba);
-pci_disable_device(phba-pcidev);
+if (phba-probe_done  phba-pcidev)
+pci_disable_device(phba-pcidev);
  }

  /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  goto out_disable_intr;
  }

+/* Set probe_done flag */
+phba-probe_done = 1;
+
  /* Log the current active interrupt mode */
  phba-intr_mode = intr_mode;
  lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct 
lpfc_hba *phba)

  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (!phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  2826 PCI channel disable preparing for reset\n);

@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba 
*phba)

  /* Disable interrupt and pci device */
  lpfc_sli4_disable_intr(phba);
  lpfc_sli4_queue_destroy(phba);
-pci_disable_device(phba-pcidev);
+
+if (phba-probe_done  phba-pcidev)
+pci_disable_device(phba-pcidev);
  }

  /**
@@ -10893,9 +10908,21 @@ static 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-08-27 Thread James Smart

Mike,

Can you confirm - the "nulls" this patch correct are because the 
probe_one and error_detect threads are running concurrently, thus battling ?


If so - this fix looks insufficient and we should rework it.

Q: why are they allowed to run concurrently ?  I could see this solved 
at the platform level to let probe_one finish before error_detect is 
called (and therefore stating error_detect only makes sense to call if 
probe_one was successful). It's also a much driver-friendly solution. I 
could see other drivers have much the same issue with concurrency and 
data structure teardown - and if locks aren't allowed in the 
error-detect path... it's not good.


-- james s



On 7/31/2014 10:16 PM, Mike Qiu wrote:

On 07/17/2014 02:32 PM, Mike Qiu wrote:


Hi, all

How about this patch ?

Any idea ?


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW 
3.10.42-2002.pkvm2_1_1.6.ppc64 #1

Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032   CR: 28b52b44  XER: 
2000

CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu 
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 
+++

  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */

  uint8_t soft_wwn_enable;
+uint8_t probe_done;

  struct timer_list fcp_poll_timer;
  struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c 
b/drivers/scsi/lpfc/lpfc_init.c

index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  }
  }

+/* Set the probe flag */
+phba->probe_done = 1;
+
  /* Perform post initialization setup */
  lpfc_post_init_setup(phba);

@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba 
*phba)

  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  "2710 PCI channel disable preparing for reset\n");

@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)

  /* Disable interrupt and pci device */
  lpfc_sli_disable_intr(phba);
-pci_disable_device(phba->pcidev);
+if (phba->probe_done && phba->pcidev)
+pci_disable_device(phba->pcidev);
  }

  /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  goto out_disable_intr;
  }

+/* Set probe_done flag */
+phba->probe_done = 1;
+
  /* Log the current active interrupt mode */
  phba->intr_mode = intr_mode;
  lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct 
lpfc_hba *phba)

  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (!phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  "2826 PCI channel disable preparing for reset\n");

@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba 
*phba)

  /* Disable interrupt and pci device */
  lpfc_sli4_disable_intr(phba);
  lpfc_sli4_queue_destroy(phba);
-pci_disable_device(phba->pcidev);
+
+if (phba->probe_done && phba->pcidev)
+pci_disable_device(phba->pcidev);
  }

  /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t 
state)

  {
  struct Scsi_Host *shost = pci_get_drvdata(pdev);
-struct lpfc_hba *phba = ((struct lpfc_vport 
*)shost->hostdata)->phba;

+struct lpfc_hba *phba;
  pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;

+if (!shost)
+/* Run 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-08-27 Thread James Smart

Mike,

Can you confirm - the nulls this patch correct are because the 
probe_one and error_detect threads are running concurrently, thus battling ?


If so - this fix looks insufficient and we should rework it.

Q: why are they allowed to run concurrently ?  I could see this solved 
at the platform level to let probe_one finish before error_detect is 
called (and therefore stating error_detect only makes sense to call if 
probe_one was successful). It's also a much driver-friendly solution. I 
could see other drivers have much the same issue with concurrency and 
data structure teardown - and if locks aren't allowed in the 
error-detect path... it's not good.


-- james s



On 7/31/2014 10:16 PM, Mike Qiu wrote:

On 07/17/2014 02:32 PM, Mike Qiu wrote:


Hi, all

How about this patch ?

Any idea ?


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW 
3.10.42-2002.pkvm2_1_1.6.ppc64 #1

Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032 SF,HV,EE,ME,IR,DR,RI  CR: 28b52b44  XER: 
2000

CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu qiud...@linux.vnet.ibm.com
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 
+++

  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */

  uint8_t soft_wwn_enable;
+uint8_t probe_done;

  struct timer_list fcp_poll_timer;
  struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c 
b/drivers/scsi/lpfc/lpfc_init.c

index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  }
  }

+/* Set the probe flag */
+phba-probe_done = 1;
+
  /* Perform post initialization setup */
  lpfc_post_init_setup(phba);

@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba 
*phba)

  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  2710 PCI channel disable preparing for reset\n);

@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)

  /* Disable interrupt and pci device */
  lpfc_sli_disable_intr(phba);
-pci_disable_device(phba-pcidev);
+if (phba-probe_done  phba-pcidev)
+pci_disable_device(phba-pcidev);
  }

  /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, 
const struct pci_device_id *pid)

  goto out_disable_intr;
  }

+/* Set probe_done flag */
+phba-probe_done = 1;
+
  /* Log the current active interrupt mode */
  phba-intr_mode = intr_mode;
  lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct 
lpfc_hba *phba)

  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+if (!phba)
+return;
+
  lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
  2826 PCI channel disable preparing for reset\n);

@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba 
*phba)

  /* Disable interrupt and pci device */
  lpfc_sli4_disable_intr(phba);
  lpfc_sli4_queue_destroy(phba);
-pci_disable_device(phba-pcidev);
+
+if (phba-probe_done  phba-pcidev)
+pci_disable_device(phba-pcidev);
  }

  /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t 
state)

  {
  struct Scsi_Host *shost = pci_get_drvdata(pdev);
-struct lpfc_hba *phba = ((struct lpfc_vport 
*)shost-hostdata)-phba;

+struct lpfc_hba *phba;
  pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;

+if 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-31 Thread Mike Qiu

On 07/17/2014 02:32 PM, Mike Qiu wrote:


Hi, all

How about this patch ?

Any idea ?


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
3.10.42-2002.pkvm2_1_1.6.ppc64 #1
Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032   CR: 28b52b44  XER: 2000
CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu 
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 +++
  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */

uint8_t soft_wwn_enable;
+   uint8_t probe_done;

struct timer_list fcp_poll_timer;
struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct 
pci_device_id *pid)
}
}

+   /* Set the probe flag */
+   phba->probe_done = 1;
+
/* Perform post initialization setup */
lpfc_post_init_setup(phba);

@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"2710 PCI channel disable preparing for reset\n");

@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)

/* Disable interrupt and pci device */
lpfc_sli_disable_intr(phba);
-   pci_disable_device(phba->pcidev);
+   if (phba->probe_done && phba->pcidev)
+   pci_disable_device(phba->pcidev);
  }

  /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
struct pci_device_id *pid)
goto out_disable_intr;
}

+   /* Set probe_done flag */
+   phba->probe_done = 1;
+
/* Log the current active interrupt mode */
phba->intr_mode = intr_mode;
lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (!phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"2826 PCI channel disable preparing for reset\n");

@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
-   pci_disable_device(phba->pcidev);
+
+   if (phba->probe_done && phba->pcidev)
+   pci_disable_device(phba->pcidev);
  }

  /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
  {
struct Scsi_Host *shost = pci_get_drvdata(pdev);
-   struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+   struct lpfc_hba *phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;

+   if (!shost)
+   /* Run here means it may during probe state and
+* Scsi_Host has not been created and We can do nothing
+* in this state so call for hotplug*/
+   return PCI_ERS_RESULT_NONE;
+
+   phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+   if (!phba || !phba->probe_done)
+   /* Run here means it may during probe state */
+   return PCI_ERS_RESULT_NONE;
+
switch (phba->pci_dev_grp) {
case LPFC_PCI_DEV_LP:
rc = lpfc_io_error_detected_s3(pdev, state);
@@ -10930,9 +10957,20 @@ static 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-31 Thread Mike Qiu

On 07/17/2014 02:32 PM, Mike Qiu wrote:


Hi, all

How about this patch ?

Any idea ?


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
3.10.42-2002.pkvm2_1_1.6.ppc64 #1
Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032 SF,HV,EE,ME,IR,DR,RI  CR: 28b52b44  XER: 2000
CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu qiud...@linux.vnet.ibm.com
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 +++
  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */

uint8_t soft_wwn_enable;
+   uint8_t probe_done;

struct timer_list fcp_poll_timer;
struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct 
pci_device_id *pid)
}
}

+   /* Set the probe flag */
+   phba-probe_done = 1;
+
/* Perform post initialization setup */
lpfc_post_init_setup(phba);

@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
2710 PCI channel disable preparing for reset\n);

@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)

/* Disable interrupt and pci device */
lpfc_sli_disable_intr(phba);
-   pci_disable_device(phba-pcidev);
+   if (phba-probe_done  phba-pcidev)
+   pci_disable_device(phba-pcidev);
  }

  /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
struct pci_device_id *pid)
goto out_disable_intr;
}

+   /* Set probe_done flag */
+   phba-probe_done = 1;
+
/* Log the current active interrupt mode */
phba-intr_mode = intr_mode;
lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (!phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
2826 PCI channel disable preparing for reset\n);

@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
-   pci_disable_device(phba-pcidev);
+
+   if (phba-probe_done  phba-pcidev)
+   pci_disable_device(phba-pcidev);
  }

  /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
  {
struct Scsi_Host *shost = pci_get_drvdata(pdev);
-   struct lpfc_hba *phba = ((struct lpfc_vport *)shost-hostdata)-phba;
+   struct lpfc_hba *phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;

+   if (!shost)
+   /* Run here means it may during probe state and
+* Scsi_Host has not been created and We can do nothing
+* in this state so call for hotplug*/
+   return PCI_ERS_RESULT_NONE;
+
+   phba = ((struct lpfc_vport *)shost-hostdata)-phba;
+
+   if (!phba || !phba-probe_done)
+   /* Run here means it may during probe state */
+   return PCI_ERS_RESULT_NONE;
+
switch (phba-pci_dev_grp) {
case LPFC_PCI_DEV_LP:
rc = lpfc_io_error_detected_s3(pdev, state);
@@ -10930,9 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-17 Thread Mike Qiu

On 07/17/2014 10:15 PM, Joe Lawrence wrote:

[ +cc linux-pci and Bjorn, comments inline/below ... ]

On Thu, 17 Jul 2014 02:32:31 -0400
Mike Qiu  wrote:


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
3.10.42-2002.pkvm2_1_1.6.ppc64 #1
Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032   CR: 28b52b44  XER: 2000
CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu 
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 +++
  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */
  
  	uint8_t soft_wwn_enable;

+   uint8_t probe_done;
  
  	struct timer_list fcp_poll_timer;

struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct 
pci_device_id *pid)
}
}
  
+	/* Set the probe flag */

+   phba->probe_done = 1;
+
/* Perform post initialization setup */
lpfc_post_init_setup(phba);
  
@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)

  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (phba)
+   return;
+

Should that be "if *not* phba" like the others below?


Yes, should be ...

if (!phba)




lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"2710 PCI channel disable preparing for reset\n");
  
@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  
  	/* Disable interrupt and pci device */

lpfc_sli_disable_intr(phba);
-   pci_disable_device(phba->pcidev);
+   if (phba->probe_done && phba->pcidev)
+   pci_disable_device(phba->pcidev);
  }
  
  /**

@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
struct pci_device_id *pid)
goto out_disable_intr;
}
  
+	/* Set probe_done flag */

+   phba->probe_done = 1;
+
/* Log the current active interrupt mode */
phba->intr_mode = intr_mode;
lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (!phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"2826 PCI channel disable preparing for reset\n");
  
@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)

/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
-   pci_disable_device(phba->pcidev);
+
+   if (phba->probe_done && phba->pcidev)
+   pci_disable_device(phba->pcidev);
  }
  
  /**

@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
  {
struct Scsi_Host *shost = pci_get_drvdata(pdev);
-   struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+   struct lpfc_hba *phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
  
+	if (!shost)

+   /* Run here means it may during probe state and
+* Scsi_Host has not been created and We can do nothing
+* in this state so call for hotplug*/
+   return PCI_ERS_RESULT_NONE;

Is it possible to get here during device removal, ie
lpfc_pci_remove_one?  If so, we may have shost in hand now, but can
these routines race?  Same for similar instances below...


I think so, it may race here. When 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-17 Thread Joe Lawrence
[ +cc linux-pci and Bjorn, comments inline/below ... ]

On Thu, 17 Jul 2014 02:32:31 -0400
Mike Qiu  wrote:

> In IBM Power servers, when hardware error occurs during probe
> state, EEH subsystem will call driver's error_detected interface,
> which will call pci_disable_device(). But driver's probe function also
> call pci_disable_device() in this situation.
> 
> So pci_dev will be disabled twice:
> 
> Device lpfc disabling already-disabled device
> [ cut here ]
> WARNING: at drivers/pci/pci.c:1407
> CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
> 3.10.42-2002.pkvm2_1_1.6.ppc64 #1
> Workqueue: events .work_for_cpu_fn
> task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
> NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
> REGS: c027d395b650 TRAP: 0700   Tainted: GW 
> (3.10.42-2002.pkvm2_1_1.6.ppc64)
> MSR: 900100029032   CR: 28b52b44  XER: 2000
> CFAR: c0879ab8 SOFTE: 1
> ...
> NIP .pci_disable_device+0xcc/0xe0
> LR  .pci_disable_device+0xc8/0xe0
> Call Trace:
> .pci_disable_device+0xc8/0xe0 (unreliable)
> .lpfc_disable_pci_dev+0x50/0x80 [lpfc]
> .lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
> .local_pci_probe+0x68/0xb0
> .work_for_cpu_fn+0x38/0x60
> .process_one_work+0x1a4/0x4d0
> .worker_thread+0x37c/0x490
> .kthread+0xf0/0x100
> .ret_from_kernel_thread+0x5c/0x80
> 
> Signed-off-by: Mike Qiu 
> ---
>  drivers/scsi/lpfc/lpfc.h  |  1 +
>  drivers/scsi/lpfc/lpfc_init.c | 59 
> +++
>  2 files changed, 55 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
> index 434e903..0c7bad9 100644
> --- a/drivers/scsi/lpfc/lpfc.h
> +++ b/drivers/scsi/lpfc/lpfc.h
> @@ -813,6 +813,7 @@ struct lpfc_hba {
>  #define VPD_MASK0xf /* mask for any vpd data */
>  
>   uint8_t soft_wwn_enable;
> + uint8_t probe_done;
>  
>   struct timer_list fcp_poll_timer;
>   struct timer_list eratt_poll;
> diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
> index 06f9a5b..c2e67ae 100644
> --- a/drivers/scsi/lpfc/lpfc_init.c
> +++ b/drivers/scsi/lpfc/lpfc_init.c
> @@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const 
> struct pci_device_id *pid)
>   }
>   }
>  
> + /* Set the probe flag */
> + phba->probe_done = 1;
> +
>   /* Perform post initialization setup */
>   lpfc_post_init_setup(phba);
>  
> @@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
>  static void
>  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
>  {
> + if (phba)
> + return;
> +

Should that be "if *not* phba" like the others below?

>   lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
>   "2710 PCI channel disable preparing for reset\n");
>  
> @@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
>  
>   /* Disable interrupt and pci device */
>   lpfc_sli_disable_intr(phba);
> - pci_disable_device(phba->pcidev);
> + if (phba->probe_done && phba->pcidev)
> + pci_disable_device(phba->pcidev);
>  }
>  
>  /**
> @@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
> struct pci_device_id *pid)
>   goto out_disable_intr;
>   }
>  
> + /* Set probe_done flag */
> + phba->probe_done = 1;
> +
>   /* Log the current active interrupt mode */
>   phba->intr_mode = intr_mode;
>   lpfc_log_intr_mode(phba, intr_mode);
> @@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
>  static void
>  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
>  {
> + if (!phba)
> + return;
> +
>   lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
>   "2826 PCI channel disable preparing for reset\n");
>  
> @@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
>   /* Disable interrupt and pci device */
>   lpfc_sli4_disable_intr(phba);
>   lpfc_sli4_queue_destroy(phba);
> - pci_disable_device(phba->pcidev);
> +
> + if (phba->probe_done && phba->pcidev)
> + pci_disable_device(phba->pcidev);
>  }
>  
>  /**
> @@ -10893,9 +10908,21 @@ static pci_ers_result_t
>  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
>  {
>   struct Scsi_Host *shost = pci_get_drvdata(pdev);
> - struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
> + struct lpfc_hba *phba;
>   pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
>  
> + if (!shost)
> + /* Run here means it may during probe state and
> +  * Scsi_Host has not been created and We can do nothing
> +  * in this state so call for hotplug*/
> + return PCI_ERS_RESULT_NONE;

Is it possible to get here during device removal, ie
lpfc_pci_remove_one?  If so, we may have shost in hand now, but can
these 

[PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-17 Thread Mike Qiu
In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
3.10.42-2002.pkvm2_1_1.6.ppc64 #1
Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032   CR: 28b52b44  XER: 2000
CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu 
---
 drivers/scsi/lpfc/lpfc.h  |  1 +
 drivers/scsi/lpfc/lpfc_init.c | 59 +++
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
 #define VPD_MASK0xf /* mask for any vpd data */
 
uint8_t soft_wwn_enable;
+   uint8_t probe_done;
 
struct timer_list fcp_poll_timer;
struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct 
pci_device_id *pid)
}
}
 
+   /* Set the probe flag */
+   phba->probe_done = 1;
+
/* Perform post initialization setup */
lpfc_post_init_setup(phba);
 
@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
 static void
 lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
 {
+   if (phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"2710 PCI channel disable preparing for reset\n");
 
@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
 
/* Disable interrupt and pci device */
lpfc_sli_disable_intr(phba);
-   pci_disable_device(phba->pcidev);
+   if (phba->probe_done && phba->pcidev)
+   pci_disable_device(phba->pcidev);
 }
 
 /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
struct pci_device_id *pid)
goto out_disable_intr;
}
 
+   /* Set probe_done flag */
+   phba->probe_done = 1;
+
/* Log the current active interrupt mode */
phba->intr_mode = intr_mode;
lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
 static void
 lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
 {
+   if (!phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"2826 PCI channel disable preparing for reset\n");
 
@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
-   pci_disable_device(phba->pcidev);
+
+   if (phba->probe_done && phba->pcidev)
+   pci_disable_device(phba->pcidev);
 }
 
 /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
 lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 {
struct Scsi_Host *shost = pci_get_drvdata(pdev);
-   struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+   struct lpfc_hba *phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
 
+   if (!shost)
+   /* Run here means it may during probe state and
+* Scsi_Host has not been created and We can do nothing
+* in this state so call for hotplug*/
+   return PCI_ERS_RESULT_NONE;
+
+   phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+   if (!phba || !phba->probe_done)
+   /* Run here means it may during probe state */
+   return PCI_ERS_RESULT_NONE;
+
switch (phba->pci_dev_grp) {
case LPFC_PCI_DEV_LP:
rc = lpfc_io_error_detected_s3(pdev, state);
@@ -10930,9 +10957,20 @@ static pci_ers_result_t
 lpfc_io_slot_reset(struct pci_dev *pdev)
 {
struct Scsi_Host *shost = 

[PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-17 Thread Mike Qiu
In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
3.10.42-2002.pkvm2_1_1.6.ppc64 #1
Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032 SF,HV,EE,ME,IR,DR,RI  CR: 28b52b44  XER: 2000
CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu qiud...@linux.vnet.ibm.com
---
 drivers/scsi/lpfc/lpfc.h  |  1 +
 drivers/scsi/lpfc/lpfc_init.c | 59 +++
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
 #define VPD_MASK0xf /* mask for any vpd data */
 
uint8_t soft_wwn_enable;
+   uint8_t probe_done;
 
struct timer_list fcp_poll_timer;
struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct 
pci_device_id *pid)
}
}
 
+   /* Set the probe flag */
+   phba-probe_done = 1;
+
/* Perform post initialization setup */
lpfc_post_init_setup(phba);
 
@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
 static void
 lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
 {
+   if (phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
2710 PCI channel disable preparing for reset\n);
 
@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
 
/* Disable interrupt and pci device */
lpfc_sli_disable_intr(phba);
-   pci_disable_device(phba-pcidev);
+   if (phba-probe_done  phba-pcidev)
+   pci_disable_device(phba-pcidev);
 }
 
 /**
@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
struct pci_device_id *pid)
goto out_disable_intr;
}
 
+   /* Set probe_done flag */
+   phba-probe_done = 1;
+
/* Log the current active interrupt mode */
phba-intr_mode = intr_mode;
lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
 static void
 lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
 {
+   if (!phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
2826 PCI channel disable preparing for reset\n);
 
@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
-   pci_disable_device(phba-pcidev);
+
+   if (phba-probe_done  phba-pcidev)
+   pci_disable_device(phba-pcidev);
 }
 
 /**
@@ -10893,9 +10908,21 @@ static pci_ers_result_t
 lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 {
struct Scsi_Host *shost = pci_get_drvdata(pdev);
-   struct lpfc_hba *phba = ((struct lpfc_vport *)shost-hostdata)-phba;
+   struct lpfc_hba *phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
 
+   if (!shost)
+   /* Run here means it may during probe state and
+* Scsi_Host has not been created and We can do nothing
+* in this state so call for hotplug*/
+   return PCI_ERS_RESULT_NONE;
+
+   phba = ((struct lpfc_vport *)shost-hostdata)-phba;
+
+   if (!phba || !phba-probe_done)
+   /* Run here means it may during probe state */
+   return PCI_ERS_RESULT_NONE;
+
switch (phba-pci_dev_grp) {
case LPFC_PCI_DEV_LP:
rc = lpfc_io_error_detected_s3(pdev, state);
@@ -10930,9 +10957,20 @@ static pci_ers_result_t
 lpfc_io_slot_reset(struct pci_dev *pdev)
 {
struct 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-17 Thread Joe Lawrence
[ +cc linux-pci and Bjorn, comments inline/below ... ]

On Thu, 17 Jul 2014 02:32:31 -0400
Mike Qiu qiud...@linux.vnet.ibm.com wrote:

 In IBM Power servers, when hardware error occurs during probe
 state, EEH subsystem will call driver's error_detected interface,
 which will call pci_disable_device(). But driver's probe function also
 call pci_disable_device() in this situation.
 
 So pci_dev will be disabled twice:
 
 Device lpfc disabling already-disabled device
 [ cut here ]
 WARNING: at drivers/pci/pci.c:1407
 CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
 3.10.42-2002.pkvm2_1_1.6.ppc64 #1
 Workqueue: events .work_for_cpu_fn
 task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
 NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
 REGS: c027d395b650 TRAP: 0700   Tainted: GW 
 (3.10.42-2002.pkvm2_1_1.6.ppc64)
 MSR: 900100029032 SF,HV,EE,ME,IR,DR,RI  CR: 28b52b44  XER: 2000
 CFAR: c0879ab8 SOFTE: 1
 ...
 NIP .pci_disable_device+0xcc/0xe0
 LR  .pci_disable_device+0xc8/0xe0
 Call Trace:
 .pci_disable_device+0xc8/0xe0 (unreliable)
 .lpfc_disable_pci_dev+0x50/0x80 [lpfc]
 .lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
 .local_pci_probe+0x68/0xb0
 .work_for_cpu_fn+0x38/0x60
 .process_one_work+0x1a4/0x4d0
 .worker_thread+0x37c/0x490
 .kthread+0xf0/0x100
 .ret_from_kernel_thread+0x5c/0x80
 
 Signed-off-by: Mike Qiu qiud...@linux.vnet.ibm.com
 ---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 
 +++
  2 files changed, 55 insertions(+), 5 deletions(-)
 
 diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
 index 434e903..0c7bad9 100644
 --- a/drivers/scsi/lpfc/lpfc.h
 +++ b/drivers/scsi/lpfc/lpfc.h
 @@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */
  
   uint8_t soft_wwn_enable;
 + uint8_t probe_done;
  
   struct timer_list fcp_poll_timer;
   struct timer_list eratt_poll;
 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
 index 06f9a5b..c2e67ae 100644
 --- a/drivers/scsi/lpfc/lpfc_init.c
 +++ b/drivers/scsi/lpfc/lpfc_init.c
 @@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const 
 struct pci_device_id *pid)
   }
   }
  
 + /* Set the probe flag */
 + phba-probe_done = 1;
 +
   /* Perform post initialization setup */
   lpfc_post_init_setup(phba);
  
 @@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
 + if (phba)
 + return;
 +

Should that be if *not* phba like the others below?

   lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
   2710 PCI channel disable preparing for reset\n);
  
 @@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  
   /* Disable interrupt and pci device */
   lpfc_sli_disable_intr(phba);
 - pci_disable_device(phba-pcidev);
 + if (phba-probe_done  phba-pcidev)
 + pci_disable_device(phba-pcidev);
  }
  
  /**
 @@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
 struct pci_device_id *pid)
   goto out_disable_intr;
   }
  
 + /* Set probe_done flag */
 + phba-probe_done = 1;
 +
   /* Log the current active interrupt mode */
   phba-intr_mode = intr_mode;
   lpfc_log_intr_mode(phba, intr_mode);
 @@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
 + if (!phba)
 + return;
 +
   lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
   2826 PCI channel disable preparing for reset\n);
  
 @@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
   /* Disable interrupt and pci device */
   lpfc_sli4_disable_intr(phba);
   lpfc_sli4_queue_destroy(phba);
 - pci_disable_device(phba-pcidev);
 +
 + if (phba-probe_done  phba-pcidev)
 + pci_disable_device(phba-pcidev);
  }
  
  /**
 @@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
  {
   struct Scsi_Host *shost = pci_get_drvdata(pdev);
 - struct lpfc_hba *phba = ((struct lpfc_vport *)shost-hostdata)-phba;
 + struct lpfc_hba *phba;
   pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
  
 + if (!shost)
 + /* Run here means it may during probe state and
 +  * Scsi_Host has not been created and We can do nothing
 +  * in this state so call for hotplug*/
 + return PCI_ERS_RESULT_NONE;

Is it possible to get here during device removal, ie
lpfc_pci_remove_one?  If so, we may have shost in hand now, but can
these routines race?  Same for similar instances below...

 + phba = ((struct lpfc_vport 

Re: [PATCH] lpfc: Avoid to disable pci_dev twice

2014-07-17 Thread Mike Qiu

On 07/17/2014 10:15 PM, Joe Lawrence wrote:

[ +cc linux-pci and Bjorn, comments inline/below ... ]

On Thu, 17 Jul 2014 02:32:31 -0400
Mike Qiu qiud...@linux.vnet.ibm.com wrote:


In IBM Power servers, when hardware error occurs during probe
state, EEH subsystem will call driver's error_detected interface,
which will call pci_disable_device(). But driver's probe function also
call pci_disable_device() in this situation.

So pci_dev will be disabled twice:

Device lpfc disabling already-disabled device
[ cut here ]
WARNING: at drivers/pci/pci.c:1407
CPU: 0 PID: 8744 Comm: kworker/0:0 Tainted: GW
3.10.42-2002.pkvm2_1_1.6.ppc64 #1
Workqueue: events .work_for_cpu_fn
task: c0274e3f5400 ti: c027d3958000 task.ti: c027d3958000
NIP: c0471b8c LR: c0471b88 CTR: c043ebe0
REGS: c027d395b650 TRAP: 0700   Tainted: GW 
(3.10.42-2002.pkvm2_1_1.6.ppc64)
MSR: 900100029032 SF,HV,EE,ME,IR,DR,RI  CR: 28b52b44  XER: 2000
CFAR: c0879ab8 SOFTE: 1
...
NIP .pci_disable_device+0xcc/0xe0
LR  .pci_disable_device+0xc8/0xe0
Call Trace:
.pci_disable_device+0xc8/0xe0 (unreliable)
.lpfc_disable_pci_dev+0x50/0x80 [lpfc]
.lpfc_pci_probe_one+0x870/0x21a0 [lpfc]
.local_pci_probe+0x68/0xb0
.work_for_cpu_fn+0x38/0x60
.process_one_work+0x1a4/0x4d0
.worker_thread+0x37c/0x490
.kthread+0xf0/0x100
.ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Mike Qiu qiud...@linux.vnet.ibm.com
---
  drivers/scsi/lpfc/lpfc.h  |  1 +
  drivers/scsi/lpfc/lpfc_init.c | 59 +++
  2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 434e903..0c7bad9 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -813,6 +813,7 @@ struct lpfc_hba {
  #define VPD_MASK0xf /* mask for any vpd data */
  
  	uint8_t soft_wwn_enable;

+   uint8_t probe_done;
  
  	struct timer_list fcp_poll_timer;

struct timer_list eratt_poll;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 06f9a5b..c2e67ae 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9519,6 +9519,9 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct 
pci_device_id *pid)
}
}
  
+	/* Set the probe flag */

+   phba-probe_done = 1;
+
/* Perform post initialization setup */
lpfc_post_init_setup(phba);
  
@@ -9795,6 +9798,9 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba)

  static void
  lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (phba)
+   return;
+

Should that be if *not* phba like the others below?


Yes, should be ...

if (!phba)




lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
2710 PCI channel disable preparing for reset\n);
  
@@ -9812,7 +9818,8 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
  
  	/* Disable interrupt and pci device */

lpfc_sli_disable_intr(phba);
-   pci_disable_device(phba-pcidev);
+   if (phba-probe_done  phba-pcidev)
+   pci_disable_device(phba-pcidev);
  }
  
  /**

@@ -10282,6 +10289,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const 
struct pci_device_id *pid)
goto out_disable_intr;
}
  
+	/* Set probe_done flag */

+   phba-probe_done = 1;
+
/* Log the current active interrupt mode */
phba-intr_mode = intr_mode;
lpfc_log_intr_mode(phba, intr_mode);
@@ -10544,6 +10554,9 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
  static void
  lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
  {
+   if (!phba)
+   return;
+
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
2826 PCI channel disable preparing for reset\n);
  
@@ -10562,7 +10575,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)

/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
-   pci_disable_device(phba-pcidev);
+
+   if (phba-probe_done  phba-pcidev)
+   pci_disable_device(phba-pcidev);
  }
  
  /**

@@ -10893,9 +10908,21 @@ static pci_ers_result_t
  lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
  {
struct Scsi_Host *shost = pci_get_drvdata(pdev);
-   struct lpfc_hba *phba = ((struct lpfc_vport *)shost-hostdata)-phba;
+   struct lpfc_hba *phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
  
+	if (!shost)

+   /* Run here means it may during probe state and
+* Scsi_Host has not been created and We can do nothing
+* in this state so call for hotplug*/
+   return PCI_ERS_RESULT_NONE;

Is it possible to get here during device removal, ie
lpfc_pci_remove_one?  If so, we may have shost in hand now, but can
these routines race?  Same for similar instances