Re: [PATCH 20/21] Introduce struct eeh_stats for EEH - Reworked

2012-02-29 Thread Gavin Shan
With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.

The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.

It's the rework on the corresponding v5 patch. According to
the comments from David Laight, the EEH global statistics have
been changed for a litte bit so that they have fixed-type of
"u64". Also, the format used to print them has been changed to
"%llu" based on David's suggestion. Also, the output format of
EEH global statistics should be kept as intacted according to
Michael's suggestion that there might be tools parsing them.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/pseries/eeh.c |   65 --
 1 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c 
b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..1d08cd7 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 #define EEH_PCI_REGS_LOG_LEN 4096
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+   u64 no_device;  /* PCI device not found */
+   u64 no_dn;  /* OF node not found*/
+   u64 no_cfg_addr;/* Config address not found */
+   u64 ignored_check;  /* EEH check skipped*/
+   u64 total_mmio_ffs; /* Total EEH checks */
+   u64 false_positives;/* Unnecessary EEH checks   */
+   u64 slot_resets;/* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
 
 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct 
pci_dev *dev)
int rc = 0;
const char *location;
 
-   total_mmio_ffs++;
+   eeh_stats.total_mmio_ffs++;
 
if (!eeh_subsystem_enabled)
return 0;
 
if (!dn) {
-   no_dn++;
+   eeh_stats.no_dn++;
return 0;
}
dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct 
pci_dev *dev)
/* Access to IO BARs might get this far and still not want checking. */
if (!(edev->mode & EEH_MODE_SUPPORTED) ||
edev->mode & EEH_MODE_NOCHECK) {
-   ignored_check++;
+   eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n",
edev->mode, eeh_pci_name(dev), dn->full_name);
return 0;
}
 
if (!edev->config_addr && !edev->pe_config_addr) {
-   no_cfg_addr++;
+   eeh_stats.no_cfg_addr++;
return 0;
}
 
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct 
pci_dev *dev)
(ret == EEH_STATE_NOT_SUPPORT) ||
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
-   false_positives++;
+   eeh_stats.false_positives++;
edev->false_positives ++;
rc = 0;
goto dn_unlock;
}
 
-   slot_resets++;
+   eeh_stats.slot_resets++;
  
/* Avoid repeated reports of this failure, including problems
 * with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem 
*token, unsigned lon
addr = eeh_token_to_phys((unsigned long __force) token);
dev = pci_addr_cache_get_device(addr);
if (!dev) {
-   no_device++;
+   eeh_stats.no_device++;
return val;
}
 
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (0 == eeh_subsystem_enabled) {
seq_printf(m, "EEH Subsystem is globally disabled\n");
-   seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+   seq_printf(m, "eeh_total_mmio_ffs=%llu\n", 
eeh_stats.total_mmio_ffs);
} else {
seq_printf(m, "EEH Subsystem is enabled\n");
seq_printf(m,
-   "no device=%ld\n"
-   "no device node=%ld\n"

Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-29 Thread Gavin Shan
> > With the original EEH implementation, the EEH global statistics
> > are maintained by individual global variables. That makes the
> > code a little hard to maintain.
> 
> Hi Gavin,
> 
> > @@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void 
> > *v)
> >  {
> > if (0 == eeh_subsystem_enabled) {
> > seq_printf(m, "EEH Subsystem is globally disabled\n");
> > -   seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
> > +   seq_printf(m, "eeh_total_mmio_ffs=%d\n", 
> > eeh_stats.total_mmio_ffs);
> > } else {
> > seq_printf(m, "EEH Subsystem is enabled\n");
> > seq_printf(m,
> > -   "no device=%ld\n"
> > -   "no device node=%ld\n"
> > -   "no config address=%ld\n"
> > -   "check not wanted=%ld\n"
> > -   "eeh_total_mmio_ffs=%ld\n"
> > -   "eeh_false_positives=%ld\n"
> > -   "eeh_slot_resets=%ld\n",
> > -   no_device, no_dn, no_cfg_addr, 
> > -   ignored_check, total_mmio_ffs, 
> > -   false_positives,
> > -   slot_resets);
> > +   "no device   =%d\n"
> > +   "no device node  =%d\n"
> > +   "no config address   =%d\n"
> > +   "check not wanted=%d\n"
> > +   "eeh_total_mmio_ffs  =%d\n"
> > +   "eeh_false_positives =%d\n"
> > +   "eeh_slot_resets =%d\n",
> 
> There *might* be tools out there that parse this output, so I'd say
> don't change it unless you have to - and I don't think you have to?
> 

Thanks for catching the point, Michael. I will change it back soon ;-)

Thanks,
Gavin

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-29 Thread Michael Ellerman
On Tue, 2012-02-28 at 14:04 +0800, Gavin Shan wrote:
> With the original EEH implementation, the EEH global statistics
> are maintained by individual global variables. That makes the
> code a little hard to maintain.

Hi Gavin,

> @@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
>  {
>   if (0 == eeh_subsystem_enabled) {
>   seq_printf(m, "EEH Subsystem is globally disabled\n");
> - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
> + seq_printf(m, "eeh_total_mmio_ffs=%d\n", 
> eeh_stats.total_mmio_ffs);
>   } else {
>   seq_printf(m, "EEH Subsystem is enabled\n");
>   seq_printf(m,
> - "no device=%ld\n"
> - "no device node=%ld\n"
> - "no config address=%ld\n"
> - "check not wanted=%ld\n"
> - "eeh_total_mmio_ffs=%ld\n"
> - "eeh_false_positives=%ld\n"
> - "eeh_slot_resets=%ld\n",
> - no_device, no_dn, no_cfg_addr, 
> - ignored_check, total_mmio_ffs, 
> - false_positives,
> - slot_resets);
> + "no device   =%d\n"
> + "no device node  =%d\n"
> + "no config address   =%d\n"
> + "check not wanted=%d\n"
> + "eeh_total_mmio_ffs  =%d\n"
> + "eeh_false_positives =%d\n"
> + "eeh_slot_resets =%d\n",

There *might* be tools out there that parse this output, so I'd say
don't change it unless you have to - and I don't think you have to?

cheers


signature.asc
Description: This is a digitally signed message part
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-28 Thread Gavin Shan
With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.

The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.

It's the rework on the corresponding v5 patch. According to
the comments from David Laight, the EEH global statistics have
been changed for a litte bit so that they have fixed-type of
"u64". Also, the format used to print them has been changed to
"%llu" based on David's suggestion.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/pseries/eeh.c |   65 --
 1 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c 
b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..753ec8a 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 #define EEH_PCI_REGS_LOG_LEN 4096
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+   u64 no_device;  /* PCI device not found */
+   u64 no_dn;  /* OF node not found*/
+   u64 no_cfg_addr;/* Config address not found */
+   u64 ignored_check;  /* EEH check skipped*/
+   u64 total_mmio_ffs; /* Total EEH checks */
+   u64 false_positives;/* Unnecessary EEH checks   */
+   u64 slot_resets;/* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
 
 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct 
pci_dev *dev)
int rc = 0;
const char *location;
 
-   total_mmio_ffs++;
+   eeh_stats.total_mmio_ffs++;
 
if (!eeh_subsystem_enabled)
return 0;
 
if (!dn) {
-   no_dn++;
+   eeh_stats.no_dn++;
return 0;
}
dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct 
pci_dev *dev)
/* Access to IO BARs might get this far and still not want checking. */
if (!(edev->mode & EEH_MODE_SUPPORTED) ||
edev->mode & EEH_MODE_NOCHECK) {
-   ignored_check++;
+   eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check (%x) for %s %s\n",
edev->mode, eeh_pci_name(dev), dn->full_name);
return 0;
}
 
if (!edev->config_addr && !edev->pe_config_addr) {
-   no_cfg_addr++;
+   eeh_stats.no_cfg_addr++;
return 0;
}
 
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct 
pci_dev *dev)
(ret == EEH_STATE_NOT_SUPPORT) ||
(ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
(EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
-   false_positives++;
+   eeh_stats.false_positives++;
edev->false_positives ++;
rc = 0;
goto dn_unlock;
}
 
-   slot_resets++;
+   eeh_stats.slot_resets++;
  
/* Avoid repeated reports of this failure, including problems
 * with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem 
*token, unsigned lon
addr = eeh_token_to_phys((unsigned long __force) token);
dev = pci_addr_cache_get_device(addr);
if (!dev) {
-   no_device++;
+   eeh_stats.no_device++;
return val;
}
 
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (0 == eeh_subsystem_enabled) {
seq_printf(m, "EEH Subsystem is globally disabled\n");
-   seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+   seq_printf(m, "eeh_total_mmio_ffs=%llu\n", 
eeh_stats.total_mmio_ffs);
} else {
seq_printf(m, "EEH Subsystem is enabled\n");
seq_printf(m,
-   "no device=%ld\n"
-   "no device node=%ld\n"
-   "no config address=%ld\n"
-   "check not wanted=%ld\n"
-   "e

Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-28 Thread Gavin Shan
>  
> > +struct eeh_stats {
> > +   unsigned int no_device; /* PCI device not found */
> ...
> > +   "no device   =%d\n"
> ...
> 
> Use %u (for all the stats), you really don't want negative
> values printed.

Yes. 

> I've NFI how long wrapping these counters might take!
> If it is feasable (maybe much above 100Hz) then you
> need 64bit counters.
> 

I think it's better to use "u64" here ;-)

>   David
> 

Thanks,
Gavin
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-28 Thread David Laight
 
> +struct eeh_stats {
> + unsigned int no_device; /* PCI device not found */
...
> + "no device   =%d\n"
...

Use %u (for all the stats), you really don't want negative
values printed.
I've NFI how long wrapping these counters might take!
If it is feasable (maybe much above 100Hz) then you
need 64bit counters.

David



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-27 Thread Gavin Shan
>  
> > +/*
> > + * The struct is used to maintain the EEH global statistic
> > + * information. Besides, the EEH global statistics will be
> > + * exported to user space through procfs
> > + */
> > +struct eeh_stats {
> > +   unsigned long no_device;/* PCI device not found
> */
> > +   unsigned long no_dn;/* OF node not found
> */
> > +   unsigned long no_cfg_addr;  /* Config address not  found
> */
> > +   unsigned long ignored_check;/* EEH check skipped
> */
> > +   unsigned long total_mmio_ffs;   /* Total EEH checks
> */
> > +   unsigned long false_positives;  /* Unnecessary EEH checks
> */
> > +   unsigned long slot_resets;  /* PE reset
> */
> > +};
> 
> Why 'unsigned long', surely either 'unsigned int'
> or a fixed-width type.
> 

Thanks, David. Could you pls give more information on
how the fixed-width types benefits us?

Thanks,
Gavin
 
>   David
> 
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-27 Thread Gavin Shan
> Hi Gavin,
> 
> On Fri, 24 Feb 2012 17:38:17 +0800 Gavin Shan  
> wrote:
> >
> > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> > index 1310971..226c9a5 100644
> > --- a/arch/powerpc/include/asm/eeh.h
> > +++ b/arch/powerpc/include/asm/eeh.h
> > @@ -98,6 +98,21 @@ struct eeh_ops {
> > int (*configure_bridge)(struct device_node *dn);
> >  };
> >  
> > +/*
> > + * The struct is used to maintain the EEH global statistic
> > + * information. Besides, the EEH global statistics will be
> > + * exported to user space through procfs
> > + */
> > +struct eeh_stats {
> > +   unsigned long no_device;/* PCI device not found */
> > +   unsigned long no_dn;/* OF node not found*/
> > +   unsigned long no_cfg_addr;  /* Config address not found */
> > +   unsigned long ignored_check;/* EEH check skipped*/
> > +   unsigned long total_mmio_ffs;   /* Total EEH checks */
> > +   unsigned long false_positives;  /* Unnecessary EEH checks   */
> > +   unsigned long slot_resets;  /* PE reset */
> > +};
> 
> If this is used in only one place, there is not much point in putting it
> in a header file.
> 

Thanks, Stephen. I'll move it to eeh.c in next revision.

Thanks,
Gavin

> -- 
> Cheers,
> Stephen Rothwells...@canb.auug.org.au
> http://www.canb.auug.org.au/~sfr/


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-24 Thread David Laight
 
> +/*
> + * The struct is used to maintain the EEH global statistic
> + * information. Besides, the EEH global statistics will be
> + * exported to user space through procfs
> + */
> +struct eeh_stats {
> + unsigned long no_device;/* PCI device not found
*/
> + unsigned long no_dn;/* OF node not found
*/
> + unsigned long no_cfg_addr;  /* Config address not  found
*/
> + unsigned long ignored_check;/* EEH check skipped
*/
> + unsigned long total_mmio_ffs;   /* Total EEH checks
*/
> + unsigned long false_positives;  /* Unnecessary EEH checks
*/
> + unsigned long slot_resets;  /* PE reset
*/
> +};

Why 'unsigned long', surely either 'unsigned int'
or a fixed-width type.

David


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 20/21] Introduce struct eeh_stats for EEH

2012-02-24 Thread Stephen Rothwell
Hi Gavin,

On Fri, 24 Feb 2012 17:38:17 +0800 Gavin Shan  wrote:
>
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 1310971..226c9a5 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -98,6 +98,21 @@ struct eeh_ops {
>   int (*configure_bridge)(struct device_node *dn);
>  };
>  
> +/*
> + * The struct is used to maintain the EEH global statistic
> + * information. Besides, the EEH global statistics will be
> + * exported to user space through procfs
> + */
> +struct eeh_stats {
> + unsigned long no_device;/* PCI device not found */
> + unsigned long no_dn;/* OF node not found*/
> + unsigned long no_cfg_addr;  /* Config address not found */
> + unsigned long ignored_check;/* EEH check skipped*/
> + unsigned long total_mmio_ffs;   /* Total EEH checks */
> + unsigned long false_positives;  /* Unnecessary EEH checks   */
> + unsigned long slot_resets;  /* PE reset */
> +};

If this is used in only one place, there is not much point in putting it
in a header file.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/


pgpLzg8PCDPVU.pgp
Description: PGP signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev