Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-24 Thread Huang Ying
On Fri, 2013-02-22 at 05:57 -0300, Mauro Carvalho Chehab wrote:
> Em Fri, 22 Feb 2013 05:50:21 -0300
> Mauro Carvalho Chehab  escreveu:
> 
> > Em Fri, 22 Feb 2013 08:45:11 +0800
> > Huang Ying  escreveu:
> > 
> > > On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
> > > > Em Thu, 21 Feb 2013 09:26:07 +0800
> > > > Huang Ying  escreveu:
> > > > 
> > 
> > > > There is also an advantage on taking this approach: this patch can have 
> > > > just
> > > > the ghes changes, and will still compile fine, as CONFIG_EDAC_GHES is 
> > > > not
> > > > defined yet. So, the patch becomes is simpler and easier to review.
> > > 
> > > Thanks for your explanation.  I agree to keep this in header file.
> > > 
> > > My original suggestion is that it may be better to move this from ghes.h
> > > to some place like "ghes_edac.h", because these functions are
> > > implemented in "ghes_edac.c" instead of ghes.c.
> > > 
> > 
> > Ah! Well, I considered a separate header when writing this patch, but 
> > adding 
> > another header for just 3 functions where the parameters are more related to
> > ghes than with ghes_edac seemed overkill to me.
> > 
> > IMO, a single comment at the header pointing to ghes_edac.c would improve 
> > it.
> > 
> ...
> > +/* From drivers/edac/ghes_acpi.h */
> 
> ...with is obviously wrong.
> 
> Mental note to myself: never hack too early in the morning before drinking a
> cup of coffee.
> 
> Anyway, fixed on the patch below. Could you please ack?
> 
> Regards,
> Mauro.
> 
> 
> [PATCH EDAC] ghes: add the needed hooks for EDAC error report
> 
> In order to allow reporting errors via EDAC, add hooks for:
> 
> 1) register an EDAC driver;
> 2) unregister an EDAC driver;
> 3) report errors via EDAC.
> 
> As the EDAC driver will need to access the ghes structure, adds it
> as one of the parameters for ghes_do_proc.
> 
> Signed-off-by: Mauro Carvalho Chehab 

Acked-by: Huang Ying 

> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 6d0e146..d668a8a 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
>   ghes->flags &= ~GHES_TO_CLEAR;
>  }
>  
> -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
> +static void ghes_do_proc(struct ghes *ghes,
> +  const struct acpi_hest_generic_status *estatus)
>  {
>   int sev, sec_sev;
>   struct acpi_hest_generic_data *gdata;
> @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
> acpi_hest_generic_status *estatus)
>CPER_SEC_PLATFORM_MEM)) {
>   struct cper_sec_mem_err *mem_err;
>   mem_err = (struct cper_sec_mem_err *)(gdata+1);
> + ghes_edac_report_mem_error(ghes, sev, mem_err);
> +
>  #ifdef CONFIG_X86_MCE
>   apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
> mem_err);
> @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
>   if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
>   ghes_estatus_cache_add(ghes->generic, ghes->estatus);
>   }
> - ghes_do_proc(ghes->estatus);
> + ghes_do_proc(ghes, ghes->estatus);
>  out:
>   ghes_clear_estatus(ghes);
>   return 0;
> @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
>   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
>   len = apei_estatus_len(estatus);
>   node_len = GHES_ESTATUS_NODE_LEN(len);
> - ghes_do_proc(estatus);
> + ghes_do_proc(estatus_node->ghes, estatus);
>   if (!ghes_estatus_cached(estatus)) {
>   generic = estatus_node->generic;
>   if (ghes_print_estatus(NULL, generic, estatus))
> @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
> pt_regs *regs)
>   estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
> node_len);
>   if (estatus_node) {
> + estatus_node->ghes = ghes;
>   estatus_node->generic = ghes->generic;
>   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
>   memcpy(estatus, ghes->estatus, len);
> @@ -899,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
>   ghes = NULL;
>   goto err;
>   }
> +
> + rc = ghes_edac_register(ghes, _dev->dev);
> + if (rc < 0)
> + goto err;
> +
>   switch (generic->notify.type) {
>   case ACPI_HEST_NOTIFY_POLLED:
>   ghes->timer.function = ghes_poll_func;
> @@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
>   if (acpi_gsi_to_irq(generic->notify.vector, >irq)) {
>   pr_err(GHES_PFX "Failed to map 

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-24 Thread Huang Ying
On Fri, 2013-02-22 at 05:57 -0300, Mauro Carvalho Chehab wrote:
 Em Fri, 22 Feb 2013 05:50:21 -0300
 Mauro Carvalho Chehab mche...@redhat.com escreveu:
 
  Em Fri, 22 Feb 2013 08:45:11 +0800
  Huang Ying ying.hu...@intel.com escreveu:
  
   On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
Em Thu, 21 Feb 2013 09:26:07 +0800
Huang Ying ying.hu...@intel.com escreveu:

  
There is also an advantage on taking this approach: this patch can have 
just
the ghes changes, and will still compile fine, as CONFIG_EDAC_GHES is 
not
defined yet. So, the patch becomes is simpler and easier to review.
   
   Thanks for your explanation.  I agree to keep this in header file.
   
   My original suggestion is that it may be better to move this from ghes.h
   to some place like ghes_edac.h, because these functions are
   implemented in ghes_edac.c instead of ghes.c.
   
  
  Ah! Well, I considered a separate header when writing this patch, but 
  adding 
  another header for just 3 functions where the parameters are more related to
  ghes than with ghes_edac seemed overkill to me.
  
  IMO, a single comment at the header pointing to ghes_edac.c would improve 
  it.
  
 ...
  +/* From drivers/edac/ghes_acpi.h */
 
 ...with is obviously wrong.
 
 Mental note to myself: never hack too early in the morning before drinking a
 cup of coffee.
 
 Anyway, fixed on the patch below. Could you please ack?
 
 Regards,
 Mauro.
 
 
 [PATCH EDAC] ghes: add the needed hooks for EDAC error report
 
 In order to allow reporting errors via EDAC, add hooks for:
 
 1) register an EDAC driver;
 2) unregister an EDAC driver;
 3) report errors via EDAC.
 
 As the EDAC driver will need to access the ghes structure, adds it
 as one of the parameters for ghes_do_proc.
 
 Signed-off-by: Mauro Carvalho Chehab mche...@redhat.com

Acked-by: Huang Ying ying.hu...@intel.com

 
 diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
 index 6d0e146..d668a8a 100644
 --- a/drivers/acpi/apei/ghes.c
 +++ b/drivers/acpi/apei/ghes.c
 @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
   ghes-flags = ~GHES_TO_CLEAR;
  }
  
 -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 +static void ghes_do_proc(struct ghes *ghes,
 +  const struct acpi_hest_generic_status *estatus)
  {
   int sev, sec_sev;
   struct acpi_hest_generic_data *gdata;
 @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
 acpi_hest_generic_status *estatus)
CPER_SEC_PLATFORM_MEM)) {
   struct cper_sec_mem_err *mem_err;
   mem_err = (struct cper_sec_mem_err *)(gdata+1);
 + ghes_edac_report_mem_error(ghes, sev, mem_err);
 +
  #ifdef CONFIG_X86_MCE
   apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
 mem_err);
 @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
   if (ghes_print_estatus(NULL, ghes-generic, ghes-estatus))
   ghes_estatus_cache_add(ghes-generic, ghes-estatus);
   }
 - ghes_do_proc(ghes-estatus);
 + ghes_do_proc(ghes, ghes-estatus);
  out:
   ghes_clear_estatus(ghes);
   return 0;
 @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
   len = apei_estatus_len(estatus);
   node_len = GHES_ESTATUS_NODE_LEN(len);
 - ghes_do_proc(estatus);
 + ghes_do_proc(estatus_node-ghes, estatus);
   if (!ghes_estatus_cached(estatus)) {
   generic = estatus_node-generic;
   if (ghes_print_estatus(NULL, generic, estatus))
 @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
 pt_regs *regs)
   estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
 node_len);
   if (estatus_node) {
 + estatus_node-ghes = ghes;
   estatus_node-generic = ghes-generic;
   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
   memcpy(estatus, ghes-estatus, len);
 @@ -899,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
   ghes = NULL;
   goto err;
   }
 +
 + rc = ghes_edac_register(ghes, ghes_dev-dev);
 + if (rc  0)
 + goto err;
 +
   switch (generic-notify.type) {
   case ACPI_HEST_NOTIFY_POLLED:
   ghes-timer.function = ghes_poll_func;
 @@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
   if (acpi_gsi_to_irq(generic-notify.vector, ghes-irq)) {
   pr_err(GHES_PFX Failed to map GSI to IRQ for generic 
 hardware error source: %d\n,
  

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-22 Thread Mauro Carvalho Chehab
Em Fri, 22 Feb 2013 08:45:11 +0800
Huang Ying  escreveu:

> On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
> > Em Thu, 21 Feb 2013 09:26:07 +0800
> > Huang Ying  escreveu:
> > 

> > There is also an advantage on taking this approach: this patch can have just
> > the ghes changes, and will still compile fine, as CONFIG_EDAC_GHES is not
> > defined yet. So, the patch becomes is simpler and easier to review.
> 
> Thanks for your explanation.  I agree to keep this in header file.
> 
> My original suggestion is that it may be better to move this from ghes.h
> to some place like "ghes_edac.h", because these functions are
> implemented in "ghes_edac.c" instead of ghes.c.
> 

Ah! Well, I considered a separate header when writing this patch, but adding 
another header for just 3 functions where the parameters are more related to
ghes than with ghes_edac seemed overkill to me.

IMO, a single comment at the header pointing to ghes_edac.c would improve it.

> > switch (generic->notify.type) {
> > case ACPI_HEST_NOTIFY_POLLED:
> > ghes->timer.function = ghes_poll_func;
> > @@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device 
> > *ghes_dev)
> > if (acpi_gsi_to_irq(generic->notify.vector, >irq)) {
> > pr_err(GHES_PFX "Failed to map GSI to IRQ for generic 
> > hardware error source: %d\n",
> >generic->header.source_id);
> > -   goto err;
> > +   goto err2;
> > }
> > if (request_irq(ghes->irq, ghes_irq_func,
> > 0, "GHES IRQ", ghes)) {
> > pr_err(GHES_PFX "Failed to register IRQ for generic 
> > hardware error source: %d\n",
> >generic->header.source_id);
> > -   goto err;
> > +   goto err2;
> > }
> > break;
> > case ACPI_HEST_NOTIFY_SCI:
> > @@ -943,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev)
> > platform_set_drvdata(ghes_dev, ghes);
> >  
> > return 0;
> > +err2:
> 
> Suggest to rename it to err_edac_unreg or something like that.  Just a
> suggestion.

Done.

I'm folding the enclosed patch on it.

Thanks for review!

Regards,
Mauro

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 19092dc..d668a8a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -920,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
if (acpi_gsi_to_irq(generic->notify.vector, >irq)) {
pr_err(GHES_PFX "Failed to map GSI to IRQ for generic 
hardware error source: %d\n",
   generic->header.source_id);
-   goto err2;
+   goto err_edac_unreg;
}
if (request_irq(ghes->irq, ghes_irq_func,
0, "GHES IRQ", ghes)) {
pr_err(GHES_PFX "Failed to register IRQ for generic 
hardware error source: %d\n",
   generic->header.source_id);
-   goto err2;
+   goto err_edac_unreg;
}
break;
case ACPI_HEST_NOTIFY_SCI:
@@ -952,7 +952,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
platform_set_drvdata(ghes_dev, ghes);
 
return 0;
-err2:
+err_edac_unreg:
ghes_edac_unregister(ghes);
 err:
if (ghes) {
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 9015ec2..d11d952 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -45,6 +45,8 @@ enum {
GHES_SEV_PANIC = 0x3,
 };
 
+/* From drivers/edac/ghes_acpi.h */
+
 #ifdef CONFIG_EDAC_GHES
 void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
struct cper_sec_mem_err *mem_err);



-- 

Cheers,
Mauro
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-22 Thread Mauro Carvalho Chehab
Em Fri, 22 Feb 2013 05:50:21 -0300
Mauro Carvalho Chehab  escreveu:

> Em Fri, 22 Feb 2013 08:45:11 +0800
> Huang Ying  escreveu:
> 
> > On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
> > > Em Thu, 21 Feb 2013 09:26:07 +0800
> > > Huang Ying  escreveu:
> > > 
> 
> > > There is also an advantage on taking this approach: this patch can have 
> > > just
> > > the ghes changes, and will still compile fine, as CONFIG_EDAC_GHES is not
> > > defined yet. So, the patch becomes is simpler and easier to review.
> > 
> > Thanks for your explanation.  I agree to keep this in header file.
> > 
> > My original suggestion is that it may be better to move this from ghes.h
> > to some place like "ghes_edac.h", because these functions are
> > implemented in "ghes_edac.c" instead of ghes.c.
> > 
> 
> Ah! Well, I considered a separate header when writing this patch, but adding 
> another header for just 3 functions where the parameters are more related to
> ghes than with ghes_edac seemed overkill to me.
> 
> IMO, a single comment at the header pointing to ghes_edac.c would improve it.
> 
...
> +/* From drivers/edac/ghes_acpi.h */

...with is obviously wrong.

Mental note to myself: never hack too early in the morning before drinking a
cup of coffee.

Anyway, fixed on the patch below. Could you please ack?

Regards,
Mauro.


[PATCH EDAC] ghes: add the needed hooks for EDAC error report

In order to allow reporting errors via EDAC, add hooks for:

1) register an EDAC driver;
2) unregister an EDAC driver;
3) report errors via EDAC.

As the EDAC driver will need to access the ghes structure, adds it
as one of the parameters for ghes_do_proc.

Signed-off-by: Mauro Carvalho Chehab 

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6d0e146..d668a8a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
+static void ghes_do_proc(struct ghes *ghes,
+const struct acpi_hest_generic_status *estatus)
 {
int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
@@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
acpi_hest_generic_status *estatus)
 CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata+1);
+   ghes_edac_report_mem_error(ghes, sev, mem_err);
+
 #ifdef CONFIG_X86_MCE
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
  mem_err);
@@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
}
-   ghes_do_proc(ghes->estatus);
+   ghes_do_proc(ghes, ghes->estatus);
 out:
ghes_clear_estatus(ghes);
return 0;
@@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
len = apei_estatus_len(estatus);
node_len = GHES_ESTATUS_NODE_LEN(len);
-   ghes_do_proc(estatus);
+   ghes_do_proc(estatus_node->ghes, estatus);
if (!ghes_estatus_cached(estatus)) {
generic = estatus_node->generic;
if (ghes_print_estatus(NULL, generic, estatus))
@@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs 
*regs)
estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
  node_len);
if (estatus_node) {
+   estatus_node->ghes = ghes;
estatus_node->generic = ghes->generic;
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
memcpy(estatus, ghes->estatus, len);
@@ -899,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
ghes = NULL;
goto err;
}
+
+   rc = ghes_edac_register(ghes, _dev->dev);
+   if (rc < 0)
+   goto err;
+
switch (generic->notify.type) {
case ACPI_HEST_NOTIFY_POLLED:
ghes->timer.function = ghes_poll_func;
@@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
if (acpi_gsi_to_irq(generic->notify.vector, >irq)) {
pr_err(GHES_PFX "Failed to map GSI to IRQ for generic 
hardware error source: %d\n",
   generic->header.source_id);
-   goto err;
+   goto err_edac_unreg;
}
if (request_irq(ghes->irq, ghes_irq_func,

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-22 Thread Mauro Carvalho Chehab
Em Fri, 22 Feb 2013 05:50:21 -0300
Mauro Carvalho Chehab mche...@redhat.com escreveu:

 Em Fri, 22 Feb 2013 08:45:11 +0800
 Huang Ying ying.hu...@intel.com escreveu:
 
  On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
   Em Thu, 21 Feb 2013 09:26:07 +0800
   Huang Ying ying.hu...@intel.com escreveu:
   
 
   There is also an advantage on taking this approach: this patch can have 
   just
   the ghes changes, and will still compile fine, as CONFIG_EDAC_GHES is not
   defined yet. So, the patch becomes is simpler and easier to review.
  
  Thanks for your explanation.  I agree to keep this in header file.
  
  My original suggestion is that it may be better to move this from ghes.h
  to some place like ghes_edac.h, because these functions are
  implemented in ghes_edac.c instead of ghes.c.
  
 
 Ah! Well, I considered a separate header when writing this patch, but adding 
 another header for just 3 functions where the parameters are more related to
 ghes than with ghes_edac seemed overkill to me.
 
 IMO, a single comment at the header pointing to ghes_edac.c would improve it.
 
...
 +/* From drivers/edac/ghes_acpi.h */

...with is obviously wrong.

Mental note to myself: never hack too early in the morning before drinking a
cup of coffee.

Anyway, fixed on the patch below. Could you please ack?

Regards,
Mauro.


[PATCH EDAC] ghes: add the needed hooks for EDAC error report

In order to allow reporting errors via EDAC, add hooks for:

1) register an EDAC driver;
2) unregister an EDAC driver;
3) report errors via EDAC.

As the EDAC driver will need to access the ghes structure, adds it
as one of the parameters for ghes_do_proc.

Signed-off-by: Mauro Carvalho Chehab mche...@redhat.com

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6d0e146..d668a8a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes-flags = ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
+static void ghes_do_proc(struct ghes *ghes,
+const struct acpi_hest_generic_status *estatus)
 {
int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
@@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
acpi_hest_generic_status *estatus)
 CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata+1);
+   ghes_edac_report_mem_error(ghes, sev, mem_err);
+
 #ifdef CONFIG_X86_MCE
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
  mem_err);
@@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
if (ghes_print_estatus(NULL, ghes-generic, ghes-estatus))
ghes_estatus_cache_add(ghes-generic, ghes-estatus);
}
-   ghes_do_proc(ghes-estatus);
+   ghes_do_proc(ghes, ghes-estatus);
 out:
ghes_clear_estatus(ghes);
return 0;
@@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
len = apei_estatus_len(estatus);
node_len = GHES_ESTATUS_NODE_LEN(len);
-   ghes_do_proc(estatus);
+   ghes_do_proc(estatus_node-ghes, estatus);
if (!ghes_estatus_cached(estatus)) {
generic = estatus_node-generic;
if (ghes_print_estatus(NULL, generic, estatus))
@@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs 
*regs)
estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
  node_len);
if (estatus_node) {
+   estatus_node-ghes = ghes;
estatus_node-generic = ghes-generic;
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
memcpy(estatus, ghes-estatus, len);
@@ -899,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
ghes = NULL;
goto err;
}
+
+   rc = ghes_edac_register(ghes, ghes_dev-dev);
+   if (rc  0)
+   goto err;
+
switch (generic-notify.type) {
case ACPI_HEST_NOTIFY_POLLED:
ghes-timer.function = ghes_poll_func;
@@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
if (acpi_gsi_to_irq(generic-notify.vector, ghes-irq)) {
pr_err(GHES_PFX Failed to map GSI to IRQ for generic 
hardware error source: %d\n,
   generic-header.source_id);
-   goto err;
+   goto err_edac_unreg;
}
if (request_irq(ghes-irq, 

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-22 Thread Mauro Carvalho Chehab
Em Fri, 22 Feb 2013 08:45:11 +0800
Huang Ying ying.hu...@intel.com escreveu:

 On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
  Em Thu, 21 Feb 2013 09:26:07 +0800
  Huang Ying ying.hu...@intel.com escreveu:
  

  There is also an advantage on taking this approach: this patch can have just
  the ghes changes, and will still compile fine, as CONFIG_EDAC_GHES is not
  defined yet. So, the patch becomes is simpler and easier to review.
 
 Thanks for your explanation.  I agree to keep this in header file.
 
 My original suggestion is that it may be better to move this from ghes.h
 to some place like ghes_edac.h, because these functions are
 implemented in ghes_edac.c instead of ghes.c.
 

Ah! Well, I considered a separate header when writing this patch, but adding 
another header for just 3 functions where the parameters are more related to
ghes than with ghes_edac seemed overkill to me.

IMO, a single comment at the header pointing to ghes_edac.c would improve it.

  switch (generic-notify.type) {
  case ACPI_HEST_NOTIFY_POLLED:
  ghes-timer.function = ghes_poll_func;
  @@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device 
  *ghes_dev)
  if (acpi_gsi_to_irq(generic-notify.vector, ghes-irq)) {
  pr_err(GHES_PFX Failed to map GSI to IRQ for generic 
  hardware error source: %d\n,
 generic-header.source_id);
  -   goto err;
  +   goto err2;
  }
  if (request_irq(ghes-irq, ghes_irq_func,
  0, GHES IRQ, ghes)) {
  pr_err(GHES_PFX Failed to register IRQ for generic 
  hardware error source: %d\n,
 generic-header.source_id);
  -   goto err;
  +   goto err2;
  }
  break;
  case ACPI_HEST_NOTIFY_SCI:
  @@ -943,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev)
  platform_set_drvdata(ghes_dev, ghes);
   
  return 0;
  +err2:
 
 Suggest to rename it to err_edac_unreg or something like that.  Just a
 suggestion.

Done.

I'm folding the enclosed patch on it.

Thanks for review!

Regards,
Mauro

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 19092dc..d668a8a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -920,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
if (acpi_gsi_to_irq(generic-notify.vector, ghes-irq)) {
pr_err(GHES_PFX Failed to map GSI to IRQ for generic 
hardware error source: %d\n,
   generic-header.source_id);
-   goto err2;
+   goto err_edac_unreg;
}
if (request_irq(ghes-irq, ghes_irq_func,
0, GHES IRQ, ghes)) {
pr_err(GHES_PFX Failed to register IRQ for generic 
hardware error source: %d\n,
   generic-header.source_id);
-   goto err2;
+   goto err_edac_unreg;
}
break;
case ACPI_HEST_NOTIFY_SCI:
@@ -952,7 +952,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
platform_set_drvdata(ghes_dev, ghes);
 
return 0;
-err2:
+err_edac_unreg:
ghes_edac_unregister(ghes);
 err:
if (ghes) {
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 9015ec2..d11d952 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -45,6 +45,8 @@ enum {
GHES_SEV_PANIC = 0x3,
 };
 
+/* From drivers/edac/ghes_acpi.h */
+
 #ifdef CONFIG_EDAC_GHES
 void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
struct cper_sec_mem_err *mem_err);



-- 

Cheers,
Mauro
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-21 Thread Huang Ying
On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
> Em Thu, 21 Feb 2013 09:26:07 +0800
> Huang Ying  escreveu:
> 
> > Sorry for late!
> 
> Thanks for your comments. See my answers below.
> > 
> > On Fri, 2013-02-15 at 10:44 -0200, Mauro Carvalho Chehab wrote:
> > > In order to allow reporting errors via EDAC, add hooks for:
> > > 
> > > 1) register an EDAC driver;
> > > 2) unregister an EDAC driver;
> > > 3) report errors via EDAC.
> > > 
> > > As the EDAC driver will need to access the ghes structure, adds it
> > > as one of the parameters for ghes_do_proc.
> > > 
> > > Signed-off-by: Mauro Carvalho Chehab 
> > > ---
> > >  drivers/acpi/apei/ghes.c | 17 ++---
> > >  include/acpi/ghes.h  | 27 +++
> > >  2 files changed, 41 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> > > index 6d0e146..a21d7da 100644
> > > --- a/drivers/acpi/apei/ghes.c
> > > +++ b/drivers/acpi/apei/ghes.c
> > > @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
> > >   ghes->flags &= ~GHES_TO_CLEAR;
> > >  }
> > >  
> > > -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
> > > +static void ghes_do_proc(struct ghes *ghes,
> > > +  const struct acpi_hest_generic_status *estatus)
> > >  {
> > >   int sev, sec_sev;
> > >   struct acpi_hest_generic_data *gdata;
> > > @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
> > > acpi_hest_generic_status *estatus)
> > >CPER_SEC_PLATFORM_MEM)) {
> > >   struct cper_sec_mem_err *mem_err;
> > >   mem_err = (struct cper_sec_mem_err *)(gdata+1);
> > > + ghes_edac_report_mem_error(ghes, sev, mem_err);
> > > +
> > >  #ifdef CONFIG_X86_MCE
> > >   apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
> > > mem_err);
> > > @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
> > >   if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
> > >   ghes_estatus_cache_add(ghes->generic, ghes->estatus);
> > >   }
> > > - ghes_do_proc(ghes->estatus);
> > > + ghes_do_proc(ghes, ghes->estatus);
> > >  out:
> > >   ghes_clear_estatus(ghes);
> > >   return 0;
> > > @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work 
> > > *irq_work)
> > >   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
> > >   len = apei_estatus_len(estatus);
> > >   node_len = GHES_ESTATUS_NODE_LEN(len);
> > > - ghes_do_proc(estatus);
> > > + ghes_do_proc(estatus_node->ghes, estatus);
> > >   if (!ghes_estatus_cached(estatus)) {
> > >   generic = estatus_node->generic;
> > >   if (ghes_print_estatus(NULL, generic, estatus))
> > > @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
> > > pt_regs *regs)
> > >   estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
> > > node_len);
> > >   if (estatus_node) {
> > > + estatus_node->ghes = ghes;
> > >   estatus_node->generic = ghes->generic;
> > >   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
> > >   memcpy(estatus, ghes->estatus, len);
> > > @@ -942,6 +946,10 @@ static int ghes_probe(struct platform_device 
> > > *ghes_dev)
> > >   }
> > >   platform_set_drvdata(ghes_dev, ghes);
> > >  
> > > + rc = ghes_edac_register(ghes, _dev->dev);
> > > + if (rc < 0)
> > > + goto err;
> > > +
> > 
> > If ghes_edac_register() failed, we need to do some cleanup such as
> > unregister from hed etc.
> > 
> > Or just move ghes_edac_register() before switch?
> 
> Moving it to happen before the switch() looks the better. We need to 
> unregister
> ghes_edac if IRQ fails.
> 
> > >   return 0;
> > >  err:
> > >   if (ghes) {
> > > @@ -995,6 +1003,9 @@ static int ghes_remove(struct platform_device 
> > > *ghes_dev)
> > >   }
> > >  
> > >   ghes_fini(ghes);
> > > +
> > > + ghes_edac_unregister(ghes);
> > > +
> > >   kfree(ghes);
> > >  
> > >   platform_set_drvdata(ghes_dev, NULL);
> > > diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
> > > index 3eb8dc4..c6fef72 100644
> > > --- a/include/acpi/ghes.h
> > > +++ b/include/acpi/ghes.h
> > > @@ -22,11 +22,14 @@ struct ghes {
> > >   struct timer_list timer;
> > >   unsigned int irq;
> > >   };
> > > +
> > > + struct mem_ctl_info *mci;
> > 
> > Why we need this?  This is not used by ghes.[hc].
> 
> This will be needed by the EDAC driver, as the EDAC core has its own main
> struct, and such struct need to be recovered when reporting an error.
> 
> As this struct is not used by ghes, there's no need to even include the
> include/linux/edac.h, where this is declared, as gcc will just handle it
> as "void *" when compiling ghes.
> 
> I opted for this design 

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-21 Thread Mauro Carvalho Chehab
Em Thu, 21 Feb 2013 09:26:07 +0800
Huang Ying  escreveu:

> Sorry for late!

Thanks for your comments. See my answers below.
> 
> On Fri, 2013-02-15 at 10:44 -0200, Mauro Carvalho Chehab wrote:
> > In order to allow reporting errors via EDAC, add hooks for:
> > 
> > 1) register an EDAC driver;
> > 2) unregister an EDAC driver;
> > 3) report errors via EDAC.
> > 
> > As the EDAC driver will need to access the ghes structure, adds it
> > as one of the parameters for ghes_do_proc.
> > 
> > Signed-off-by: Mauro Carvalho Chehab 
> > ---
> >  drivers/acpi/apei/ghes.c | 17 ++---
> >  include/acpi/ghes.h  | 27 +++
> >  2 files changed, 41 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> > index 6d0e146..a21d7da 100644
> > --- a/drivers/acpi/apei/ghes.c
> > +++ b/drivers/acpi/apei/ghes.c
> > @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
> > ghes->flags &= ~GHES_TO_CLEAR;
> >  }
> >  
> > -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
> > +static void ghes_do_proc(struct ghes *ghes,
> > +const struct acpi_hest_generic_status *estatus)
> >  {
> > int sev, sec_sev;
> > struct acpi_hest_generic_data *gdata;
> > @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
> > acpi_hest_generic_status *estatus)
> >  CPER_SEC_PLATFORM_MEM)) {
> > struct cper_sec_mem_err *mem_err;
> > mem_err = (struct cper_sec_mem_err *)(gdata+1);
> > +   ghes_edac_report_mem_error(ghes, sev, mem_err);
> > +
> >  #ifdef CONFIG_X86_MCE
> > apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
> >   mem_err);
> > @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
> > if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
> > ghes_estatus_cache_add(ghes->generic, ghes->estatus);
> > }
> > -   ghes_do_proc(ghes->estatus);
> > +   ghes_do_proc(ghes, ghes->estatus);
> >  out:
> > ghes_clear_estatus(ghes);
> > return 0;
> > @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
> > estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
> > len = apei_estatus_len(estatus);
> > node_len = GHES_ESTATUS_NODE_LEN(len);
> > -   ghes_do_proc(estatus);
> > +   ghes_do_proc(estatus_node->ghes, estatus);
> > if (!ghes_estatus_cached(estatus)) {
> > generic = estatus_node->generic;
> > if (ghes_print_estatus(NULL, generic, estatus))
> > @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
> > pt_regs *regs)
> > estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
> >   node_len);
> > if (estatus_node) {
> > +   estatus_node->ghes = ghes;
> > estatus_node->generic = ghes->generic;
> > estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
> > memcpy(estatus, ghes->estatus, len);
> > @@ -942,6 +946,10 @@ static int ghes_probe(struct platform_device *ghes_dev)
> > }
> > platform_set_drvdata(ghes_dev, ghes);
> >  
> > +   rc = ghes_edac_register(ghes, _dev->dev);
> > +   if (rc < 0)
> > +   goto err;
> > +
> 
> If ghes_edac_register() failed, we need to do some cleanup such as
> unregister from hed etc.
> 
> Or just move ghes_edac_register() before switch?

Moving it to happen before the switch() looks the better. We need to unregister
ghes_edac if IRQ fails.

> > return 0;
> >  err:
> > if (ghes) {
> > @@ -995,6 +1003,9 @@ static int ghes_remove(struct platform_device 
> > *ghes_dev)
> > }
> >  
> > ghes_fini(ghes);
> > +
> > +   ghes_edac_unregister(ghes);
> > +
> > kfree(ghes);
> >  
> > platform_set_drvdata(ghes_dev, NULL);
> > diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
> > index 3eb8dc4..c6fef72 100644
> > --- a/include/acpi/ghes.h
> > +++ b/include/acpi/ghes.h
> > @@ -22,11 +22,14 @@ struct ghes {
> > struct timer_list timer;
> > unsigned int irq;
> > };
> > +
> > +   struct mem_ctl_info *mci;
> 
> Why we need this?  This is not used by ghes.[hc].

This will be needed by the EDAC driver, as the EDAC core has its own main
struct, and such struct need to be recovered when reporting an error.

As this struct is not used by ghes, there's no need to even include the
include/linux/edac.h, where this is declared, as gcc will just handle it
as "void *" when compiling ghes.

I opted for this design as it is simpler, and costs just 8 bytes for each
ghes struct, and no extra code is needed.

However, as you're not happy with it, I'm removing it from there, and adding
a list at the edac driver, together with a mutex, to serialize its 

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-21 Thread Mauro Carvalho Chehab
Em Thu, 21 Feb 2013 09:26:07 +0800
Huang Ying ying.hu...@intel.com escreveu:

 Sorry for late!

Thanks for your comments. See my answers below.
 
 On Fri, 2013-02-15 at 10:44 -0200, Mauro Carvalho Chehab wrote:
  In order to allow reporting errors via EDAC, add hooks for:
  
  1) register an EDAC driver;
  2) unregister an EDAC driver;
  3) report errors via EDAC.
  
  As the EDAC driver will need to access the ghes structure, adds it
  as one of the parameters for ghes_do_proc.
  
  Signed-off-by: Mauro Carvalho Chehab mche...@redhat.com
  ---
   drivers/acpi/apei/ghes.c | 17 ++---
   include/acpi/ghes.h  | 27 +++
   2 files changed, 41 insertions(+), 3 deletions(-)
  
  diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
  index 6d0e146..a21d7da 100644
  --- a/drivers/acpi/apei/ghes.c
  +++ b/drivers/acpi/apei/ghes.c
  @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
  ghes-flags = ~GHES_TO_CLEAR;
   }
   
  -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
  +static void ghes_do_proc(struct ghes *ghes,
  +const struct acpi_hest_generic_status *estatus)
   {
  int sev, sec_sev;
  struct acpi_hest_generic_data *gdata;
  @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
  acpi_hest_generic_status *estatus)
   CPER_SEC_PLATFORM_MEM)) {
  struct cper_sec_mem_err *mem_err;
  mem_err = (struct cper_sec_mem_err *)(gdata+1);
  +   ghes_edac_report_mem_error(ghes, sev, mem_err);
  +
   #ifdef CONFIG_X86_MCE
  apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
mem_err);
  @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
  if (ghes_print_estatus(NULL, ghes-generic, ghes-estatus))
  ghes_estatus_cache_add(ghes-generic, ghes-estatus);
  }
  -   ghes_do_proc(ghes-estatus);
  +   ghes_do_proc(ghes, ghes-estatus);
   out:
  ghes_clear_estatus(ghes);
  return 0;
  @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
  estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
  len = apei_estatus_len(estatus);
  node_len = GHES_ESTATUS_NODE_LEN(len);
  -   ghes_do_proc(estatus);
  +   ghes_do_proc(estatus_node-ghes, estatus);
  if (!ghes_estatus_cached(estatus)) {
  generic = estatus_node-generic;
  if (ghes_print_estatus(NULL, generic, estatus))
  @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
  pt_regs *regs)
  estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
node_len);
  if (estatus_node) {
  +   estatus_node-ghes = ghes;
  estatus_node-generic = ghes-generic;
  estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
  memcpy(estatus, ghes-estatus, len);
  @@ -942,6 +946,10 @@ static int ghes_probe(struct platform_device *ghes_dev)
  }
  platform_set_drvdata(ghes_dev, ghes);
   
  +   rc = ghes_edac_register(ghes, ghes_dev-dev);
  +   if (rc  0)
  +   goto err;
  +
 
 If ghes_edac_register() failed, we need to do some cleanup such as
 unregister from hed etc.
 
 Or just move ghes_edac_register() before switch?

Moving it to happen before the switch() looks the better. We need to unregister
ghes_edac if IRQ fails.

  return 0;
   err:
  if (ghes) {
  @@ -995,6 +1003,9 @@ static int ghes_remove(struct platform_device 
  *ghes_dev)
  }
   
  ghes_fini(ghes);
  +
  +   ghes_edac_unregister(ghes);
  +
  kfree(ghes);
   
  platform_set_drvdata(ghes_dev, NULL);
  diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
  index 3eb8dc4..c6fef72 100644
  --- a/include/acpi/ghes.h
  +++ b/include/acpi/ghes.h
  @@ -22,11 +22,14 @@ struct ghes {
  struct timer_list timer;
  unsigned int irq;
  };
  +
  +   struct mem_ctl_info *mci;
 
 Why we need this?  This is not used by ghes.[hc].

This will be needed by the EDAC driver, as the EDAC core has its own main
struct, and such struct need to be recovered when reporting an error.

As this struct is not used by ghes, there's no need to even include the
include/linux/edac.h, where this is declared, as gcc will just handle it
as void * when compiling ghes.

I opted for this design as it is simpler, and costs just 8 bytes for each
ghes struct, and no extra code is needed.

However, as you're not happy with it, I'm removing it from there, and adding
a list at the edac driver, together with a mutex, to serialize its access,
associating each ghes struct with their corresponding MC struct.

I'll post the patch replacing patch 05/13 soon.

 
   };
   
   struct ghes_estatus_node {
  struct 

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-21 Thread Huang Ying
On Thu, 2013-02-21 at 09:04 -0300, Mauro Carvalho Chehab wrote:
 Em Thu, 21 Feb 2013 09:26:07 +0800
 Huang Ying ying.hu...@intel.com escreveu:
 
  Sorry for late!
 
 Thanks for your comments. See my answers below.
  
  On Fri, 2013-02-15 at 10:44 -0200, Mauro Carvalho Chehab wrote:
   In order to allow reporting errors via EDAC, add hooks for:
   
   1) register an EDAC driver;
   2) unregister an EDAC driver;
   3) report errors via EDAC.
   
   As the EDAC driver will need to access the ghes structure, adds it
   as one of the parameters for ghes_do_proc.
   
   Signed-off-by: Mauro Carvalho Chehab mche...@redhat.com
   ---
drivers/acpi/apei/ghes.c | 17 ++---
include/acpi/ghes.h  | 27 +++
2 files changed, 41 insertions(+), 3 deletions(-)
   
   diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
   index 6d0e146..a21d7da 100644
   --- a/drivers/acpi/apei/ghes.c
   +++ b/drivers/acpi/apei/ghes.c
   @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
 ghes-flags = ~GHES_TO_CLEAR;
}

   -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
   +static void ghes_do_proc(struct ghes *ghes,
   +  const struct acpi_hest_generic_status *estatus)
{
 int sev, sec_sev;
 struct acpi_hest_generic_data *gdata;
   @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
   acpi_hest_generic_status *estatus)
  CPER_SEC_PLATFORM_MEM)) {
 struct cper_sec_mem_err *mem_err;
 mem_err = (struct cper_sec_mem_err *)(gdata+1);
   + ghes_edac_report_mem_error(ghes, sev, mem_err);
   +
#ifdef CONFIG_X86_MCE
 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
   mem_err);
   @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
 if (ghes_print_estatus(NULL, ghes-generic, ghes-estatus))
 ghes_estatus_cache_add(ghes-generic, ghes-estatus);
 }
   - ghes_do_proc(ghes-estatus);
   + ghes_do_proc(ghes, ghes-estatus);
out:
 ghes_clear_estatus(ghes);
 return 0;
   @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work 
   *irq_work)
 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 len = apei_estatus_len(estatus);
 node_len = GHES_ESTATUS_NODE_LEN(len);
   - ghes_do_proc(estatus);
   + ghes_do_proc(estatus_node-ghes, estatus);
 if (!ghes_estatus_cached(estatus)) {
 generic = estatus_node-generic;
 if (ghes_print_estatus(NULL, generic, estatus))
   @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
   pt_regs *regs)
 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
   node_len);
 if (estatus_node) {
   + estatus_node-ghes = ghes;
 estatus_node-generic = ghes-generic;
 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 memcpy(estatus, ghes-estatus, len);
   @@ -942,6 +946,10 @@ static int ghes_probe(struct platform_device 
   *ghes_dev)
 }
 platform_set_drvdata(ghes_dev, ghes);

   + rc = ghes_edac_register(ghes, ghes_dev-dev);
   + if (rc  0)
   + goto err;
   +
  
  If ghes_edac_register() failed, we need to do some cleanup such as
  unregister from hed etc.
  
  Or just move ghes_edac_register() before switch?
 
 Moving it to happen before the switch() looks the better. We need to 
 unregister
 ghes_edac if IRQ fails.
 
 return 0;
err:
 if (ghes) {
   @@ -995,6 +1003,9 @@ static int ghes_remove(struct platform_device 
   *ghes_dev)
 }

 ghes_fini(ghes);
   +
   + ghes_edac_unregister(ghes);
   +
 kfree(ghes);

 platform_set_drvdata(ghes_dev, NULL);
   diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
   index 3eb8dc4..c6fef72 100644
   --- a/include/acpi/ghes.h
   +++ b/include/acpi/ghes.h
   @@ -22,11 +22,14 @@ struct ghes {
 struct timer_list timer;
 unsigned int irq;
 };
   +
   + struct mem_ctl_info *mci;
  
  Why we need this?  This is not used by ghes.[hc].
 
 This will be needed by the EDAC driver, as the EDAC core has its own main
 struct, and such struct need to be recovered when reporting an error.
 
 As this struct is not used by ghes, there's no need to even include the
 include/linux/edac.h, where this is declared, as gcc will just handle it
 as void * when compiling ghes.
 
 I opted for this design as it is simpler, and costs just 8 bytes for each
 ghes struct, and no extra code is needed.
 
 However, as you're not happy with it, I'm removing it from there, and adding
 a list at the edac driver, together with a mutex, to serialize its access,
 associating each ghes struct with their corresponding MC struct.

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-20 Thread Huang Ying
Sorry for late!

On Fri, 2013-02-15 at 10:44 -0200, Mauro Carvalho Chehab wrote:
> In order to allow reporting errors via EDAC, add hooks for:
> 
> 1) register an EDAC driver;
> 2) unregister an EDAC driver;
> 3) report errors via EDAC.
> 
> As the EDAC driver will need to access the ghes structure, adds it
> as one of the parameters for ghes_do_proc.
> 
> Signed-off-by: Mauro Carvalho Chehab 
> ---
>  drivers/acpi/apei/ghes.c | 17 ++---
>  include/acpi/ghes.h  | 27 +++
>  2 files changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 6d0e146..a21d7da 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
>   ghes->flags &= ~GHES_TO_CLEAR;
>  }
>  
> -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
> +static void ghes_do_proc(struct ghes *ghes,
> +  const struct acpi_hest_generic_status *estatus)
>  {
>   int sev, sec_sev;
>   struct acpi_hest_generic_data *gdata;
> @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
> acpi_hest_generic_status *estatus)
>CPER_SEC_PLATFORM_MEM)) {
>   struct cper_sec_mem_err *mem_err;
>   mem_err = (struct cper_sec_mem_err *)(gdata+1);
> + ghes_edac_report_mem_error(ghes, sev, mem_err);
> +
>  #ifdef CONFIG_X86_MCE
>   apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
> mem_err);
> @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
>   if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
>   ghes_estatus_cache_add(ghes->generic, ghes->estatus);
>   }
> - ghes_do_proc(ghes->estatus);
> + ghes_do_proc(ghes, ghes->estatus);
>  out:
>   ghes_clear_estatus(ghes);
>   return 0;
> @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
>   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
>   len = apei_estatus_len(estatus);
>   node_len = GHES_ESTATUS_NODE_LEN(len);
> - ghes_do_proc(estatus);
> + ghes_do_proc(estatus_node->ghes, estatus);
>   if (!ghes_estatus_cached(estatus)) {
>   generic = estatus_node->generic;
>   if (ghes_print_estatus(NULL, generic, estatus))
> @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
> pt_regs *regs)
>   estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
> node_len);
>   if (estatus_node) {
> + estatus_node->ghes = ghes;
>   estatus_node->generic = ghes->generic;
>   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
>   memcpy(estatus, ghes->estatus, len);
> @@ -942,6 +946,10 @@ static int ghes_probe(struct platform_device *ghes_dev)
>   }
>   platform_set_drvdata(ghes_dev, ghes);
>  
> + rc = ghes_edac_register(ghes, _dev->dev);
> + if (rc < 0)
> + goto err;
> +

If ghes_edac_register() failed, we need to do some cleanup such as
unregister from hed etc.

Or just move ghes_edac_register() before switch?

>   return 0;
>  err:
>   if (ghes) {
> @@ -995,6 +1003,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
>   }
>  
>   ghes_fini(ghes);
> +
> + ghes_edac_unregister(ghes);
> +
>   kfree(ghes);
>  
>   platform_set_drvdata(ghes_dev, NULL);
> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
> index 3eb8dc4..c6fef72 100644
> --- a/include/acpi/ghes.h
> +++ b/include/acpi/ghes.h
> @@ -22,11 +22,14 @@ struct ghes {
>   struct timer_list timer;
>   unsigned int irq;
>   };
> +
> + struct mem_ctl_info *mci;

Why we need this?  This is not used by ghes.[hc].

>  };
>  
>  struct ghes_estatus_node {
>   struct llist_node llnode;
>   struct acpi_hest_generic *generic;
> + struct ghes *ghes;
>  };
>  
>  struct ghes_estatus_cache {
> @@ -43,3 +46,27 @@ enum {
>   GHES_SEV_RECOVERABLE = 0x2,
>   GHES_SEV_PANIC = 0x3,
>  };
> +
> +#ifdef CONFIG_EDAC_GHES
> +void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
> + struct cper_sec_mem_err *mem_err);
> +
> +int ghes_edac_register(struct ghes *ghes, struct device *dev);
> +
> +void ghes_edac_unregister(struct ghes *ghes);
> +
> +#else
> +static inline void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
> +struct cper_sec_mem_err *mem_err)
> +{
> +}
> +
> +static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
> +{
> + return 0;
> +}
> +
> +static inline void ghes_edac_unregister(struct ghes *ghes)

Re: [PATCH EDAC 03/13] ghes: add the needed hooks for EDAC error report

2013-02-20 Thread Huang Ying
Sorry for late!

On Fri, 2013-02-15 at 10:44 -0200, Mauro Carvalho Chehab wrote:
 In order to allow reporting errors via EDAC, add hooks for:
 
 1) register an EDAC driver;
 2) unregister an EDAC driver;
 3) report errors via EDAC.
 
 As the EDAC driver will need to access the ghes structure, adds it
 as one of the parameters for ghes_do_proc.
 
 Signed-off-by: Mauro Carvalho Chehab mche...@redhat.com
 ---
  drivers/acpi/apei/ghes.c | 17 ++---
  include/acpi/ghes.h  | 27 +++
  2 files changed, 41 insertions(+), 3 deletions(-)
 
 diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
 index 6d0e146..a21d7da 100644
 --- a/drivers/acpi/apei/ghes.c
 +++ b/drivers/acpi/apei/ghes.c
 @@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
   ghes-flags = ~GHES_TO_CLEAR;
  }
  
 -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 +static void ghes_do_proc(struct ghes *ghes,
 +  const struct acpi_hest_generic_status *estatus)
  {
   int sev, sec_sev;
   struct acpi_hest_generic_data *gdata;
 @@ -421,6 +422,8 @@ static void ghes_do_proc(const struct 
 acpi_hest_generic_status *estatus)
CPER_SEC_PLATFORM_MEM)) {
   struct cper_sec_mem_err *mem_err;
   mem_err = (struct cper_sec_mem_err *)(gdata+1);
 + ghes_edac_report_mem_error(ghes, sev, mem_err);
 +
  #ifdef CONFIG_X86_MCE
   apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
 mem_err);
 @@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
   if (ghes_print_estatus(NULL, ghes-generic, ghes-estatus))
   ghes_estatus_cache_add(ghes-generic, ghes-estatus);
   }
 - ghes_do_proc(ghes-estatus);
 + ghes_do_proc(ghes, ghes-estatus);
  out:
   ghes_clear_estatus(ghes);
   return 0;
 @@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
   len = apei_estatus_len(estatus);
   node_len = GHES_ESTATUS_NODE_LEN(len);
 - ghes_do_proc(estatus);
 + ghes_do_proc(estatus_node-ghes, estatus);
   if (!ghes_estatus_cached(estatus)) {
   generic = estatus_node-generic;
   if (ghes_print_estatus(NULL, generic, estatus))
 @@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct 
 pt_regs *regs)
   estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
 node_len);
   if (estatus_node) {
 + estatus_node-ghes = ghes;
   estatus_node-generic = ghes-generic;
   estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
   memcpy(estatus, ghes-estatus, len);
 @@ -942,6 +946,10 @@ static int ghes_probe(struct platform_device *ghes_dev)
   }
   platform_set_drvdata(ghes_dev, ghes);
  
 + rc = ghes_edac_register(ghes, ghes_dev-dev);
 + if (rc  0)
 + goto err;
 +

If ghes_edac_register() failed, we need to do some cleanup such as
unregister from hed etc.

Or just move ghes_edac_register() before switch?

   return 0;
  err:
   if (ghes) {
 @@ -995,6 +1003,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
   }
  
   ghes_fini(ghes);
 +
 + ghes_edac_unregister(ghes);
 +
   kfree(ghes);
  
   platform_set_drvdata(ghes_dev, NULL);
 diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
 index 3eb8dc4..c6fef72 100644
 --- a/include/acpi/ghes.h
 +++ b/include/acpi/ghes.h
 @@ -22,11 +22,14 @@ struct ghes {
   struct timer_list timer;
   unsigned int irq;
   };
 +
 + struct mem_ctl_info *mci;

Why we need this?  This is not used by ghes.[hc].

  };
  
  struct ghes_estatus_node {
   struct llist_node llnode;
   struct acpi_hest_generic *generic;
 + struct ghes *ghes;
  };
  
  struct ghes_estatus_cache {
 @@ -43,3 +46,27 @@ enum {
   GHES_SEV_RECOVERABLE = 0x2,
   GHES_SEV_PANIC = 0x3,
  };
 +
 +#ifdef CONFIG_EDAC_GHES
 +void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
 + struct cper_sec_mem_err *mem_err);
 +
 +int ghes_edac_register(struct ghes *ghes, struct device *dev);
 +
 +void ghes_edac_unregister(struct ghes *ghes);
 +
 +#else
 +static inline void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
 +struct cper_sec_mem_err *mem_err)
 +{
 +}
 +
 +static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
 +{
 + return 0;
 +}
 +
 +static inline void ghes_edac_unregister(struct ghes *ghes)
 +{
 +}
 +#endif

I think it is better to put the above declaration into module which
implement these functions instead of