Re: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-20 Thread Rajat Jain
Hi Rafael,
On Wed, Mar 20, 2019 at 3:37 AM Rafael J. Wysocki  wrote:
>
> On Monday, March 18, 2019 5:01:06 PM CET Rajneesh Bhardwaj wrote:
> > On Mon, Mar 18, 2019 at 08:18:56AM -0700, Rajat Jain wrote:
> > > On Mon, Mar 18, 2019 at 2:31 AM Somayaji, Vishwanath
> > >  wrote:
> > > >
> > > >
> > > >
> > > > >-Original Message-
> > > > >From: Rajat Jain 
> > > > >Sent: Thursday, March 14, 2019 3:51 AM
> > > > >To: Bhardwaj, Rajneesh ; Somayaji, 
> > > > >Vishwanath
> > > > >; Darren Hart ; 
> > > > >Andy
> > > > >Shevchenko ; platform-driver-...@vger.kernel.org; 
> > > > >linux-
> > > > >ker...@vger.kernel.org
> > > > >Cc: Rajat Jain ; furq...@google.com;
> > > > >evgr...@google.com; rajatxj...@gmail.com
> > > > >Subject: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug
> > > > >registers on S0ix failure
> > > > >
> > > > >Add a module parameter which when enabled, will check on resume, if the
> > > > >last S0ix attempt was successful. If not, the driver would provide
> > > > >helpful debug information (which gets latched during the failed suspend
> > > > >attempt) to debug the S0ix failure.
> > > > >
> > > > >This information is very useful to debug S0ix failures. Specially since
> > > > >the latched debug information will be lost (over-written) if the system
> > > > >attempts to go into runtime (or imminent) S0ix again after that failed
> > > > >suspend attempt.
> > > > >
> > > > >Signed-off-by: Rajat Jain 
> > > > >---
> > > > > drivers/platform/x86/intel_pmc_core.c | 86 +++
> > > > > drivers/platform/x86/intel_pmc_core.h |  7 +++
> > > > > 2 files changed, 93 insertions(+)
> > > > >
> > > > >diff --git a/drivers/platform/x86/intel_pmc_core.c
> > > > >b/drivers/platform/x86/intel_pmc_core.c
> > > > >index 55578d07610c..b1f4405a27ce 100644
> > > > >--- a/drivers/platform/x86/intel_pmc_core.c
> > > > >+++ b/drivers/platform/x86/intel_pmc_core.c
> > > > >@@ -20,6 +20,7 @@
> > > > > #include 
> > > > > #include 
> > > > > #include 
> > > > >+#include 
> > > > > #include 
> > > > >
> > > > > #include 
> > > > >@@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device
> > > > >*pdev)
> > > > >   return 0;
> > > > > }
> > > > >
> > > > >+#ifdef CONFIG_PM_SLEEP
> > > > >+
> > > > >+static bool warn_on_s0ix_failures;
> > > > >+module_param(warn_on_s0ix_failures, bool, 0644);
> > > > >+MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix
> > > > >failures");
> > > > >+
> > > > >+static int pmc_core_suspend(struct device *dev)
> > > > >+{
> > > > >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > > > >+
> > > > >+  /* Save PC10 and S0ix residency for checking later */
> > > > >+  if (warn_on_s0ix_failures &&
> > > > >+  !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter)
> > > > >&&
> > > > >+  !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
> > > > >+  pmcdev->check_counters = true;
> > > > >+  else
> > > > >+  pmcdev->check_counters = false;
> > > > >+
> > > > >+  return 0;
> > > > >+}
> > > > >+
> > > > >+static inline bool pc10_failed(struct pmc_dev *pmcdev)
> > > > >+{
> > > > >+  u64 pc10_counter;
> > > > >+
> > > > >+  if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
> > > > >+  pc10_counter == pmcdev->pc10_counter)
> > > > >+  return true;
> > > > >+  else
> > > > >+  return false;
> > > > >+}
> > > > >+
> > > > >+static inline bool s0ix_failed(struct pmc_dev *pmcdev)
> > > > >+{
> > > > >+  u64 s0ix_counter;
> > > > >+
> > > > >+  if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
> > > > >+  s0ix_counter == pmcdev->s0ix_counter)
> > > > >+  return true;
> > > > >+  else
> > > > >+  return false;
> > > > >+}
> > > > >+
> > > > >+static int pmc_core_resume(struct device *dev)
> > > > >+{
> > > > >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > > > >+
> > > > >+  if (!pmcdev->check_counters)
> > > > >+  return 0;
> > > > >+
> > > > >+  if (pc10_failed(pmcdev)) {
> > > > >+  dev_info(dev, "PC10 entry had failed (PC10 
> > > > >cnt=0x%llx)\n",
> > > > >+   pmcdev->pc10_counter);
> > > > >+  } else if (s0ix_failed(pmcdev)) {
> > > > >+
> > > > >+  const struct pmc_bit_map **maps = pmcdev->map-
> > > > >>slps0_dbg_maps;
> > > > >+  const struct pmc_bit_map *map;
> > > > >+  int offset = pmcdev->map->slps0_dbg_offset;
> > > > >+  u32 data;
> > > > >+
> > > > >+  dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> > > > >+   pmcdev->s0ix_counter);
> > > > >+  while (*maps) {
> > > > >+  map = *maps;
> > > > >+  data = pmc_core_reg_read(pmcdev, offset);
> > > > >+  offset += 4;
> > > > >+  while (map->name) {
> > > > >+

Re: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-20 Thread Rafael J. Wysocki
On Monday, March 18, 2019 5:01:06 PM CET Rajneesh Bhardwaj wrote:
> On Mon, Mar 18, 2019 at 08:18:56AM -0700, Rajat Jain wrote:
> > On Mon, Mar 18, 2019 at 2:31 AM Somayaji, Vishwanath
> >  wrote:
> > >
> > >
> > >
> > > >-Original Message-
> > > >From: Rajat Jain 
> > > >Sent: Thursday, March 14, 2019 3:51 AM
> > > >To: Bhardwaj, Rajneesh ; Somayaji, 
> > > >Vishwanath
> > > >; Darren Hart ; Andy
> > > >Shevchenko ; platform-driver-...@vger.kernel.org; 
> > > >linux-
> > > >ker...@vger.kernel.org
> > > >Cc: Rajat Jain ; furq...@google.com;
> > > >evgr...@google.com; rajatxj...@gmail.com
> > > >Subject: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug
> > > >registers on S0ix failure
> > > >
> > > >Add a module parameter which when enabled, will check on resume, if the
> > > >last S0ix attempt was successful. If not, the driver would provide
> > > >helpful debug information (which gets latched during the failed suspend
> > > >attempt) to debug the S0ix failure.
> > > >
> > > >This information is very useful to debug S0ix failures. Specially since
> > > >the latched debug information will be lost (over-written) if the system
> > > >attempts to go into runtime (or imminent) S0ix again after that failed
> > > >suspend attempt.
> > > >
> > > >Signed-off-by: Rajat Jain 
> > > >---
> > > > drivers/platform/x86/intel_pmc_core.c | 86 +++
> > > > drivers/platform/x86/intel_pmc_core.h |  7 +++
> > > > 2 files changed, 93 insertions(+)
> > > >
> > > >diff --git a/drivers/platform/x86/intel_pmc_core.c
> > > >b/drivers/platform/x86/intel_pmc_core.c
> > > >index 55578d07610c..b1f4405a27ce 100644
> > > >--- a/drivers/platform/x86/intel_pmc_core.c
> > > >+++ b/drivers/platform/x86/intel_pmc_core.c
> > > >@@ -20,6 +20,7 @@
> > > > #include 
> > > > #include 
> > > > #include 
> > > >+#include 
> > > > #include 
> > > >
> > > > #include 
> > > >@@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device
> > > >*pdev)
> > > >   return 0;
> > > > }
> > > >
> > > >+#ifdef CONFIG_PM_SLEEP
> > > >+
> > > >+static bool warn_on_s0ix_failures;
> > > >+module_param(warn_on_s0ix_failures, bool, 0644);
> > > >+MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix
> > > >failures");
> > > >+
> > > >+static int pmc_core_suspend(struct device *dev)
> > > >+{
> > > >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > > >+
> > > >+  /* Save PC10 and S0ix residency for checking later */
> > > >+  if (warn_on_s0ix_failures &&
> > > >+  !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter)
> > > >&&
> > > >+  !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
> > > >+  pmcdev->check_counters = true;
> > > >+  else
> > > >+  pmcdev->check_counters = false;
> > > >+
> > > >+  return 0;
> > > >+}
> > > >+
> > > >+static inline bool pc10_failed(struct pmc_dev *pmcdev)
> > > >+{
> > > >+  u64 pc10_counter;
> > > >+
> > > >+  if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
> > > >+  pc10_counter == pmcdev->pc10_counter)
> > > >+  return true;
> > > >+  else
> > > >+  return false;
> > > >+}
> > > >+
> > > >+static inline bool s0ix_failed(struct pmc_dev *pmcdev)
> > > >+{
> > > >+  u64 s0ix_counter;
> > > >+
> > > >+  if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
> > > >+  s0ix_counter == pmcdev->s0ix_counter)
> > > >+  return true;
> > > >+  else
> > > >+  return false;
> > > >+}
> > > >+
> > > >+static int pmc_core_resume(struct device *dev)
> > > >+{
> > > >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > > >+
> > > >+  if (!pmcdev->check_counters)
> > > >+  return 0;
> > > >+
> > > >+  if (pc10_failed(pmcdev)) {
> > > >+  dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
> > > >+   pmcdev->pc10_counter);
> > > >+  } else if (s0ix_failed(pmcdev)) {
> > > >+
> > > >+  const struct pmc_bit_map **maps = pmcdev->map-
> > > >>slps0_dbg_maps;
> > > >+  const struct pmc_bit_map *map;
> > > >+  int offset = pmcdev->map->slps0_dbg_offset;
> > > >+  u32 data;
> > > >+
> > > >+  dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> > > >+   pmcdev->s0ix_counter);
> > > >+  while (*maps) {
> > > >+  map = *maps;
> > > >+  data = pmc_core_reg_read(pmcdev, offset);
> > > >+  offset += 4;
> > > >+  while (map->name) {
> > > >+  dev_warn(dev, "SLP_S0_DBG: %-32s\tState:
> > > >%s\n",
> > > >+   map->name,
> > > >+   data & map->bit_mask ? "Yes" : 
> > > >"No");
> > > >+  ++map;
> > > >+  }
> > > >+  ++maps;

Re: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-18 Thread Rajneesh Bhardwaj
On Mon, Mar 18, 2019 at 08:18:56AM -0700, Rajat Jain wrote:
> On Mon, Mar 18, 2019 at 2:31 AM Somayaji, Vishwanath
>  wrote:
> >
> >
> >
> > >-Original Message-
> > >From: Rajat Jain 
> > >Sent: Thursday, March 14, 2019 3:51 AM
> > >To: Bhardwaj, Rajneesh ; Somayaji, Vishwanath
> > >; Darren Hart ; Andy
> > >Shevchenko ; platform-driver-...@vger.kernel.org; 
> > >linux-
> > >ker...@vger.kernel.org
> > >Cc: Rajat Jain ; furq...@google.com;
> > >evgr...@google.com; rajatxj...@gmail.com
> > >Subject: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug
> > >registers on S0ix failure
> > >
> > >Add a module parameter which when enabled, will check on resume, if the
> > >last S0ix attempt was successful. If not, the driver would provide
> > >helpful debug information (which gets latched during the failed suspend
> > >attempt) to debug the S0ix failure.
> > >
> > >This information is very useful to debug S0ix failures. Specially since
> > >the latched debug information will be lost (over-written) if the system
> > >attempts to go into runtime (or imminent) S0ix again after that failed
> > >suspend attempt.
> > >
> > >Signed-off-by: Rajat Jain 
> > >---
> > > drivers/platform/x86/intel_pmc_core.c | 86 +++
> > > drivers/platform/x86/intel_pmc_core.h |  7 +++
> > > 2 files changed, 93 insertions(+)
> > >
> > >diff --git a/drivers/platform/x86/intel_pmc_core.c
> > >b/drivers/platform/x86/intel_pmc_core.c
> > >index 55578d07610c..b1f4405a27ce 100644
> > >--- a/drivers/platform/x86/intel_pmc_core.c
> > >+++ b/drivers/platform/x86/intel_pmc_core.c
> > >@@ -20,6 +20,7 @@
> > > #include 
> > > #include 
> > > #include 
> > >+#include 
> > > #include 
> > >
> > > #include 
> > >@@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device
> > >*pdev)
> > >   return 0;
> > > }
> > >
> > >+#ifdef CONFIG_PM_SLEEP
> > >+
> > >+static bool warn_on_s0ix_failures;
> > >+module_param(warn_on_s0ix_failures, bool, 0644);
> > >+MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix
> > >failures");
> > >+
> > >+static int pmc_core_suspend(struct device *dev)
> > >+{
> > >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > >+
> > >+  /* Save PC10 and S0ix residency for checking later */
> > >+  if (warn_on_s0ix_failures &&
> > >+  !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter)
> > >&&
> > >+  !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
> > >+  pmcdev->check_counters = true;
> > >+  else
> > >+  pmcdev->check_counters = false;
> > >+
> > >+  return 0;
> > >+}
> > >+
> > >+static inline bool pc10_failed(struct pmc_dev *pmcdev)
> > >+{
> > >+  u64 pc10_counter;
> > >+
> > >+  if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
> > >+  pc10_counter == pmcdev->pc10_counter)
> > >+  return true;
> > >+  else
> > >+  return false;
> > >+}
> > >+
> > >+static inline bool s0ix_failed(struct pmc_dev *pmcdev)
> > >+{
> > >+  u64 s0ix_counter;
> > >+
> > >+  if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
> > >+  s0ix_counter == pmcdev->s0ix_counter)
> > >+  return true;
> > >+  else
> > >+  return false;
> > >+}
> > >+
> > >+static int pmc_core_resume(struct device *dev)
> > >+{
> > >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > >+
> > >+  if (!pmcdev->check_counters)
> > >+  return 0;
> > >+
> > >+  if (pc10_failed(pmcdev)) {
> > >+  dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
> > >+   pmcdev->pc10_counter);
> > >+  } else if (s0ix_failed(pmcdev)) {
> > >+
> > >+  const struct pmc_bit_map **maps = pmcdev->map-
> > >>slps0_dbg_maps;
> > >+  const struct pmc_bit_map *map;
> > >+  int offset = pmcdev->map->slps0_dbg_offset;
> > >+  u32 data;
> > >+
> > >+  dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> > >+   pmcdev->s0ix_counter);
> > >+  while (*maps) {
> > >+  map = *maps;
> > >+  data = pmc_core_reg_read(pmcdev, offset);
> > >+  offset += 4;
> > >+  while (map->name) {
> > >+  dev_warn(dev, "SLP_S0_DBG: %-32s\tState:
> > >%s\n",
> > >+   map->name,
> > >+   data & map->bit_mask ? "Yes" : 
> > >"No");
> > >+  ++map;
> > >+  }
> > >+  ++maps;
> > >+  }
> > >+  }
> > >+  return 0;
> > >+}
> > >+
> > >+#endif
> > >+
> > >+const struct dev_pm_ops pmc_core_pm_ops = {
> > >+  SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend,
> > >pmc_core_resume)
> > These PM Ops routines will be called not just in s2idle scenario, but also 
> > in other suspend scenarios l

Re: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-18 Thread Rajat Jain
On Mon, Mar 18, 2019 at 2:31 AM Somayaji, Vishwanath
 wrote:
>
>
>
> >-Original Message-
> >From: Rajat Jain 
> >Sent: Thursday, March 14, 2019 3:51 AM
> >To: Bhardwaj, Rajneesh ; Somayaji, Vishwanath
> >; Darren Hart ; Andy
> >Shevchenko ; platform-driver-...@vger.kernel.org; linux-
> >ker...@vger.kernel.org
> >Cc: Rajat Jain ; furq...@google.com;
> >evgr...@google.com; rajatxj...@gmail.com
> >Subject: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug
> >registers on S0ix failure
> >
> >Add a module parameter which when enabled, will check on resume, if the
> >last S0ix attempt was successful. If not, the driver would provide
> >helpful debug information (which gets latched during the failed suspend
> >attempt) to debug the S0ix failure.
> >
> >This information is very useful to debug S0ix failures. Specially since
> >the latched debug information will be lost (over-written) if the system
> >attempts to go into runtime (or imminent) S0ix again after that failed
> >suspend attempt.
> >
> >Signed-off-by: Rajat Jain 
> >---
> > drivers/platform/x86/intel_pmc_core.c | 86 +++
> > drivers/platform/x86/intel_pmc_core.h |  7 +++
> > 2 files changed, 93 insertions(+)
> >
> >diff --git a/drivers/platform/x86/intel_pmc_core.c
> >b/drivers/platform/x86/intel_pmc_core.c
> >index 55578d07610c..b1f4405a27ce 100644
> >--- a/drivers/platform/x86/intel_pmc_core.c
> >+++ b/drivers/platform/x86/intel_pmc_core.c
> >@@ -20,6 +20,7 @@
> > #include 
> > #include 
> > #include 
> >+#include 
> > #include 
> >
> > #include 
> >@@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device
> >*pdev)
> >   return 0;
> > }
> >
> >+#ifdef CONFIG_PM_SLEEP
> >+
> >+static bool warn_on_s0ix_failures;
> >+module_param(warn_on_s0ix_failures, bool, 0644);
> >+MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix
> >failures");
> >+
> >+static int pmc_core_suspend(struct device *dev)
> >+{
> >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> >+
> >+  /* Save PC10 and S0ix residency for checking later */
> >+  if (warn_on_s0ix_failures &&
> >+  !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter)
> >&&
> >+  !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
> >+  pmcdev->check_counters = true;
> >+  else
> >+  pmcdev->check_counters = false;
> >+
> >+  return 0;
> >+}
> >+
> >+static inline bool pc10_failed(struct pmc_dev *pmcdev)
> >+{
> >+  u64 pc10_counter;
> >+
> >+  if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
> >+  pc10_counter == pmcdev->pc10_counter)
> >+  return true;
> >+  else
> >+  return false;
> >+}
> >+
> >+static inline bool s0ix_failed(struct pmc_dev *pmcdev)
> >+{
> >+  u64 s0ix_counter;
> >+
> >+  if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
> >+  s0ix_counter == pmcdev->s0ix_counter)
> >+  return true;
> >+  else
> >+  return false;
> >+}
> >+
> >+static int pmc_core_resume(struct device *dev)
> >+{
> >+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> >+
> >+  if (!pmcdev->check_counters)
> >+  return 0;
> >+
> >+  if (pc10_failed(pmcdev)) {
> >+  dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
> >+   pmcdev->pc10_counter);
> >+  } else if (s0ix_failed(pmcdev)) {
> >+
> >+  const struct pmc_bit_map **maps = pmcdev->map-
> >>slps0_dbg_maps;
> >+  const struct pmc_bit_map *map;
> >+  int offset = pmcdev->map->slps0_dbg_offset;
> >+  u32 data;
> >+
> >+  dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> >+   pmcdev->s0ix_counter);
> >+  while (*maps) {
> >+  map = *maps;
> >+  data = pmc_core_reg_read(pmcdev, offset);
> >+  offset += 4;
> >+  while (map->name) {
> >+  dev_warn(dev, "SLP_S0_DBG: %-32s\tState:
> >%s\n",
> >+   map->name,
> >+   data & map->bit_mask ? "Yes" : "No");
> >+  ++map;
> >+  }
> >+  ++maps;
> >+  }
> >+  }
> >+  return 0;
> >+}
> >+
> >+#endif
> >+
> >+const struct dev_pm_ops pmc_core_pm_ops = {
> >+  SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend,
> >pmc_core_resume)
> These PM Ops routines will be called not just in s2idle scenario, but also in 
> other suspend scenarios like s2ram, s2disk. However actual functionalities 
> served by these routines are relevant only for s2idle.
> That means we will end up having false errors in s2ram/s2disk scenarios as 
> PC10/s0ix counters wont increment in those scenarios.

Yes, you are right. Currently there is no API for a driver to know
whether the *current suspend* attempt is t

Re: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-18 Thread Rajat Jain
On Sat, Mar 16, 2019 at 1:43 AM Rajneesh Bhardwaj
 wrote:
>
> On Wed, Mar 13, 2019 at 03:21:24PM -0700, Rajat Jain wrote:
> > Add a module parameter which when enabled, will check on resume, if the
> > last S0ix attempt was successful. If not, the driver would provide
> > helpful debug information (which gets latched during the failed suspend
> > attempt) to debug the S0ix failure.
>
> + Srinivas and David.
>
> >
> > This information is very useful to debug S0ix failures. Specially since
> > the latched debug information will be lost (over-written) if the system
> > attempts to go into runtime (or imminent) S0ix again after that failed
> > suspend attempt.
> >
> > Signed-off-by: Rajat Jain 
> > ---
> >  drivers/platform/x86/intel_pmc_core.c | 86 +++
> >  drivers/platform/x86/intel_pmc_core.h |  7 +++
> >  2 files changed, 93 insertions(+)
> >
> > diff --git a/drivers/platform/x86/intel_pmc_core.c 
> > b/drivers/platform/x86/intel_pmc_core.c
> > index 55578d07610c..b1f4405a27ce 100644
> > --- a/drivers/platform/x86/intel_pmc_core.c
> > +++ b/drivers/platform/x86/intel_pmc_core.c
> > @@ -20,6 +20,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >
> >  #include 
> > @@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device 
> > *pdev)
> >   return 0;
> >  }
> >
> > +#ifdef CONFIG_PM_SLEEP
> > +
> > +static bool warn_on_s0ix_failures;
> > +module_param(warn_on_s0ix_failures, bool, 0644);
> > +MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix 
> > failures");
> > +
> > +static int pmc_core_suspend(struct device *dev)
> > +{
> > + struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > +
> > + /* Save PC10 and S0ix residency for checking later */
> > + if (warn_on_s0ix_failures &&
> > + !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter) &&
> > + !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
> > + pmcdev->check_counters = true;
> > + else
> > + pmcdev->check_counters = false;
> > +
> > + return 0;
> > +}
> > +
> > +static inline bool pc10_failed(struct pmc_dev *pmcdev)
> > +{
> > + u64 pc10_counter;
> > +
> > + if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
> > + pc10_counter == pmcdev->pc10_counter)
> > + return true;
> > + else
> > + return false;
> > +}
> > +
> > +static inline bool s0ix_failed(struct pmc_dev *pmcdev)
> > +{
> > + u64 s0ix_counter;
> > +
> > + if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
> > + s0ix_counter == pmcdev->s0ix_counter)
> > + return true;
> > + else
> > + return false;
> > +}
> > +
> > +static int pmc_core_resume(struct device *dev)
> > +{
> > + struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> > +
> > + if (!pmcdev->check_counters)
> > + return 0;
> > +
> > + if (pc10_failed(pmcdev)) {
> > + dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
> > +  pmcdev->pc10_counter);
> > + } else if (s0ix_failed(pmcdev)) {
> > +
> > + const struct pmc_bit_map **maps = pmcdev->map->slps0_dbg_maps;
>
> slps0_dbg feature is not available on Skylake and Kabylake. PCH IP power
> gating status is available on all.

Right, so the slps0_dbg_maps field is only populated for the platforms
that have the slps0_dbg feature. So it seems like the below code would
work well for both kind of platforms.

>
> > + const struct pmc_bit_map *map;
> > + int offset = pmcdev->map->slps0_dbg_offset;
> > + u32 data;
> > +
> > + dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> > +  pmcdev->s0ix_counter);
>
> I feel this dev_warn may be frowned upon and should be possible kept behind
> something like pm_debug_messages_on though module param helps mitigate it.

Right, in my mind, as the parameter name suggests, the whole intent of
the "warn_on_s0ix_failures" is to generate noticable warnings. Since
these warnings are off by default, and thus will only be generated
some one turns on the parameter, thus indicating he is interested in
being warned for failures.

Thanks,

Rajat

>
> > + while (*maps) {
> > + map = *maps;
> > + data = pmc_core_reg_read(pmcdev, offset);
> > + offset += 4;
> > + while (map->name) {
> > + dev_warn(dev, "SLP_S0_DBG: %-32s\tState: 
> > %s\n",
> > +  map->name,
> > +  data & map->bit_mask ? "Yes" : "No");
> > + ++map;
> > + }
> > + ++maps;
> > + }
> > + }
> > + return 0;
> > +}
> > +
> > +#endif
> > +
> > +const struct dev_pm_ops pmc_core_pm_ops = {
> > + SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend, pm

RE: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-18 Thread Somayaji, Vishwanath


>-Original Message-
>From: Rajat Jain 
>Sent: Thursday, March 14, 2019 3:51 AM
>To: Bhardwaj, Rajneesh ; Somayaji, Vishwanath
>; Darren Hart ; Andy
>Shevchenko ; platform-driver-...@vger.kernel.org; linux-
>ker...@vger.kernel.org
>Cc: Rajat Jain ; furq...@google.com;
>evgr...@google.com; rajatxj...@gmail.com
>Subject: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug
>registers on S0ix failure
>
>Add a module parameter which when enabled, will check on resume, if the
>last S0ix attempt was successful. If not, the driver would provide
>helpful debug information (which gets latched during the failed suspend
>attempt) to debug the S0ix failure.
>
>This information is very useful to debug S0ix failures. Specially since
>the latched debug information will be lost (over-written) if the system
>attempts to go into runtime (or imminent) S0ix again after that failed
>suspend attempt.
>
>Signed-off-by: Rajat Jain 
>---
> drivers/platform/x86/intel_pmc_core.c | 86 +++
> drivers/platform/x86/intel_pmc_core.h |  7 +++
> 2 files changed, 93 insertions(+)
>
>diff --git a/drivers/platform/x86/intel_pmc_core.c
>b/drivers/platform/x86/intel_pmc_core.c
>index 55578d07610c..b1f4405a27ce 100644
>--- a/drivers/platform/x86/intel_pmc_core.c
>+++ b/drivers/platform/x86/intel_pmc_core.c
>@@ -20,6 +20,7 @@
> #include 
> #include 
> #include 
>+#include 
> #include 
>
> #include 
>@@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device
>*pdev)
>   return 0;
> }
>
>+#ifdef CONFIG_PM_SLEEP
>+
>+static bool warn_on_s0ix_failures;
>+module_param(warn_on_s0ix_failures, bool, 0644);
>+MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix
>failures");
>+
>+static int pmc_core_suspend(struct device *dev)
>+{
>+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
>+
>+  /* Save PC10 and S0ix residency for checking later */
>+  if (warn_on_s0ix_failures &&
>+  !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter)
>&&
>+  !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
>+  pmcdev->check_counters = true;
>+  else
>+  pmcdev->check_counters = false;
>+
>+  return 0;
>+}
>+
>+static inline bool pc10_failed(struct pmc_dev *pmcdev)
>+{
>+  u64 pc10_counter;
>+
>+  if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
>+  pc10_counter == pmcdev->pc10_counter)
>+  return true;
>+  else
>+  return false;
>+}
>+
>+static inline bool s0ix_failed(struct pmc_dev *pmcdev)
>+{
>+  u64 s0ix_counter;
>+
>+  if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
>+  s0ix_counter == pmcdev->s0ix_counter)
>+  return true;
>+  else
>+  return false;
>+}
>+
>+static int pmc_core_resume(struct device *dev)
>+{
>+  struct pmc_dev *pmcdev = dev_get_drvdata(dev);
>+
>+  if (!pmcdev->check_counters)
>+  return 0;
>+
>+  if (pc10_failed(pmcdev)) {
>+  dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
>+   pmcdev->pc10_counter);
>+  } else if (s0ix_failed(pmcdev)) {
>+
>+  const struct pmc_bit_map **maps = pmcdev->map-
>>slps0_dbg_maps;
>+  const struct pmc_bit_map *map;
>+  int offset = pmcdev->map->slps0_dbg_offset;
>+  u32 data;
>+
>+  dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
>+   pmcdev->s0ix_counter);
>+  while (*maps) {
>+  map = *maps;
>+  data = pmc_core_reg_read(pmcdev, offset);
>+  offset += 4;
>+  while (map->name) {
>+  dev_warn(dev, "SLP_S0_DBG: %-32s\tState:
>%s\n",
>+   map->name,
>+   data & map->bit_mask ? "Yes" : "No");
>+  ++map;
>+  }
>+  ++maps;
>+  }
>+  }
>+  return 0;
>+}
>+
>+#endif
>+
>+const struct dev_pm_ops pmc_core_pm_ops = {
>+  SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend,
>pmc_core_resume)
These PM Ops routines will be called not just in s2idle scenario, but also in 
other suspend scenarios like s2ram, s2disk. However actual functionalities 
served by these routines are relevant only for s2idle.
That means we will end up having false errors in s2ram/s2disk scenarios as 
PC10/s0ix counters wont increment in those scenarios. 

Vishwa
>+};
>+
> static struct platform_driver pmc_core_driver = {
>   .driver = {
>   .name = "pmc_core",
>+  .pm = &pmc_core_pm_ops
>   },
>   .probe = pmc_core_probe,
>   .remove = pmc_core_remove,
>diff --git a/drivers/platform/x86/intel_pmc_core.h
>b/drivers/platform/x86/intel_pmc_core.h
>index 88d9c0653a5f..fdee5772e532 100644
>--- a/drivers/platform/x86/intel_pmc_core.h
>+++ b/drivers/platform/x86/i

Re: [PATCH 2/2] platform/x86: intel_pmc_core: Allow to dump debug registers on S0ix failure

2019-03-16 Thread Rajneesh Bhardwaj
On Wed, Mar 13, 2019 at 03:21:24PM -0700, Rajat Jain wrote:
> Add a module parameter which when enabled, will check on resume, if the
> last S0ix attempt was successful. If not, the driver would provide
> helpful debug information (which gets latched during the failed suspend
> attempt) to debug the S0ix failure.

+ Srinivas and David.

> 
> This information is very useful to debug S0ix failures. Specially since
> the latched debug information will be lost (over-written) if the system
> attempts to go into runtime (or imminent) S0ix again after that failed
> suspend attempt.
> 
> Signed-off-by: Rajat Jain 
> ---
>  drivers/platform/x86/intel_pmc_core.c | 86 +++
>  drivers/platform/x86/intel_pmc_core.h |  7 +++
>  2 files changed, 93 insertions(+)
> 
> diff --git a/drivers/platform/x86/intel_pmc_core.c 
> b/drivers/platform/x86/intel_pmc_core.c
> index 55578d07610c..b1f4405a27ce 100644
> --- a/drivers/platform/x86/intel_pmc_core.c
> +++ b/drivers/platform/x86/intel_pmc_core.c
> @@ -20,6 +20,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -890,9 +891,94 @@ static int pmc_core_remove(struct platform_device *pdev)
>   return 0;
>  }
>  
> +#ifdef CONFIG_PM_SLEEP
> +
> +static bool warn_on_s0ix_failures;
> +module_param(warn_on_s0ix_failures, bool, 0644);
> +MODULE_PARM_DESC(warn_on_s0ix_failures, "Check and warn for S0ix failures");
> +
> +static int pmc_core_suspend(struct device *dev)
> +{
> + struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> +
> + /* Save PC10 and S0ix residency for checking later */
> + if (warn_on_s0ix_failures &&
> + !rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter) &&
> + !pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
> + pmcdev->check_counters = true;
> + else
> + pmcdev->check_counters = false;
> +
> + return 0;
> +}
> +
> +static inline bool pc10_failed(struct pmc_dev *pmcdev)
> +{
> + u64 pc10_counter;
> +
> + if (!rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter) &&
> + pc10_counter == pmcdev->pc10_counter)
> + return true;
> + else
> + return false;
> +}
> +
> +static inline bool s0ix_failed(struct pmc_dev *pmcdev)
> +{
> + u64 s0ix_counter;
> +
> + if (!pmc_core_dev_state_get(pmcdev, &s0ix_counter) &&
> + s0ix_counter == pmcdev->s0ix_counter)
> + return true;
> + else
> + return false;
> +}
> +
> +static int pmc_core_resume(struct device *dev)
> +{
> + struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> +
> + if (!pmcdev->check_counters)
> + return 0;
> +
> + if (pc10_failed(pmcdev)) {
> + dev_info(dev, "PC10 entry had failed (PC10 cnt=0x%llx)\n",
> +  pmcdev->pc10_counter);
> + } else if (s0ix_failed(pmcdev)) {
> +
> + const struct pmc_bit_map **maps = pmcdev->map->slps0_dbg_maps;

slps0_dbg feature is not available on Skylake and Kabylake. PCH IP power
gating status is available on all.

> + const struct pmc_bit_map *map;
> + int offset = pmcdev->map->slps0_dbg_offset;
> + u32 data;
> +
> + dev_warn(dev, "S0ix entry had failed (S0ix cnt=%llu)\n",
> +  pmcdev->s0ix_counter);

I feel this dev_warn may be frowned upon and should be possible kept behind
something like pm_debug_messages_on though module param helps mitigate it.

> + while (*maps) {
> + map = *maps;
> + data = pmc_core_reg_read(pmcdev, offset);
> + offset += 4;
> + while (map->name) {
> + dev_warn(dev, "SLP_S0_DBG: %-32s\tState: %s\n",
> +  map->name,
> +  data & map->bit_mask ? "Yes" : "No");
> + ++map;
> + }
> + ++maps;
> + }
> + }
> + return 0;
> +}
> +
> +#endif
> +
> +const struct dev_pm_ops pmc_core_pm_ops = {
> + SET_LATE_SYSTEM_SLEEP_PM_OPS(pmc_core_suspend, pmc_core_resume)
> +};
> +
>  static struct platform_driver pmc_core_driver = {
>   .driver = {
>   .name = "pmc_core",
> + .pm = &pmc_core_pm_ops
>   },
>   .probe = pmc_core_probe,
>   .remove = pmc_core_remove,
> diff --git a/drivers/platform/x86/intel_pmc_core.h 
> b/drivers/platform/x86/intel_pmc_core.h
> index 88d9c0653a5f..fdee5772e532 100644
> --- a/drivers/platform/x86/intel_pmc_core.h
> +++ b/drivers/platform/x86/intel_pmc_core.h
> @@ -241,6 +241,9 @@ struct pmc_reg_map {
>   * @pmc_xram_read_bit:   flag to indicate whether PMC XRAM shadow 
> registers
>   *   used to read MPHY PG and PLL status are available
>   * @mutex_lock:  mutex to complete one transcation
> + * @check_counters:  On resume, check if counters are getting inc