Re: [PATCH v5 2/2] ThunderX2: Add Cavium ThunderX2 SoC UNCORE PMU driver

2018-05-18 Thread Ganapatrao Kulkarni
On Thu, May 17, 2018 at 4:42 PM, John Garry  wrote:
> On 16/05/2018 05:55, Ganapatrao Kulkarni wrote:
>>
>> This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory
>> Controller(DMC) and Level 3 Cache(L3C).
>>
>
> Hi,
>
> Just some coding comments below:
>
>> ThunderX2 has 8 independent DMC PMUs to capture performance events
>> corresponding to 8 channels of DDR4 Memory Controller and 16 independent
>> L3C PMUs to capture events corresponding to 16 tiles of L3 cache.
>> Each PMU supports up to 4 counters. All counters lack overflow interrupt
>> and are sampled periodically.
>>
>> Signed-off-by: Ganapatrao Kulkarni 
>> ---
>>  drivers/perf/Kconfig |   8 +
>>  drivers/perf/Makefile|   1 +
>>  drivers/perf/thunderx2_pmu.c | 965
>> +++
>>  include/linux/cpuhotplug.h   |   1 +
>>  4 files changed, 975 insertions(+)
>>  create mode 100644 drivers/perf/thunderx2_pmu.c
>>
>> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
>> index 28bb5a0..eafd0fc 100644
>> --- a/drivers/perf/Kconfig
>> +++ b/drivers/perf/Kconfig
>> @@ -85,6 +85,14 @@ config QCOM_L3_PMU
>>Adds the L3 cache PMU into the perf events subsystem for
>>monitoring L3 cache events.
>>
>> +config THUNDERX2_PMU
>> +bool "Cavium ThunderX2 SoC PMU UNCORE"
>> +depends on ARCH_THUNDER2 && PERF_EVENTS && ACPI
>
>
> Is the explicit dependency for PERF_EVENTS required, since we're under the
> PERF_EVENTS menu?

Not really; I can drop this.
>
> And IIRC for other perf drivers we required a dependency on ARM64 - is that
> required here also? I see arm_smccc_smc() calls in the code...

ok.
>
>
>> +   help
>> + Provides support for ThunderX2 UNCORE events.
>> + The SoC has PMU support in its L3 cache controller (L3C) and
>> + in the DDR4 Memory Controller (DMC).
>> +
>>  config XGENE_PMU
>>  depends on ARCH_XGENE
>>  bool "APM X-Gene SoC PMU"
>> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
>> index b3902bd..909f27f 100644
>> --- a/drivers/perf/Makefile
>> +++ b/drivers/perf/Makefile
>> @@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
>>  obj-$(CONFIG_HISI_PMU) += hisilicon/
>>  obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
>>  obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
>> +obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
>>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>>  obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
>> diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
>> new file mode 100644
>> index 000..0401443
>> --- /dev/null
>> +++ b/drivers/perf/thunderx2_pmu.c
>> @@ -0,0 +1,965 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * CAVIUM THUNDERX2 SoC PMU UNCORE
>> + *
>> + * Copyright (C) 2018 Cavium Inc.
>> + * Author: Ganapatrao Kulkarni 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>> + */
>
>
> Isn't this the same as the SPDX?

OK, I will remove it.
>
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/* L3c and DMC has 16 and 8 channels per socket respectively.
>
>
> L3C, right?

ok
>
>> + * Each Channel supports UNCORE PMU device and consists of
>> + * 4 independent programmable counters. Counters are 32 bit
>> + * and does not support overflow interrupt, they needs to be
>
>
> /s/needs/need/, /s/does/do/

ok
>
>> + * sampled before overflow(i.e, at every 2 seconds).
>
>
> how can you ensure that this value is low enough?
>
> "I saw this comment in previous patch:
>> Given that all channels compete for access to the muxed register
>> interface, I suspect we need to try more often than once every 2
>> seconds...
>
> 2 seconds seems to be sufficient. So far testing looks good."
>
> Can you provide any more analytical reasoning than this?
>
>> + */
>> +
>> +#define UNCORE_MAX_COUNTERS4
>> +#define UNCORE_L3_MAX_TILES16
>> +#define UNCORE_DMC_MAX_CHANNELS8
>> +
>> +#define UNCORE_HRTIMER_INTERVAL(2 * NSEC_PER_SEC)
>> +#define GET_EVENTID(ev)((ev->hw.config) & 0x1ff)
>> +#define GET_COUNTERID(ev)  ((ev->hw.idx) & 0xf)
>> +#define GET_CHANNELID(pmu_uncore)  (pmu_uncore->channel)
>> +#define DMC_EVENT_CFG(idx, val)((val) << (((idx) * 8) +
>> 1))
>> +
>> 

Re: [PATCH v5 2/2] ThunderX2: Add Cavium ThunderX2 SoC UNCORE PMU driver

2018-05-18 Thread Ganapatrao Kulkarni
On Thu, May 17, 2018 at 4:42 PM, John Garry  wrote:
> On 16/05/2018 05:55, Ganapatrao Kulkarni wrote:
>>
>> This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory
>> Controller(DMC) and Level 3 Cache(L3C).
>>
>
> Hi,
>
> Just some coding comments below:
>
>> ThunderX2 has 8 independent DMC PMUs to capture performance events
>> corresponding to 8 channels of DDR4 Memory Controller and 16 independent
>> L3C PMUs to capture events corresponding to 16 tiles of L3 cache.
>> Each PMU supports up to 4 counters. All counters lack overflow interrupt
>> and are sampled periodically.
>>
>> Signed-off-by: Ganapatrao Kulkarni 
>> ---
>>  drivers/perf/Kconfig |   8 +
>>  drivers/perf/Makefile|   1 +
>>  drivers/perf/thunderx2_pmu.c | 965
>> +++
>>  include/linux/cpuhotplug.h   |   1 +
>>  4 files changed, 975 insertions(+)
>>  create mode 100644 drivers/perf/thunderx2_pmu.c
>>
>> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
>> index 28bb5a0..eafd0fc 100644
>> --- a/drivers/perf/Kconfig
>> +++ b/drivers/perf/Kconfig
>> @@ -85,6 +85,14 @@ config QCOM_L3_PMU
>>Adds the L3 cache PMU into the perf events subsystem for
>>monitoring L3 cache events.
>>
>> +config THUNDERX2_PMU
>> +bool "Cavium ThunderX2 SoC PMU UNCORE"
>> +depends on ARCH_THUNDER2 && PERF_EVENTS && ACPI
>
>
> Is the explicit dependency for PERF_EVENTS required, since we're under the
> PERF_EVENTS menu?

Not really; I can drop this.
>
> And IIRC for other perf drivers we required a dependency on ARM64 - is that
> required here also? I see arm_smccc_smc() calls in the code...

ok.
>
>
>> +   help
>> + Provides support for ThunderX2 UNCORE events.
>> + The SoC has PMU support in its L3 cache controller (L3C) and
>> + in the DDR4 Memory Controller (DMC).
>> +
>>  config XGENE_PMU
>>  depends on ARCH_XGENE
>>  bool "APM X-Gene SoC PMU"
>> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
>> index b3902bd..909f27f 100644
>> --- a/drivers/perf/Makefile
>> +++ b/drivers/perf/Makefile
>> @@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
>>  obj-$(CONFIG_HISI_PMU) += hisilicon/
>>  obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
>>  obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
>> +obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
>>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>>  obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
>> diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
>> new file mode 100644
>> index 000..0401443
>> --- /dev/null
>> +++ b/drivers/perf/thunderx2_pmu.c
>> @@ -0,0 +1,965 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * CAVIUM THUNDERX2 SoC PMU UNCORE
>> + *
>> + * Copyright (C) 2018 Cavium Inc.
>> + * Author: Ganapatrao Kulkarni 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>> + */
>
>
> Isn't this the same as the SPDX?

OK, I will remove it.
>
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/* L3c and DMC has 16 and 8 channels per socket respectively.
>
>
> L3C, right?

ok
>
>> + * Each Channel supports UNCORE PMU device and consists of
>> + * 4 independent programmable counters. Counters are 32 bit
>> + * and does not support overflow interrupt, they needs to be
>
>
> /s/needs/need/, /s/does/do/

ok
>
>> + * sampled before overflow(i.e, at every 2 seconds).
>
>
> how can you ensure that this value is low enough?
>
> "I saw this comment in previous patch:
>> Given that all channels compete for access to the muxed register
>> interface, I suspect we need to try more often than once every 2
>> seconds...
>
> 2 seconds seems to be sufficient. So far testing looks good."
>
> Can you provide any more analytical reasoning than this?
>
>> + */
>> +
>> +#define UNCORE_MAX_COUNTERS4
>> +#define UNCORE_L3_MAX_TILES16
>> +#define UNCORE_DMC_MAX_CHANNELS8
>> +
>> +#define UNCORE_HRTIMER_INTERVAL(2 * NSEC_PER_SEC)
>> +#define GET_EVENTID(ev)((ev->hw.config) & 0x1ff)
>> +#define GET_COUNTERID(ev)  ((ev->hw.idx) & 0xf)
>> +#define GET_CHANNELID(pmu_uncore)  (pmu_uncore->channel)
>> +#define DMC_EVENT_CFG(idx, val)((val) << (((idx) * 8) +
>> 1))
>> +
>> +#define DMC_COUNTER_CTL0x234
>> +#define DMC_COUNTER_DATA  

Re: [PATCH v5 2/2] ThunderX2: Add Cavium ThunderX2 SoC UNCORE PMU driver

2018-05-17 Thread John Garry

On 16/05/2018 05:55, Ganapatrao Kulkarni wrote:

This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory
Controller(DMC) and Level 3 Cache(L3C).



Hi,

Just some coding comments below:


ThunderX2 has 8 independent DMC PMUs to capture performance events
corresponding to 8 channels of DDR4 Memory Controller and 16 independent
L3C PMUs to capture events corresponding to 16 tiles of L3 cache.
Each PMU supports up to 4 counters. All counters lack overflow interrupt
and are sampled periodically.

Signed-off-by: Ganapatrao Kulkarni 
---
 drivers/perf/Kconfig |   8 +
 drivers/perf/Makefile|   1 +
 drivers/perf/thunderx2_pmu.c | 965 +++
 include/linux/cpuhotplug.h   |   1 +
 4 files changed, 975 insertions(+)
 create mode 100644 drivers/perf/thunderx2_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 28bb5a0..eafd0fc 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -85,6 +85,14 @@ config QCOM_L3_PMU
   Adds the L3 cache PMU into the perf events subsystem for
   monitoring L3 cache events.

+config THUNDERX2_PMU
+bool "Cavium ThunderX2 SoC PMU UNCORE"
+depends on ARCH_THUNDER2 && PERF_EVENTS && ACPI


Is the explicit dependency for PERF_EVENTS required, since we're under 
the PERF_EVENTS menu?


And IIRC for other perf drivers we required a dependency on ARM64 - is 
that required here also? I see arm_smccc_smc() calls in the code...



+   help
+ Provides support for ThunderX2 UNCORE events.
+ The SoC has PMU support in its L3 cache controller (L3C) and
+ in the DDR4 Memory Controller (DMC).
+
 config XGENE_PMU
 depends on ARCH_XGENE
 bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b3902bd..909f27f 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
new file mode 100644
index 000..0401443
--- /dev/null
+++ b/drivers/perf/thunderx2_pmu.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CAVIUM THUNDERX2 SoC PMU UNCORE
+ *
+ * Copyright (C) 2018 Cavium Inc.
+ * Author: Ganapatrao Kulkarni 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */


Isn't this the same as the SPDX?


+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* L3c and DMC has 16 and 8 channels per socket respectively.


L3C, right?


+ * Each Channel supports UNCORE PMU device and consists of
+ * 4 independent programmable counters. Counters are 32 bit
+ * and does not support overflow interrupt, they needs to be


/s/needs/need/, /s/does/do/


+ * sampled before overflow(i.e, at every 2 seconds).


how can you ensure that this value is low enough?

"I saw this comment in previous patch:
> Given that all channels compete for access to the muxed register
> interface, I suspect we need to try more often than once every 2
> seconds...

2 seconds seems to be sufficient. So far testing looks good."

Can you provide any more analytical reasoning than this?


+ */
+
+#define UNCORE_MAX_COUNTERS4
+#define UNCORE_L3_MAX_TILES16
+#define UNCORE_DMC_MAX_CHANNELS8
+
+#define UNCORE_HRTIMER_INTERVAL(2 * NSEC_PER_SEC)
+#define GET_EVENTID(ev)((ev->hw.config) & 0x1ff)
+#define GET_COUNTERID(ev)  ((ev->hw.idx) & 0xf)
+#define GET_CHANNELID(pmu_uncore)  (pmu_uncore->channel)
+#define DMC_EVENT_CFG(idx, val)((val) << (((idx) * 8) + 1))
+
+#define DMC_COUNTER_CTL0x234
+#define DMC_COUNTER_DATA   0x240
+#define L3C_COUNTER_CTL0xA8
+#define L3C_COUNTER_DATA   0xAC


I feel it's generally better to keep register offsets in numeric order 
(if indeed, that is what they are)



+
+#define THUNDERX2_SMC_CALL_ID  0xC200FF00
+#define THUNDERX2_SMC_SET_CHANNEL  0xB010
+
+enum thunderx2_uncore_l3_events {
+   L3_EVENT_NONE,
+   

Re: [PATCH v5 2/2] ThunderX2: Add Cavium ThunderX2 SoC UNCORE PMU driver

2018-05-17 Thread John Garry

On 16/05/2018 05:55, Ganapatrao Kulkarni wrote:

This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory
Controller(DMC) and Level 3 Cache(L3C).



Hi,

Just some coding comments below:


ThunderX2 has 8 independent DMC PMUs to capture performance events
corresponding to 8 channels of DDR4 Memory Controller and 16 independent
L3C PMUs to capture events corresponding to 16 tiles of L3 cache.
Each PMU supports up to 4 counters. All counters lack overflow interrupt
and are sampled periodically.

Signed-off-by: Ganapatrao Kulkarni 
---
 drivers/perf/Kconfig |   8 +
 drivers/perf/Makefile|   1 +
 drivers/perf/thunderx2_pmu.c | 965 +++
 include/linux/cpuhotplug.h   |   1 +
 4 files changed, 975 insertions(+)
 create mode 100644 drivers/perf/thunderx2_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 28bb5a0..eafd0fc 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -85,6 +85,14 @@ config QCOM_L3_PMU
   Adds the L3 cache PMU into the perf events subsystem for
   monitoring L3 cache events.

+config THUNDERX2_PMU
+bool "Cavium ThunderX2 SoC PMU UNCORE"
+depends on ARCH_THUNDER2 && PERF_EVENTS && ACPI


Is the explicit dependency for PERF_EVENTS required, since we're under 
the PERF_EVENTS menu?


And IIRC for other perf drivers we required a dependency on ARM64 - is 
that required here also? I see arm_smccc_smc() calls in the code...



+   help
+ Provides support for ThunderX2 UNCORE events.
+ The SoC has PMU support in its L3 cache controller (L3C) and
+ in the DDR4 Memory Controller (DMC).
+
 config XGENE_PMU
 depends on ARCH_XGENE
 bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b3902bd..909f27f 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
new file mode 100644
index 000..0401443
--- /dev/null
+++ b/drivers/perf/thunderx2_pmu.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CAVIUM THUNDERX2 SoC PMU UNCORE
+ *
+ * Copyright (C) 2018 Cavium Inc.
+ * Author: Ganapatrao Kulkarni 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */


Isn't this the same as the SPDX?


+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* L3c and DMC has 16 and 8 channels per socket respectively.


L3C, right?


+ * Each Channel supports UNCORE PMU device and consists of
+ * 4 independent programmable counters. Counters are 32 bit
+ * and does not support overflow interrupt, they needs to be


/s/needs/need/, /s/does/do/


+ * sampled before overflow(i.e, at every 2 seconds).


how can you ensure that this value is low enough?

"I saw this comment in previous patch:
> Given that all channels compete for access to the muxed register
> interface, I suspect we need to try more often than once every 2
> seconds...

2 seconds seems to be sufficient. So far testing looks good."

Can you provide any more analytical reasoning than this?


+ */
+
+#define UNCORE_MAX_COUNTERS4
+#define UNCORE_L3_MAX_TILES16
+#define UNCORE_DMC_MAX_CHANNELS8
+
+#define UNCORE_HRTIMER_INTERVAL(2 * NSEC_PER_SEC)
+#define GET_EVENTID(ev)((ev->hw.config) & 0x1ff)
+#define GET_COUNTERID(ev)  ((ev->hw.idx) & 0xf)
+#define GET_CHANNELID(pmu_uncore)  (pmu_uncore->channel)
+#define DMC_EVENT_CFG(idx, val)((val) << (((idx) * 8) + 1))
+
+#define DMC_COUNTER_CTL0x234
+#define DMC_COUNTER_DATA   0x240
+#define L3C_COUNTER_CTL0xA8
+#define L3C_COUNTER_DATA   0xAC


I feel it's generally better to keep register offsets in numeric order 
(if indeed, that is what they are)



+
+#define THUNDERX2_SMC_CALL_ID  0xC200FF00
+#define THUNDERX2_SMC_SET_CHANNEL  0xB010
+
+enum thunderx2_uncore_l3_events {
+   L3_EVENT_NONE,
+   L3_EVENT_NBU_CANCEL,
+   L3_EVENT_DIB_RETRY,
+   

[PATCH v5 2/2] ThunderX2: Add Cavium ThunderX2 SoC UNCORE PMU driver

2018-05-15 Thread Ganapatrao Kulkarni
This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory
Controller(DMC) and Level 3 Cache(L3C).

ThunderX2 has 8 independent DMC PMUs to capture performance events
corresponding to 8 channels of DDR4 Memory Controller and 16 independent
L3C PMUs to capture events corresponding to 16 tiles of L3 cache.
Each PMU supports up to 4 counters. All counters lack overflow interrupt
and are sampled periodically.

Signed-off-by: Ganapatrao Kulkarni 
---
 drivers/perf/Kconfig |   8 +
 drivers/perf/Makefile|   1 +
 drivers/perf/thunderx2_pmu.c | 965 +++
 include/linux/cpuhotplug.h   |   1 +
 4 files changed, 975 insertions(+)
 create mode 100644 drivers/perf/thunderx2_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 28bb5a0..eafd0fc 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -85,6 +85,14 @@ config QCOM_L3_PMU
   Adds the L3 cache PMU into the perf events subsystem for
   monitoring L3 cache events.
 
+config THUNDERX2_PMU
+bool "Cavium ThunderX2 SoC PMU UNCORE"
+depends on ARCH_THUNDER2 && PERF_EVENTS && ACPI
+   help
+ Provides support for ThunderX2 UNCORE events.
+ The SoC has PMU support in its L3 cache controller (L3C) and
+ in the DDR4 Memory Controller (DMC).
+
 config XGENE_PMU
 depends on ARCH_XGENE
 bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b3902bd..909f27f 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
new file mode 100644
index 000..0401443
--- /dev/null
+++ b/drivers/perf/thunderx2_pmu.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CAVIUM THUNDERX2 SoC PMU UNCORE
+ *
+ * Copyright (C) 2018 Cavium Inc.
+ * Author: Ganapatrao Kulkarni 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* L3c and DMC has 16 and 8 channels per socket respectively.
+ * Each Channel supports UNCORE PMU device and consists of
+ * 4 independent programmable counters. Counters are 32 bit
+ * and does not support overflow interrupt, they needs to be
+ * sampled before overflow(i.e, at every 2 seconds).
+ */
+
+#define UNCORE_MAX_COUNTERS4
+#define UNCORE_L3_MAX_TILES16
+#define UNCORE_DMC_MAX_CHANNELS8
+
+#define UNCORE_HRTIMER_INTERVAL(2 * NSEC_PER_SEC)
+#define GET_EVENTID(ev)((ev->hw.config) & 0x1ff)
+#define GET_COUNTERID(ev)  ((ev->hw.idx) & 0xf)
+#define GET_CHANNELID(pmu_uncore)  (pmu_uncore->channel)
+#define DMC_EVENT_CFG(idx, val)((val) << (((idx) * 8) + 1))
+
+#define DMC_COUNTER_CTL0x234
+#define DMC_COUNTER_DATA   0x240
+#define L3C_COUNTER_CTL0xA8
+#define L3C_COUNTER_DATA   0xAC
+
+#define THUNDERX2_SMC_CALL_ID  0xC200FF00
+#define THUNDERX2_SMC_SET_CHANNEL  0xB010
+
+enum thunderx2_uncore_l3_events {
+   L3_EVENT_NONE,
+   L3_EVENT_NBU_CANCEL,
+   L3_EVENT_DIB_RETRY,
+   L3_EVENT_DOB_RETRY,
+   L3_EVENT_DIB_CREDIT_RETRY,
+   L3_EVENT_DOB_CREDIT_RETRY,
+   L3_EVENT_FORCE_RETRY,
+   L3_EVENT_IDX_CONFLICT_RETRY,
+   L3_EVENT_EVICT_CONFLICT_RETRY,
+   L3_EVENT_BANK_CONFLICT_RETRY,
+   L3_EVENT_FILL_ENTRY_RETRY,
+   L3_EVENT_EVICT_NOT_READY_RETRY,
+   L3_EVENT_L3_RETRY,
+   L3_EVENT_READ_REQ,
+   L3_EVENT_WRITE_BACK_REQ,
+   L3_EVENT_INVALIDATE_NWRITE_REQ,
+   L3_EVENT_INV_REQ,
+   L3_EVENT_SELF_REQ,
+   L3_EVENT_REQ,
+   L3_EVENT_EVICT_REQ,
+   L3_EVENT_INVALIDATE_NWRITE_HIT,
+   L3_EVENT_INVALIDATE_HIT,
+   L3_EVENT_SELF_HIT,
+   L3_EVENT_READ_HIT,
+   L3_EVENT_MAX,
+};
+
+enum thunderx2_uncore_dmc_events {
+   DMC_EVENT_NONE,
+   DMC_EVENT_COUNT_CYCLES,
+   DMC_EVENT_RES2,
+   DMC_EVENT_RES3,
+   

[PATCH v5 2/2] ThunderX2: Add Cavium ThunderX2 SoC UNCORE PMU driver

2018-05-15 Thread Ganapatrao Kulkarni
This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory
Controller(DMC) and Level 3 Cache(L3C).

ThunderX2 has 8 independent DMC PMUs to capture performance events
corresponding to 8 channels of DDR4 Memory Controller and 16 independent
L3C PMUs to capture events corresponding to 16 tiles of L3 cache.
Each PMU supports up to 4 counters. All counters lack overflow interrupt
and are sampled periodically.

Signed-off-by: Ganapatrao Kulkarni 
---
 drivers/perf/Kconfig |   8 +
 drivers/perf/Makefile|   1 +
 drivers/perf/thunderx2_pmu.c | 965 +++
 include/linux/cpuhotplug.h   |   1 +
 4 files changed, 975 insertions(+)
 create mode 100644 drivers/perf/thunderx2_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 28bb5a0..eafd0fc 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -85,6 +85,14 @@ config QCOM_L3_PMU
   Adds the L3 cache PMU into the perf events subsystem for
   monitoring L3 cache events.
 
+config THUNDERX2_PMU
+bool "Cavium ThunderX2 SoC PMU UNCORE"
+depends on ARCH_THUNDER2 && PERF_EVENTS && ACPI
+   help
+ Provides support for ThunderX2 UNCORE events.
+ The SoC has PMU support in its L3 cache controller (L3C) and
+ in the DDR4 Memory Controller (DMC).
+
 config XGENE_PMU
 depends on ARCH_XGENE
 bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b3902bd..909f27f 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
new file mode 100644
index 000..0401443
--- /dev/null
+++ b/drivers/perf/thunderx2_pmu.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CAVIUM THUNDERX2 SoC PMU UNCORE
+ *
+ * Copyright (C) 2018 Cavium Inc.
+ * Author: Ganapatrao Kulkarni 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* L3c and DMC has 16 and 8 channels per socket respectively.
+ * Each Channel supports UNCORE PMU device and consists of
+ * 4 independent programmable counters. Counters are 32 bit
+ * and does not support overflow interrupt, they needs to be
+ * sampled before overflow(i.e, at every 2 seconds).
+ */
+
+#define UNCORE_MAX_COUNTERS4
+#define UNCORE_L3_MAX_TILES16
+#define UNCORE_DMC_MAX_CHANNELS8
+
+#define UNCORE_HRTIMER_INTERVAL(2 * NSEC_PER_SEC)
+#define GET_EVENTID(ev)((ev->hw.config) & 0x1ff)
+#define GET_COUNTERID(ev)  ((ev->hw.idx) & 0xf)
+#define GET_CHANNELID(pmu_uncore)  (pmu_uncore->channel)
+#define DMC_EVENT_CFG(idx, val)((val) << (((idx) * 8) + 1))
+
+#define DMC_COUNTER_CTL0x234
+#define DMC_COUNTER_DATA   0x240
+#define L3C_COUNTER_CTL0xA8
+#define L3C_COUNTER_DATA   0xAC
+
+#define THUNDERX2_SMC_CALL_ID  0xC200FF00
+#define THUNDERX2_SMC_SET_CHANNEL  0xB010
+
+enum thunderx2_uncore_l3_events {
+   L3_EVENT_NONE,
+   L3_EVENT_NBU_CANCEL,
+   L3_EVENT_DIB_RETRY,
+   L3_EVENT_DOB_RETRY,
+   L3_EVENT_DIB_CREDIT_RETRY,
+   L3_EVENT_DOB_CREDIT_RETRY,
+   L3_EVENT_FORCE_RETRY,
+   L3_EVENT_IDX_CONFLICT_RETRY,
+   L3_EVENT_EVICT_CONFLICT_RETRY,
+   L3_EVENT_BANK_CONFLICT_RETRY,
+   L3_EVENT_FILL_ENTRY_RETRY,
+   L3_EVENT_EVICT_NOT_READY_RETRY,
+   L3_EVENT_L3_RETRY,
+   L3_EVENT_READ_REQ,
+   L3_EVENT_WRITE_BACK_REQ,
+   L3_EVENT_INVALIDATE_NWRITE_REQ,
+   L3_EVENT_INV_REQ,
+   L3_EVENT_SELF_REQ,
+   L3_EVENT_REQ,
+   L3_EVENT_EVICT_REQ,
+   L3_EVENT_INVALIDATE_NWRITE_HIT,
+   L3_EVENT_INVALIDATE_HIT,
+   L3_EVENT_SELF_HIT,
+   L3_EVENT_READ_HIT,
+   L3_EVENT_MAX,
+};
+
+enum thunderx2_uncore_dmc_events {
+   DMC_EVENT_NONE,
+   DMC_EVENT_COUNT_CYCLES,
+   DMC_EVENT_RES2,
+   DMC_EVENT_RES3,
+   DMC_EVENT_RES4,
+   DMC_EVENT_RES5,
+