Re: [PATCH 2/2] perf: add arm64 smmuv3 pmu driver

2018-04-02 Thread Neil Leeder

Hi Hanjun,

On 4/2/2018 10:24 AM, Hanjun Guo wrote:



I think we need to wait for the new version of the IORT spec,
which includes the fix for the two base addresses of the SMMUv3
PMCG (the current version represents just one).

Thanks
Hanjun


It's in rev D which is available now:
http://infocenter.arm.com/help/topic/com.arm.doc.den0049d/DEN0049D_IO_Remapping_Table.pdf

Neil


Re: [PATCH 2/2] perf: add arm64 smmuv3 pmu driver

2018-03-31 Thread Neil Leeder

Hi Yisheng Xie,

On 3/29/2018 03:03 AM, Yisheng Xie wrote:


Hi Neil,

On 2017/8/5 3:59, Neil Leeder wrote:

+    mem_resource_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+    mem_map_0 = devm_ioremap_resource(&pdev->dev, mem_resource_0);
+

Can we use devm_ioremap instead? The reg_base of smmu_pmu is
IMPLEMENTATION DEFINED. If the registers of smmu_pmu are inside the smmu,
devm_ioremap_resource will fail and return -EBUSY, e.g.:

  smmu reg ranges:            0x180000000 ~ 0x1801fffff
  its smmu_pmu reg ranges:    0x180001000 ~ 0x180001fff

Just to let you know that I no longer work at Qualcomm and I won't be 
able to provide updates to this patchset. I expect that others from my 
former team at Qualcomm will pick up ownership.


Neil


[PATCH] perf: qcom_l2_pmu: don't allow guest access

2017-12-06 Thread Neil Leeder
Guests cannot access IMPDEF system registers, which are used
by this driver. Disable the driver if it's running in a guest VM.

Signed-off-by: Neil Leeder 
---
 drivers/perf/qcom_l2_pmu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 4fdc848..49dc954 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -1057,6 +1057,10 @@ static int __init register_l2_cache_pmu_driver(void)
 {
int err;
 
+   /* Don't enable driver if running as guest */
+   if (!is_hyp_mode_available())
+   return 0;
+
err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
  "AP_PERF_ARM_QCOM_L2_ONLINE",
  l2cache_pmu_online_cpu,
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH] perf: qcom_l2_pmu: add event names

2017-09-14 Thread Neil Leeder
Add event names so that common events can be
specified symbolically, for example:

l2cache_0/total-reads/,l2cache_0/cycles/

Event names are displayed in 'perf list'.

Signed-off-by: Neil Leeder 
---
 drivers/perf/qcom_l2_pmu.c | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index b242cce..4fdc848 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -92,6 +92,21 @@
 
 #define reg_idx(reg, i) (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
 
+/*
+ * Events
+ */
+#define L2_EVENT_CYCLES                    0xfe
+#define L2_EVENT_DCACHE_OPS                0x400
+#define L2_EVENT_ICACHE_OPS                0x401
+#define L2_EVENT_TLBI                      0x402
+#define L2_EVENT_BARRIERS                  0x403
+#define L2_EVENT_TOTAL_READS               0x405
+#define L2_EVENT_TOTAL_WRITES              0x406
+#define L2_EVENT_TOTAL_REQUESTS            0x407
+#define L2_EVENT_LDREX                     0x420
+#define L2_EVENT_STREX                     0x421
+#define L2_EVENT_CLREX                     0x422
+
 static DEFINE_RAW_SPINLOCK(l2_access_lock);
 
 /**
@@ -700,9 +715,12 @@ static ssize_t l2_cache_pmu_cpumask_show(struct device 
*dev,
 /* CCG format for perf RAW codes. */
 PMU_FORMAT_ATTR(l2_code,   "config:4-11");
 PMU_FORMAT_ATTR(l2_group,  "config:0-3");
+PMU_FORMAT_ATTR(event, "config:0-11");
+
 static struct attribute *l2_cache_pmu_formats[] = {
&format_attr_l2_code.attr,
&format_attr_l2_group.attr,
+   &format_attr_event.attr,
NULL,
 };
 
@@ -711,9 +729,45 @@ static ssize_t l2_cache_pmu_cpumask_show(struct device 
*dev,
.attrs = l2_cache_pmu_formats,
 };
 
+static ssize_t l2cache_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+   struct perf_pmu_events_attr *pmu_attr;
+
+   pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+   return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L2CACHE_EVENT_ATTR(_name, _id)  \
+   (&((struct perf_pmu_events_attr[]) { \
+   { .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \
+ .id = _id, }   \
+   })[0].attr.attr)
+
+static struct attribute *l2_cache_pmu_events[] = {
+   L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES),
+   L2CACHE_EVENT_ATTR(dcache-ops, L2_EVENT_DCACHE_OPS),
+   L2CACHE_EVENT_ATTR(icache-ops, L2_EVENT_ICACHE_OPS),
+   L2CACHE_EVENT_ATTR(tlbi, L2_EVENT_TLBI),
+   L2CACHE_EVENT_ATTR(barriers, L2_EVENT_BARRIERS),
+   L2CACHE_EVENT_ATTR(total-reads, L2_EVENT_TOTAL_READS),
+   L2CACHE_EVENT_ATTR(total-writes, L2_EVENT_TOTAL_WRITES),
+   L2CACHE_EVENT_ATTR(total-requests, L2_EVENT_TOTAL_REQUESTS),
+   L2CACHE_EVENT_ATTR(ldrex, L2_EVENT_LDREX),
+   L2CACHE_EVENT_ATTR(strex, L2_EVENT_STREX),
+   L2CACHE_EVENT_ATTR(clrex, L2_EVENT_CLREX),
+   NULL
+};
+
+static struct attribute_group l2_cache_pmu_events_group = {
+   .name = "events",
+   .attrs = l2_cache_pmu_events,
+};
+
 static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
&l2_cache_pmu_format_group,
&l2_cache_pmu_cpumask_group,
+   &l2_cache_pmu_events_group,
NULL,
 };
 
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH 2/2] perf: add arm64 smmuv3 pmu driver

2017-08-04 Thread Neil Leeder
Adds a new driver to support the SMMU v3 PMU and add it into the
perf events framework.

Each SMMU node may have multiple PMUs associated with it, each of
which may support different events.

PMUs are named smmu_0_<phys_addr_page> where <phys_addr_page>
is the physical page address of the SMMU PMCG.
For example, the SMMU PMCG at 0xff88840000 is named smmu_0_ff88840

Filtering by stream id is done by specifying filtering parameters
with the event. Options are:
  filter_enable    - 0 = no filtering, 1 = filtering enabled
  filter_span      - 0 = exact match, 1 = pattern match
  filter_sec       - applies to non-secure (0) or secure (1) namespace
  filter_stream_id - pattern to filter against
Further filtering information is available in the SMMU documentation.

Example: perf stat -e smmu_0_ff88840/transaction,filter_enable=1,
  filter_span=1,filter_stream_id=0x42/ -a pwd
Applies filter pattern 0x42 to transaction events.

SMMU events are not attributable to a CPU, so task mode and sampling
are not supported.

Signed-off-by: Neil Leeder 
---
 drivers/perf/Kconfig  |   9 +
 drivers/perf/Makefile |   1 +
 drivers/perf/arm_smmuv3_pmu.c | 813 ++
 3 files changed, 823 insertions(+)
 create mode 100644 drivers/perf/arm_smmuv3_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e5197ff..e7721d1 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -17,6 +17,15 @@ config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
 
+config ARM_SMMUV3_PMU
+bool "ARM SMMUv3 PMU"
+depends on PERF_EVENTS && ARM64 && ACPI
+  help
+  Provides support for the SMMU version 3 performance monitor unit (PMU)
+  on ARM-based systems.
+  Adds the SMMU PMU into the perf events subsystem for
+  monitoring SMMU performance events.
+
 config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 6420bd4..3012f5e 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_ARM_SMMUV3_PMU) += arm_smmuv3_pmu.o
 obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
new file mode 100644
index 000..1e70791
--- /dev/null
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -0,0 +1,813 @@
+/* Copyright (c) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * This driver adds support for perf events to use the Performance
+ * Monitor Counter Groups (PMCG) associated with an SMMUv3 node
+ * to monitor that node.
+ *
+ * Devices are named smmu_0_<phys_addr_page> where <phys_addr_page>
+ * is the physical page address of the SMMU PMCG.
+ * For example, the SMMU PMCG at 0xff88840000 is named smmu_0_ff88840
+ *
+ * Filtering by stream id is done by specifying filtering parameters
+ * with the event. options are:
+ *   filter_enable    - 0 = no filtering, 1 = filtering enabled
+ *   filter_span      - 0 = exact match, 1 = pattern match
+ *   filter_sec       - filter applies to non-secure (0) or secure (1) namespace
+ *   filter_stream_id - pattern to filter against
+ * Further filtering information is available in the SMMU documentation.
+ *
+ * Example: perf stat -e smmu_0_ff88840/transaction,filter_enable=1,
+ *   filter_span=1,filter_stream_id=0x42/ -a pwd
+ * Applies filter pattern 0x42 to transaction events.
+ *
+ * SMMU events are not attributable to a CPU, so task mode and sampling
+ * are not supported.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define SMMU_PMCG_EVCNTR0   0x0
+#define SMMU_PMCG_EVCNTR(n, stride) (SMMU_PMCG_EVCNTR0 + (n) * (stride))
+#define SMMU_PMCG_EVTYPER0  0x400
+#define SMMU_PMCG_EVTYPER(n)(SMMU_PMCG_EVTYPER0 + (n) * 4)
+#define SMMU_PMCG_EVTYPER_SEC_SID_SHIFT   30
+#define SMMU_PMCG_EVTYPER_SID_SPAN_SHIFT  29
+#define SMMU_PMCG_EVTYPER_EVENT_MASK  GENMASK(15, 0)
+#define SMMU_PMCG_SVR0  0x600
+#define SMMU_PMCG_SVR(n, stride) 

[PATCH 1/2] acpi: arm64: add iort support for PMCG

2017-08-04 Thread Neil Leeder
Add support for the SMMU Performance Monitor Counter Group
information from ACPI. This is in preparation for its use
in the SMMU v3 PMU driver.

Signed-off-by: Neil Leeder 
---
 drivers/acpi/arm64/iort.c | 54 +++
 include/acpi/actbl2.h |  9 +++-
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a3215ee..5a998cd 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -970,6 +970,40 @@ static bool __init arm_smmu_is_coherent(struct 
acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK;
 }
 
+static int __init arm_smmu_pmu_count_resources(struct acpi_iort_node *node)
+{
+   struct acpi_iort_pmcg *pmcg;
+
+   /* Retrieve PMCG specific data */
+   pmcg = (struct acpi_iort_pmcg *)node->node_data;
+
+   /*
+* There are always 2 memory resources.
+* If the overflow_gsiv is present then add that for a total of 3.
+*/
+   return pmcg->overflow_gsiv > 0 ? 3 : 2;
+}
+
+static void __init arm_smmu_pmu_init_resources(struct resource *res,
+  struct acpi_iort_node *node)
+{
+   struct acpi_iort_pmcg *pmcg;
+
+   /* Retrieve PMCG specific data */
+   pmcg = (struct acpi_iort_pmcg *)node->node_data;
+
+   res[0].start = pmcg->base_address;
+   res[0].end = pmcg->base_address + SZ_4K - 1;
+   res[0].flags = IORESOURCE_MEM;
+   res[1].start = pmcg->base_address + SZ_64K;
+   res[1].end = pmcg->base_address + SZ_64K + SZ_4K - 1;
+   res[1].flags = IORESOURCE_MEM;
+
+   if (pmcg->overflow_gsiv)
+   acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow",
+  ACPI_EDGE_SENSITIVE, &res[2]);
+}
+
 struct iort_iommu_config {
const char *name;
int (*iommu_init)(struct acpi_iort_node *node);
@@ -993,6 +1027,12 @@ struct iort_iommu_config {
.iommu_init_resources = arm_smmu_init_resources
 };
 
+static const struct iort_iommu_config iort_arm_smmu_pmcg_cfg __initconst = {
+   .name = "arm-smmu-pmu",
+   .iommu_count_resources = arm_smmu_pmu_count_resources,
+   .iommu_init_resources = arm_smmu_pmu_init_resources
+};
+
 static __init
 const struct iort_iommu_config *iort_get_iommu_cfg(struct acpi_iort_node *node)
 {
@@ -1001,6 +1041,8 @@ const struct iort_iommu_config *iort_get_iommu_cfg(struct 
acpi_iort_node *node)
return &iort_arm_smmu_v3_cfg;
case ACPI_IORT_NODE_SMMU:
return &iort_arm_smmu_cfg;
+   case ACPI_IORT_NODE_PMCG:
+   return &iort_arm_smmu_pmcg_cfg;
default:
return NULL;
}
@@ -1056,6 +1098,15 @@ static int __init iort_add_smmu_platform_device(struct 
acpi_iort_node *node)
if (ret)
goto dev_put;
 
+   /* End of init for PMCG */
+   if (node->type == ACPI_IORT_NODE_PMCG) {
+   ret = platform_device_add(pdev);
+   if (ret)
+   goto dev_put;
+
+   return 0;
+   }
+
/*
 * We expect the dma masks to be equivalent for
 * all SMMUs set-ups
@@ -1131,6 +1182,9 @@ static void __init iort_init_platform_devices(void)
acpi_free_fwnode_static(fwnode);
return;
}
+   } else if (iort_node->type == ACPI_IORT_NODE_PMCG) {
+   if (iort_add_smmu_platform_device(iort_node))
+   return;
}
 
iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 707dda74..2169b6f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -695,7 +695,8 @@ enum acpi_iort_node_type {
ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
ACPI_IORT_NODE_SMMU = 0x03,
-   ACPI_IORT_NODE_SMMU_V3 = 0x04
+   ACPI_IORT_NODE_SMMU_V3 = 0x04,
+   ACPI_IORT_NODE_PMCG = 0x05
 };
 
 struct acpi_iort_id_mapping {
@@ -811,6 +812,12 @@ struct acpi_iort_smmu_v3 {
 #define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE   (1)
 #define ACPI_IORT_SMMU_V3_HTTU_OVERRIDE (1<<1)
 
+struct acpi_iort_pmcg {
+   u64 base_address;   /* PMCG base address */
+   u32 overflow_gsiv;
+   u32 node_reference;
+};
+
 
/***
  *
  * IVRS - I/O Virtualization Reporting Structure
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH 0/2] arm64 SMMUv3 PMU driver with IORT support

2017-08-04 Thread Neil Leeder
This adds a driver for the SMMUv3 PMU into the perf framework.
It includes an IORT update to support PM Counter Groups.

IORT has no mechanism for determining device names so PMUs
are named based on their physical address. 

Tested on Qualcomm QDF2400. perf_fuzzer ran for 4+ hours
with no failures.

Neil Leeder (2):
  acpi: arm64: add iort support for PMCG
  perf: add arm64 smmuv3 pmu driver

 drivers/acpi/arm64/iort.c |  54 +++
 drivers/perf/Kconfig  |   9 +
 drivers/perf/Makefile |   1 +
 drivers/perf/arm_smmuv3_pmu.c | 823 ++
 include/acpi/actbl2.h |   9 +-
 5 files changed, 895 insertions(+), 1 deletion(-)
 create mode 100644 drivers/perf/arm_smmuv3_pmu.c

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH v2] perf: qcom_l2: fix column exclusion check

2017-07-25 Thread Neil Leeder
The check for column exclusion did not verify that the event being
checked was an L2 event, and not a software event.
Software events should not be checked for column exclusion.
This resulted in a group with both software and L2 events sometimes
incorrectly rejecting the L2 event for column exclusion and
not counting it.

Add a check for PMU type before applying column exclusion logic.

Fixes: 21bdbb7102ed ("perf: add qcom l2 cache perf events driver")
Signed-off-by: Neil Leeder 
Acked-by: Mark Rutland 
---

Same code as v1, just added the tags to the commit text.

 drivers/perf/qcom_l2_pmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index c259848..b242cce 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -546,6 +546,7 @@ static int l2_cache_event_init(struct perf_event *event)
}
 
if ((event != event->group_leader) &&
+   !is_software_event(event->group_leader) &&
(L2_EVT_GROUP(event->group_leader->attr.config) ==
 L2_EVT_GROUP(event->attr.config))) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
@@ -558,6 +559,7 @@ static int l2_cache_event_init(struct perf_event *event)
list_for_each_entry(sibling, &event->group_leader->sibling_list,
group_entry) {
if ((sibling != event) &&
+   !is_software_event(sibling) &&
(L2_EVT_GROUP(sibling->attr.config) ==
 L2_EVT_GROUP(event->attr.config))) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH] perf: qcom_l2: fix column exclusion check

2017-07-24 Thread Neil Leeder
The check for column exclusion did not verify that the event being
checked was an L2 event, and not a software event.
Software events should not be checked for column exclusion.
This resulted in a group with both software and L2 events sometimes
incorrectly rejecting the L2 event for column exclusion and
not counting it.

Add a check for PMU type before applying column exclusion logic.

Signed-off-by: Neil Leeder 
---
 drivers/perf/qcom_l2_pmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index c259848..b242cce 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -546,6 +546,7 @@ static int l2_cache_event_init(struct perf_event *event)
}
 
if ((event != event->group_leader) &&
+   !is_software_event(event->group_leader) &&
(L2_EVT_GROUP(event->group_leader->attr.config) ==
 L2_EVT_GROUP(event->attr.config))) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
@@ -558,6 +559,7 @@ static int l2_cache_event_init(struct perf_event *event)
list_for_each_entry(sibling, &event->group_leader->sibling_list,
group_entry) {
if ((sibling != event) &&
+   !is_software_event(sibling) &&
(L2_EVT_GROUP(sibling->attr.config) ==
 L2_EVT_GROUP(event->attr.config))) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH/RFC] arm64: pmu: add Qualcomm Technologies extensions

2017-03-01 Thread Neil Leeder
Adds CPU PMU perf events support for Qualcomm Technologies' Falkor CPU.

The Qualcomm Technologies CPU PMU is named qcom_pmuv3 and provides
extensions to the architected PMU events.

The extended events are implemented by a set of registers which
are programmed by shifting an event code into a group field.
The PMNx event then points to that region/group combo.

Restrictions that limit only one concurrent region/group combination
are also enforced.

Signed-off-by: Neil Leeder 
---
The Qualcomm Technologies CPU PMU extensions have an additional set of registers
which need to be programmed when configuring an event. These are the PMRESRs,
which are similar to the krait & scorpion registers in armv7, and the L2
variants in the Qualcomm Technologies L2 PMU driver.

This is an alpha patch where I'm looking for comments on design decisions. I'll
hit the highlights here, with my rationale for the decisions and some possible
alternatives.

For some functions (reset, get_event_idx) I've been able to add qc_* wrappers
for the additional code, and within them call the armv8pmu_* functions for
common code.

For [enable|disable]_event this isn't possible because the body has to be
surrounded by a spinlock. I considered the alternative of duplicating the
function and adding qc-specific code, but went with calling the qc functions
from within the armv8pmu functions. One reason for that is that future features,
for example adding chaining support, may have to be duplicated in both
functions which I wanted to avoid.

There are additional constraints on qc events. The armv7 implementation checks
these in get_event_idx, but during L2 PMU reviews it was thought better to do
these during init processing where possible. I added these in the map_event
callback because it's the only callback from within armpmu_event_init(). I'm not
sure if that would be considered stretching the acceptable use of that interface,
so I'm open to other suggestions.

The qc driver also needs to check conflicts between events, using a bitmap. This
has similar use to the hw_events->used_mask. I added the event_conflicts bitmap
into hw_events, although I'm not sure if that's the best place for an
implementation-specific field. An alternative would be a static DEFINE_PER_CPU
bitmap, although that didn't seem as clean, but may be more acceptable.

qc_max_resr is a variable, rather than a constant, to accommodate future
processors with different numbers of RESRs.

This requires Jeremy Linton's patch sequence to add arm64 CPU PMU ACPI support:
https://patchwork.kernel.org/patch/9533677/

Thanks,
Neil

 arch/arm64/kernel/perf_event.c | 344 -
 include/linux/perf/arm_pmu.h   |   8 +
 2 files changed, 348 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 0fbd7ef..ed4842d 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -535,6 +535,32 @@
.attrs = armv8_pmuv3_format_attrs,
 };
 
+/* NRCCG format for qc perf raw codes. */
+PMU_FORMAT_ATTR(prefix, "config:16-19");
+PMU_FORMAT_ATTR(reg,"config:12-15");
+PMU_FORMAT_ATTR(code,   "config:4-11");
+PMU_FORMAT_ATTR(group,  "config:0-3");
+
+static struct attribute *qc_ev_formats[] = {
+   &format_attr_prefix.attr,
+   &format_attr_reg.attr,
+   &format_attr_code.attr,
+   &format_attr_group.attr,
+   &format_attr_event.attr,
+   NULL,
+};
+
+static struct attribute_group qc_pmu_format_attr_group = {
+   .name = "format",
+   .attrs = qc_ev_formats,
+};
+
+static bool qc_pmu;
+static void qc_pmu_enable_event(struct perf_event *event,
+   struct hw_perf_event *hwc, int idx);
+static void qc_pmu_disable_event(struct perf_event *event,
+struct hw_perf_event *hwc);
+
 /*
  * Perf Events' indices
  */
@@ -704,10 +730,13 @@ static void armv8pmu_enable_event(struct perf_event 
*event)
 */
armv8pmu_disable_counter(idx);
 
-   /*
-* Set event (if destined for PMNx counters).
-*/
-   armv8pmu_write_evtype(idx, hwc->config_base);
+   if (qc_pmu)
+   qc_pmu_enable_event(event, hwc, idx);
+   else
+   /*
+* Set event (if destined for PMNx counters).
+*/
+   armv8pmu_write_evtype(idx, hwc->config_base);
 
/*
 * Enable interrupt for this counter
@@ -740,6 +769,9 @@ static void armv8pmu_disable_event(struct perf_event *event)
 */
armv8pmu_disable_counter(idx);
 
+   if (qc_pmu)
+   qc_pmu_disable_event(event, hwc);
+
/*
 * Disable interrupt for this counter
 */
@@ -929,6 +961,269 @@ static int armv8_pmuv3_map_event(struct perf_event *event)
return hw_event_id;
 }
 

[PATCH v10] perf: add qcom l2 cache perf events driver

2017-02-07 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses on Qualcomm Technologies processors.

Signed-off-by: Neil Leeder 
---
v10:
Remove unnecessary cross-call for reset
Remove filter match and move check in get_event_idx
Check event pointer is valid in interrupt handler

v9:
Add support for maxcpus < all cpus:
  Move some of probe functionality to hotplug online callback
  Call hotplug callbacks from probe
Make percpu cpu->cluster map dynamic and associated with PMU instead of global
Use *sysreg_s() for portability reasons
Add documentation file
Rebase on 4.10-rc6
Misc cleanups

v8:
Various style changes for function names & code restructuring
Replace dev_warn with ratelimited debug prints
Move hotplug registration before PMU registration
Reload counters with a fixed value
Add column-exclusion check for events in same group
Rebase on 4.10-rc3

v7:
Move to drivers/perf
Rebased on 4.9-rc1
Ran perf fuzzer against driver for 4 hours with no crashes 

v6: restore accidentally dropped Kconfig dependencies

v5:
Fold the header and l2-accessors into .c file
Use multi-instance framework for hotplug
Change terminology from slice to cluster for clarity
Remove unnecessary rmw sequence for enable registers
Use prev_count in hwc rather than in slice
Enforce all events in same group on same CPU
Add comments, rename variables for clarity

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:
Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

 Documentation/perf/qcom_l2_pmu.txt |   38 ++
 drivers/perf/Kconfig   |9 +
 drivers/perf/Makefile  |1 +
 drivers/perf/qcom_l2_pmu.c | 1012 
 include/linux/cpuhotplug.h |1 +
 5 files changed, 1061 insertions(+)
 create mode 100644 Documentation/perf/qcom_l2_pmu.txt
 create mode 100644 drivers/perf/qcom_l2_pmu.c

diff --git a/Documentation/perf/qcom_l2_pmu.txt 
b/Documentation/perf/qcom_l2_pmu.txt
new file mode 100644
index 000..b25b976
--- /dev/null
+++ b/Documentation/perf/qcom_l2_pmu.txt
@@ -0,0 +1,38 @@
+Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
+=====================================================================
+
+This driver supports the L2 cache clusters found in Qualcomm Technologies
+Centriq SoCs. There are multiple physical L2 cache clusters, each with their
+own PMU. Each cluster has one or more CPUs associated with it.
+
+There is one logical L2 PMU exposed, which aggregates the results from
+the physical PMUs.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l2cache_0.
+
+The "format" directory describes the format of the events.
+
+Events can be envisioned as a 2-dimensional array. Each column represents
+a group of events. There are 8 groups. Only one entry from each
+group can be in use at a time. If multiple events from the same group
+are specified, the conflicting events cannot be counted at the same time.
+
+Events are specified as 0xCCG, where CC is 2 hex digits specifying
+the code (array row) and G specifies the group (column) 0-7.
+
+In addition there is a cycle counter event specified by the value 0xFE
+which is outside the above scheme.
+
+The driver provides a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per cluster which will be used to handle all the PMU
+events on that cluster.
+
+Examples for use with perf:
+
+  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
+
+  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task perf sessions are not supported.
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4d5c5f9..9365190 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -12,6 +12,15 @@ config ARM_PMU
  Say y if you want to use CPU performance monitors on ARM-based
  systems.
 
+config QCOM_L2_PMU
+   bool "Qualcomm Technologies L2-cache PMU"
+   depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Ad

[PATCH v9] perf: add qcom l2 cache perf events driver

2017-02-03 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses on Qualcomm Technologies processors.

Signed-off-by: Neil Leeder 
---
v9:
Add support for maxcpus < all cpus:
  Move some of probe functionality to hotplug online callback
  Call hotplug callbacks from probe
Make percpu cpu->cluster map dynamic and associated with PMU instead of global
Use *sysreg_s() for portability reasons
Add documentation file
Rebase on 4.10-rc6
Misc cleanups

v8:
Various style changes for function names & code restructuring
Replace dev_warn with ratelimited debug prints
Move hotplug registration before PMU registration
Reload counters with a fixed value
Add column-exclusion check for events in same group
Rebase on 4.10-rc3

v7:
Move to drivers/perf
Rebased on 4.9-rc1
Ran perf fuzzer against driver for 4 hours with no crashes 

v6: restore accidentally dropped Kconfig dependencies

v5:
Fold the header and l2-accessors into .c file
Use multi-instance framework for hotplug
Change terminology from slice to cluster for clarity
Remove unnecessary rmw sequence for enable registers
Use prev_count in hwc rather than in slice
Enforce all events in same group on same CPU
Add comments, rename variables for clarity

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:
Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

 Documentation/perf/qcom_l2_pmu.txt |   38 ++
 drivers/perf/Kconfig   |9 +
 drivers/perf/Makefile  |1 +
 drivers/perf/qcom_l2_pmu.c | 1031 
 include/linux/cpuhotplug.h |1 +
 5 files changed, 1080 insertions(+)
 create mode 100644 Documentation/perf/qcom_l2_pmu.txt
 create mode 100644 drivers/perf/qcom_l2_pmu.c

diff --git a/Documentation/perf/qcom_l2_pmu.txt 
b/Documentation/perf/qcom_l2_pmu.txt
new file mode 100644
index 000..b25b976
--- /dev/null
+++ b/Documentation/perf/qcom_l2_pmu.txt
@@ -0,0 +1,38 @@
+Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU)
+=====================================================================
+
+This driver supports the L2 cache clusters found in Qualcomm Technologies
+Centriq SoCs. There are multiple physical L2 cache clusters, each with their
+own PMU. Each cluster has one or more CPUs associated with it.
+
+There is one logical L2 PMU exposed, which aggregates the results from
+the physical PMUs.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/devices/l2cache_0.
+
+The "format" directory describes the format of the events.
+
+Events can be envisioned as a 2-dimensional array. Each column represents
+a group of events. There are 8 groups. Only one entry from each
+group can be in use at a time. If multiple events from the same group
+are specified, the conflicting events cannot be counted at the same time.
+
+Events are specified as 0xCCG, where CC is 2 hex digits specifying
+the code (array row) and G specifies the group (column) 0-7.
+
+In addition there is a cycle counter event specified by the value 0xFE
+which is outside the above scheme.
+
+The driver provides a "cpumask" sysfs attribute which contains a mask
+consisting of one CPU per cluster which will be used to handle all the PMU
+events on that cluster.
+
+Examples for use with perf:
+
+  perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1
+
+  perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Per-task perf sessions are not supported.
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4d5c5f9..9365190 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -12,6 +12,15 @@ config ARM_PMU
  Say y if you want to use CPU performance monitors on ARM-based
  systems.
 
+config QCOM_L2_PMU
+   bool "Qualcomm Technologies L2-cache PMU"
+   depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config XGENE_PMU
 depends on PERF_EVENTS &&

[PATCH v8] perf: add qcom l2 cache perf events driver

2017-01-16 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses on Qualcomm Technologies processors.

Signed-off-by: Neil Leeder 
---
v8:
Various style changes for function names & code restructuring
Replace dev_warn with ratelimited debug prints
Move hotplug registration before PMU registration
Reload counters with a fixed value
Add column-exclusion check for events in same group
Rebase on 4.10-rc3

v7:
Move to drivers/perf
Rebased on 4.9-rc1
Ran perf fuzzer against driver for 4 hours with no crashes 

v6: restore accidentally dropped Kconfig dependencies

v5:
Fold the header and l2-accessors into .c file
Use multi-instance framework for hotplug
Change terminology from slice to cluster for clarity
Remove unnecessary rmw sequence for enable registers
Use prev_count in hwc rather than in slice
Enforce all events in same group on same CPU
Add comments, rename variables for clarity

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:
Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

 drivers/perf/Kconfig   |9 +
 drivers/perf/Makefile  |1 +
 drivers/perf/qcom_l2_pmu.c | 1001 
 include/linux/cpuhotplug.h |1 +
 4 files changed, 1012 insertions(+)
 create mode 100644 drivers/perf/qcom_l2_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4d5c5f9..9365190 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -12,6 +12,15 @@ config ARM_PMU
  Say y if you want to use CPU performance monitors on ARM-based
  systems.
 
+config QCOM_L2_PMU
+   bool "Qualcomm Technologies L2-cache PMU"
+   depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config XGENE_PMU
 depends on PERF_EVENTS && ARCH_XGENE
 bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b116e98..ef24833 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o
+obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
new file mode 100644
index 000..407ca9a
--- /dev/null
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -0,0 +1,1001 @@
+/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#define MAX_L2_CTRS 9
+
+#define L2PMCR_NUM_EV_SHIFT 11
+#define L2PMCR_NUM_EV_MASK  0x1F
+
+#define L2PMCR  0x400
+#define L2PMCNTENCLR0x403
+#define L2PMCNTENSET0x404
+#define L2PMINTENCLR0x405
+#define L2PMINTENSET0x406
+#define L2PMOVSCLR  0x407
+#define L2PMOVSSET  0x408
+#define L2PMCCNTCR  0x409
+#define L2PMCCNTR   0x40A
+#define L2PMCCNTSR  0x40C
+#define L2PMRESR0x410
+#define IA_L2PMXEVCNTCR_BASE0x420
+#define IA_L2PMXEVCNTR_BASE 0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE0x424
+
+#define IA_L2_REG_OFFSET0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL  0x000E
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x0004
+#define L2PMXEVFILTER_ORGFILTER_ALL 0x0003
+
+#define L2EVTYPER_REG_SHIFT 3
+
+#define L2PMRESR_GROUP_BITS 8
+#define L2PMRESR_GROUP_MASK GENMASK(7, 0)
+

[PATCH v7] soc: qcom: add l2 cache perf events driver

2016-10-28 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses on Qualcomm Technologies processors.

Signed-off-by: Neil Leeder 
---
v7:
Move to drivers/perf
Rebased on 4.9-rc1
Successfully ran perf fuzzer against driver

v6: restore accidentally dropped Kconfig dependencies

v5:
Fold the header and l2-accessors into .c file
Use multi-instance framework for hotplug
Change terminology from slice to cluster for clarity
Remove unnecessary rmw sequence for enable registers
Use prev_count in hwc rather than in slice
Enforce all events in same group on same CPU
Add comments, rename variables for clarity

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:
Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

 drivers/perf/Kconfig   |   9 +
 drivers/perf/Makefile  |   1 +
 drivers/perf/qcom_l2_pmu.c | 947 +
 include/linux/cpuhotplug.h |   1 +
 4 files changed, 958 insertions(+)
 create mode 100644 drivers/perf/qcom_l2_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4d5c5f9..9365190 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -12,6 +12,15 @@ config ARM_PMU
  Say y if you want to use CPU performance monitors on ARM-based
  systems.
 
+config QCOM_L2_PMU
+   bool "Qualcomm Technologies L2-cache PMU"
+   depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config XGENE_PMU
 depends on PERF_EVENTS && ARCH_XGENE
 bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b116e98..ef24833 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o
+obj-$(CONFIG_QCOM_L2_PMU)  += qcom_l2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
new file mode 100644
index 000..c7b159f
--- /dev/null
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -0,0 +1,947 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_L2_CTRS 9
+
+#define L2PMCR_NUM_EV_SHIFT 11
+#define L2PMCR_NUM_EV_MASK  0x1F
+
+#define L2PMCR  0x400
+#define L2PMCNTENCLR0x403
+#define L2PMCNTENSET0x404
+#define L2PMINTENCLR0x405
+#define L2PMINTENSET0x406
+#define L2PMOVSCLR  0x407
+#define L2PMOVSSET  0x408
+#define L2PMCCNTCR  0x409
+#define L2PMCCNTR   0x40A
+#define L2PMCCNTSR  0x40C
+#define L2PMRESR0x410
+#define IA_L2PMXEVCNTCR_BASE0x420
+#define IA_L2PMXEVCNTR_BASE 0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE0x424
+
+#define IA_L2_REG_OFFSET0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL  0x000E
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x0004
+#define L2PMXEVFILTER_ORGFILTER_ALL 0x0003
+
+#define L2PM_CC_ENABLE  0x8000
+
+#define L2EVTYPER_REG_SHIFT 3
+
+#define L2PMRESR_GROUP_BITS 8
+#define L2PMRESR_GROUP_MASK GENMASK(7, 0)
+
+#define L2CYCLE_CTR_BIT 31
+#define L2CYCLE_CTR_RAW_CODE0xFE
+
+#define L2PMCR_RESET_ALL0x6
+#define L2PMCR_COUNTERS_ENABLE  0x1
+#define L2PMCR_COUNTERS_DISABLE 0x0
+
+#define L2PMRESR_EN ((u64)1 << 63)
+
+#define L2_EVT_MASK 0x0FFF
+#define L2_EVT_CODE_MASK0x0FF0
+#define L2_EVT_GRP_MASK 0x000F
+#define L2_EVT_CODE_SHIFT   4
+#defin

Re: [PATCH v6] soc: qcom: add l2 cache perf events driver

2016-10-04 Thread Neil Leeder

On 10/4/2016 11:53 AM, Mark Rutland wrote:
> Hi Neil,
> 
> On Wed, Sep 21, 2016 at 05:12:54PM -0400, Neil Leeder wrote:
>> Adds perf events support for L2 cache PMU.
>>
>> The L2 cache PMU driver is named 'l2cache_0' and can be used
>> with perf events to profile L2 events such as cache hits
>> and misses.
>>
>> Signed-off-by: Neil Leeder 
>> ---
> 
>>  drivers/soc/qcom/Kconfig |   9 +
>>  drivers/soc/qcom/Makefile|   1 +
>>  drivers/soc/qcom/perf_event_l2.c | 948 +++
>>  include/linux/cpuhotplug.h   |   1 +
>>  4 files changed, 959 insertions(+)
>>  create mode 100644 drivers/soc/qcom/perf_event_l2.c
> 
> Apologies for the delay; this has been on my todo list, but I've been a
> little distracted and haven't had the time necessary to devote to this.
> It's somewhat unusual given the constraint logic and the percpu uncore
> component, so there's more to consider than usual.
> 
> At a high level, this will need to be moved to drivers/perf/, per [1].
> 
> Can you move the driver there, and post the result atop of v4.8-rc1 at
> the end of the merge window? Until then, I can't guarantee that I'll
> have the time to look at this.
> 
> Can you also give Vince's perf fuzzer [2] a spin against the driver? I
> can't recall if we covered that previously, and in practice it's found a
> number of issues in drivers that have otherwise looked fine. If you've
> done so, it'd be worth noting in the cover.
> 
> Thanks,
> Mark.
> 
> [1] 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2016-September/457188.html
> [2] https://github.com/deater/perf_event_tests
> 

Thanks Mark. I'll move it, rebase on 4.9-rc1 and run perf fuzzer.

Neil
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of
Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH v6] soc: qcom: add l2 cache perf events driver

2016-10-04 Thread Neil Leeder

On 9/21/2016 05:12 PM, Neil Leeder wrote:
> Adds perf events support for L2 cache PMU.
> 
> The L2 cache PMU driver is named 'l2cache_0' and can be used
> with perf events to profile L2 events such as cache hits
> and misses.
> 
> Signed-off-by: Neil Leeder 
> ---
> v6: restore accidentally dropped Kconfig dependencies
> 
> v5:
> Fold the header and l2-accessors into .c file
> Use multi-instance framework for hotplug
> Change terminology from slice to cluster for clarity
> Remove unnecessary rmw sequence for enable registers
> Use prev_count in hwc rather than in slice
> Enforce all events in same group on same CPU
> Add comments, rename variables for clarity
> 
> v4:
> Replace notifier with hotplug statemachine
> Allocate PMU struct dynamically
> 
> v3:
> Remove exports from l2-accessors
> Change l2-accessors Kconfig to make it not user-selectable
> Reorder and remove unnecessary includes
> 
> v2:
> Add the l2-accessors patch to this patchset, previously posted separately.
> Remove sampling and per-task functionality for this uncore PMU.
> Use cpumask to replace code which filtered events to one cpu per slice.
> Replace manual event filtering with filter_match callback.
> Use a separate used_mask for event groups.
> Add hotplug notifier for CPU and irq migration.
> Remove extraneous synchronisation instructions.
> Other miscellaneous cleanup.
> 
>  drivers/soc/qcom/Kconfig |   9 +
>  drivers/soc/qcom/Makefile|   1 +
>  drivers/soc/qcom/perf_event_l2.c | 948 +++
>  include/linux/cpuhotplug.h   |   1 +
>  4 files changed, 959 insertions(+)
>  create mode 100644 drivers/soc/qcom/perf_event_l2.c
> 
> diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
> index 461b387..3fa27a8 100644
> --- a/drivers/soc/qcom/Kconfig
> +++ b/drivers/soc/qcom/Kconfig
> @@ -10,6 +10,15 @@ config QCOM_GSBI
>functions for connecting the underlying serial UART, SPI, and I2C
>devices to the output pins.
>  
> +config QCOM_PERF_EVENTS_L2
> + bool "Qualcomm Technologies L2-cache perf events"
> + depends on ARCH_QCOM && ARM64 && HW_PERF_EVENTS && ACPI
> +   help
> +   Provides support for the L2 cache performance monitor unit (PMU)
> +   in Qualcomm Technologies processors.
> +   Adds the L2 cache PMU into the perf events subsystem for
> +   monitoring L2 cache events.
> +
>  config QCOM_PM
>   bool "Qualcomm Power Management"
>   depends on ARCH_QCOM && !ARM64
> diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
> index fdd664e..4c9df3b 100644
> --- a/drivers/soc/qcom/Makefile
> +++ b/drivers/soc/qcom/Makefile
> @@ -1,4 +1,5 @@
>  obj-$(CONFIG_QCOM_GSBI)  +=  qcom_gsbi.o
> +obj-$(CONFIG_QCOM_PERF_EVENTS_L2)+= perf_event_l2.o
>  obj-$(CONFIG_QCOM_PM)+=  spm.o
>  obj-$(CONFIG_QCOM_SMD) +=smd.o
>  obj-$(CONFIG_QCOM_SMD_RPM)   += smd-rpm.o
> diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
> new file mode 100644
> index 000..bbf47c9
> --- /dev/null
> +++ b/drivers/soc/qcom/perf_event_l2.c
> @@ -0,0 +1,948 @@
> +/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define MAX_L2_CTRS 9
> +
> +#define L2PMCR_NUM_EV_SHIFT 11
> +#define L2PMCR_NUM_EV_MASK  0x1F
> +
> +#define L2PMCR  0x400
> +#define L2PMCNTENCLR0x403
> +#define L2PMCNTENSET0x404
> +#define L2PMINTENCLR0x405
> +#define L2PMINTENSET0x406
> +#define L2PMOVSCLR  0x407
> +#define L2PMOVSSET  0x408
> +#define L2PMCCNTCR  0x409
> +#define L2PMCCNTR   0x40A
> +#define L2PMCCNTSR  0x40C
> +#define L2PMRESR0x410
> +#define IA_L2PMXEVCNTCR_BASE0x420
> +#define IA_L2PMXEVCNTR_BASE 0x421
> +#define IA_L2PMXEVFILTER_BASE   0x423
> +#define IA_L2PMXEVTYPER_BASE0x424
> +
> +#define IA_L2_REG_OFFSET0x10
> +
> +#defi

[PATCH v6] soc: qcom: add l2 cache perf events driver

2016-09-21 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses.

Signed-off-by: Neil Leeder 
---
v6: restore accidentally dropped Kconfig dependencies

v5:
Fold the header and l2-accessors into .c file
Use multi-instance framework for hotplug
Change terminology from slice to cluster for clarity
Remove unnecessary rmw sequence for enable registers
Use prev_count in hwc rather than in slice
Enforce all events in same group on same CPU
Add comments, rename variables for clarity

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:
Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

 drivers/soc/qcom/Kconfig |   9 +
 drivers/soc/qcom/Makefile|   1 +
 drivers/soc/qcom/perf_event_l2.c | 948 +++
 include/linux/cpuhotplug.h   |   1 +
 4 files changed, 959 insertions(+)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..3fa27a8 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,15 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_PERF_EVENTS_L2
+   bool "Qualcomm Technologies L2-cache perf events"
+   depends on ARCH_QCOM && ARM64 && HW_PERF_EVENTS && ACPI
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..4c9df3b 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_PERF_EVENTS_L2)  += perf_event_l2.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
new file mode 100644
index 000..bbf47c9
--- /dev/null
+++ b/drivers/soc/qcom/perf_event_l2.c
@@ -0,0 +1,948 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_L2_CTRS 9
+
+#define L2PMCR_NUM_EV_SHIFT 11
+#define L2PMCR_NUM_EV_MASK  0x1F
+
+#define L2PMCR  0x400
+#define L2PMCNTENCLR0x403
+#define L2PMCNTENSET0x404
+#define L2PMINTENCLR0x405
+#define L2PMINTENSET0x406
+#define L2PMOVSCLR  0x407
+#define L2PMOVSSET  0x408
+#define L2PMCCNTCR  0x409
+#define L2PMCCNTR   0x40A
+#define L2PMCCNTSR  0x40C
+#define L2PMRESR0x410
+#define IA_L2PMXEVCNTCR_BASE0x420
+#define IA_L2PMXEVCNTR_BASE 0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE0x424
+
+#define IA_L2_REG_OFFSET0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL  0x000E
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x0004
+#define L2PMXEVFILTER_ORGFILTER_ALL 0x0003
+
+#define L2PM_CC_ENABLE  0x8000
+
+#define L2EVTYPER_REG_SHIFT 3
+
+#define L2PMRESR_GROUP_BITS 8
+#define L2PMRESR_GROUP_MASK GENMASK(7, 0)
+
+#define L2CYCLE_CTR_BIT 31
+#define L2CYCLE_CTR_RAW_CODE0xFE
+
+#define L2PMCR_RESET_ALL0x6
+#define L2PMCR_COUNTERS_ENABLE  0x1
+#define L2PMCR_COUNTERS_DISABLE 0x0
+
+#define L2PMRESR_EN ((u64)1 << 63)
+
+#define L2_EVT_MASK 0x0FFF
+#define L2_EVT_CODE_MASK   

[PATCH v5] soc: qcom: add l2 cache perf events driver

2016-09-21 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses.

Signed-off-by: Neil Leeder 
---
v5:
Fold the header and l2-accessors into .c file
Use multi-instance framework for hotplug
Change terminology from slice to cluster for clarity
Remove unnecessary rmw sequence for enable registers
Use prev_count in hwc rather than in slice
Enforce all events in same group on same CPU
Add comments, rename variables for clarity

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:
Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

 drivers/soc/qcom/Kconfig |   9 +
 drivers/soc/qcom/Makefile|   1 +
 drivers/soc/qcom/perf_event_l2.c | 948 +++
 include/linux/cpuhotplug.h   |   1 +
 4 files changed, 959 insertions(+)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..4c32646 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,15 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_PERF_EVENTS_L2
+   bool "Qualcomm Technologies L2-cache perf events"
+   depends on ARCH_QCOM && HW_PERF_EVENTS
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..4c9df3b 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_PERF_EVENTS_L2)  += perf_event_l2.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
new file mode 100644
index 000..bbf47c9
--- /dev/null
+++ b/drivers/soc/qcom/perf_event_l2.c
@@ -0,0 +1,948 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_L2_CTRS 9
+
+#define L2PMCR_NUM_EV_SHIFT 11
+#define L2PMCR_NUM_EV_MASK  0x1F
+
+#define L2PMCR  0x400
+#define L2PMCNTENCLR0x403
+#define L2PMCNTENSET0x404
+#define L2PMINTENCLR0x405
+#define L2PMINTENSET0x406
+#define L2PMOVSCLR  0x407
+#define L2PMOVSSET  0x408
+#define L2PMCCNTCR  0x409
+#define L2PMCCNTR   0x40A
+#define L2PMCCNTSR  0x40C
+#define L2PMRESR0x410
+#define IA_L2PMXEVCNTCR_BASE0x420
+#define IA_L2PMXEVCNTR_BASE 0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE0x424
+
+#define IA_L2_REG_OFFSET0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL  0x000E
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x0004
+#define L2PMXEVFILTER_ORGFILTER_ALL 0x0003
+
+#define L2PM_CC_ENABLE  0x8000
+
+#define L2EVTYPER_REG_SHIFT 3
+
+#define L2PMRESR_GROUP_BITS 8
+#define L2PMRESR_GROUP_MASK GENMASK(7, 0)
+
+#define L2CYCLE_CTR_BIT 31
+#define L2CYCLE_CTR_RAW_CODE0xFE
+
+#define L2PMCR_RESET_ALL0x6
+#define L2PMCR_COUNTERS_ENABLE  0x1
+#define L2PMCR_COUNTERS_DISABLE 0x0
+
+#define L2PMRESR_EN ((u64)1 << 63)
+
+#define L2_EVT_MASK 0x0FFF
+#define L2_EVT_CODE_MASK0x0FF0
+#define L2_EVT_GRP_MASK 0x000F
+#define L2_EVT_CODE_SHIFT   4
+#def

Re: [PATCH v4 2/2] soc: qcom: add l2 cache perf events driver

2016-09-16 Thread Neil Leeder

On 9/16/2016 12:40 PM, Mark Rutland wrote:
> On Fri, Sep 16, 2016 at 11:33:39AM -0400, Neil Leeder wrote:
[...]
>> On 9/1/2016 12:30 PM, Mark Rutland wrote:
>>> On Tue, Aug 30, 2016 at 01:01:33PM -0400, Neil Leeder wrote:
>>>> +  /* Don't allow groups with mixed PMUs, except for s/w events */
>>>> +  if (event->group_leader->pmu != event->pmu &&
>>>> +  !is_software_event(event->group_leader)) {
>>>> +  dev_warn(&l2cache_pmu->pdev->dev,
>>>> +   "Can't create mixed PMU group\n");
>>>> +  return -EINVAL;
>>>> +  }
>>>> +
>>>> +  list_for_each_entry(sibling, &event->group_leader->sibling_list,
>>>> +  group_entry)
>>>> +  if (sibling->pmu != event->pmu &&
>>>> +  !is_software_event(sibling)) {
>>>> +  dev_warn(&l2cache_pmu->pdev->dev,
>>>> +   "Can't create mixed PMU group\n");
>>>> +  return -EINVAL;
>>>> +  }
>>>> +
>>>> +  hwc->idx = -1;
>>>> +  hwc->config_base = event->attr.config;
>>>> +
>>>> +  /*
>>>> +   * Ensure all events are on the same cpu so all events are in the
>>>> +   * same cpu context, to avoid races on pmu_enable etc.
>>>> +   */
>>>> +  slice = get_hml2_pmu(event->cpu);
>>>> +  event->cpu = slice->on_cpu;
>>>
>>> This could put an event on a different CPU to its group siblings, which
>>> is broken.
>>
>> This is the same logic as in arm-ccn.c:arm_ccn_pmu_event_init(), where there
>> is a single CPU designated as the CPU to be used for all events.
>>
>> All events for this slice are forced to slice->on_cpu which is the CPU
>> set in the cpumask for this slice.
> 
> The CCN is a little different. For the CCN, a single CPU is designated
> to handle *all* events.
> 
> For this driver, a CPU is designated per-slice, judging by the existence
> of hml2_pmu::on_cpu (unless that's superfluous). We've only verified
> that the events are all for this PMU, not the same slice, and thus each
> event->cpu may differ.
> 

I see. So I can add a check that the group_leader event must be on the
same slice, and thus on the same CPU.

>> I'm not sure how this can put an event on a different CPU to its group
>> siblings?
> 
> In practice today, we'll try to schedule the event on its group
> leader's CPU, but accounting and subsequent manipulation could go wrong.
> 
> Thanks,
> Mark.
> 

Neil
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of
Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH v4 2/2] soc: qcom: add l2 cache perf events driver

2016-09-16 Thread Neil Leeder
Hi Mark,
Thank you for the thorough review. I will post an updated patchset which
addresses all of your comments. There is just one outstanding comment which
I have a question about:

On 9/1/2016 12:30 PM, Mark Rutland wrote:
> On Tue, Aug 30, 2016 at 01:01:33PM -0400, Neil Leeder wrote:

>> +static int l2_cache__event_init(struct perf_event *event)
>> +{
>> +struct hw_perf_event *hwc = &event->hw;
>> +struct hml2_pmu *slice;
>> +struct perf_event *sibling;
>> +struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(event->pmu);
>> +
>> +if (event->attr.type != l2cache_pmu->pmu.type)
>> +return -ENOENT;
>> +
>> +if (hwc->sample_period) {
>> +dev_warn(&l2cache_pmu->pdev->dev, "Sampling not supported\n");
>> +return -EOPNOTSUPP;
>> +}
>> +
>> +if (event->cpu < 0) {
>> +dev_warn(&l2cache_pmu->pdev->dev, "Per-task mode not 
>> supported\n");
>> +return -EOPNOTSUPP;
>> +}
>> +
>> +/* We cannot filter accurately so we just don't allow it. */
>> +if (event->attr.exclude_user || event->attr.exclude_kernel ||
>> +event->attr.exclude_hv || event->attr.exclude_idle) {
>> +dev_warn(&l2cache_pmu->pdev->dev, "Can't exclude execution 
>> levels\n");
>> +return -EOPNOTSUPP;
>> +}
>> +
>> +if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
>> +(L2_EVT_PREFIX(event->attr.config) != 0) ||
>> +(L2_EVT_REG(event->attr.config) != 0)) &&
>> +(event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
>> +dev_warn(&l2cache_pmu->pdev->dev, "Invalid config %llx\n",
>> + event->attr.config);
>> +return -EINVAL;
>> +}
>> +
>> +/* Don't allow groups with mixed PMUs, except for s/w events */
>> +if (event->group_leader->pmu != event->pmu &&
>> +!is_software_event(event->group_leader)) {
>> +dev_warn(&l2cache_pmu->pdev->dev,
>> + "Can't create mixed PMU group\n");
>> +return -EINVAL;
>> +}
>> +
>> +list_for_each_entry(sibling, &event->group_leader->sibling_list,
>> +group_entry)
>> +if (sibling->pmu != event->pmu &&
>> +!is_software_event(sibling)) {
>> +dev_warn(&l2cache_pmu->pdev->dev,
>> + "Can't create mixed PMU group\n");
>> +return -EINVAL;
>> +}
>> +
>> +hwc->idx = -1;
>> +hwc->config_base = event->attr.config;
>> +
>> +/*
>> + * Ensure all events are on the same cpu so all events are in the
>> + * same cpu context, to avoid races on pmu_enable etc.
>> + */
>> +slice = get_hml2_pmu(event->cpu);
>> +event->cpu = slice->on_cpu;
> 
> This could put an event on a different CPU to its group siblings, which
> is broken.

This is the same logic as in arm-ccn.c:arm_ccn_pmu_event_init(), where there
is a single CPU designated as the CPU to be used for all events. All
events for this slice are forced to slice->on_cpu which is the CPU set in the
cpumask for this slice.

I'm not sure how this can put an event on a different CPU to its group siblings?

Thanks,
Neil
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of
Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


[PATCH v4 2/2] soc: qcom: add l2 cache perf events driver

2016-08-30 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig   |  10 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/perf_event_l2.c   | 855 +
 include/linux/cpuhotplug.h |   1 +
 include/linux/soc/qcom/perf_event_l2.h |  79 +++
 5 files changed, 946 insertions(+)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index ddd6b71..7e71d8d 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -16,6 +16,16 @@ config QCOM_L2_ACCESSORS
  Provides support for accessing registers in the L2 cache
  for Qualcomm Technologies ARM64 chips.
 
+config QCOM_PERF_EVENTS_L2
+   bool "Qualcomm Technologies L2-cache perf events"
+   depends on ARCH_QCOM && ARM64 && HW_PERF_EVENTS && ACPI
+   select QCOM_L2_ACCESSORS
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 6ef29b9..c8e89ca9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
+obj-$(CONFIG_QCOM_PERF_EVENTS_L2)  += perf_event_l2.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
new file mode 100644
index 000..5c13e87
--- /dev/null
+++ b/drivers/soc/qcom/perf_event_l2.c
@@ -0,0 +1,855 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "l2 perfevents: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ */
+struct l2cache_pmu {
+   struct list_head entry;
+   u32 num_pmus;
+   struct pmu pmu;
+   int num_counters;
+   cpumask_t cpumask;
+   struct platform_device *pdev;
+};
+
+/*
+ * The cache is made up of one or more slices, each slice has its own PMU.
+ * This structure represents one of the hardware PMUs.
+ */
+struct hml2_pmu {
+   struct perf_event *events[MAX_L2_CTRS];
+   struct l2cache_pmu *l2cache_pmu;
+   unsigned long used_mask[BITS_TO_LONGS(MAX_L2_CTRS)];
+   unsigned long group_used_mask[BITS_TO_LONGS(L2_EVT_GROUP_MAX + 1)];
+   int group_to_counter[L2_EVT_GROUP_MAX + 1];
+   int irq;
+   /* The CPU that is used for collecting events on this slice */
+   int on_cpu;
+   /* All the CPUs associated with this slice */
+   cpumask_t slice_cpus;
+   atomic64_t prev_count[MAX_L2_CTRS];
+   spinlock_t pmu_lock;
+};
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static DEFINE_MUTEX(l2cache_pmu_mutex);
+static LIST_HEAD(l2cache_pmu_list);
+static DEFINE_PER_CPU(struct hml2_pmu *, cpu_to_pmu);
+static u32 l2_cycle_ctr_idx;
+static u32 l2_reset_mask;
+
+static inline u32 idx_to_reg_bit(u32 idx)
+{
+   u32 bit;
+
+   if (idx == l2_cycle_ctr_idx)
+   bit = BIT(L2CYCLE_CTR_BIT);
+   else
+   bit = BIT(idx);
+   return bit;
+}
+
+static inline struct hml2_pmu *get_hml2_pmu(int cpu)
+{
+   return per_cpu(cpu_to_pmu, cpu);
+}
+
+static void hml2_pmu__reset_on_slice(void *x)
+{
+   /* Reset all ctrs */
+   set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+   set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
+}
+
+static inline void hml2_pmu__reset(struct hml2_pmu *slice)
+{
+   int cpu;
+
+   if (cpumask_test_cpu(smp_processor_id(), &slice->slice_cpus)) {
+   hml2_pmu__reset_on_slice(NULL);
+   

[PATCH v4 1/2] soc: qcom: provide mechanism for drivers to access L2 registers

2016-08-30 Thread Neil Leeder
L2 registers are accessed using a select register and data
register pair. To prevent multiple concurrent writes to the
select register by independent drivers, the write to the
select register and the associated access of the data register
are protected with a lock. All drivers accessing the L2
registers use the set and get functions provided by
l2-accessors to ensure correct reads and writes to L2 registers.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig  |  6 
 drivers/soc/qcom/Makefile |  1 +
 drivers/soc/qcom/l2-accessors.c   | 63 +++
 include/linux/soc/qcom/l2-accessors.h | 20 +++
 4 files changed, 90 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..ddd6b71 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,12 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_L2_ACCESSORS
+   bool
+   help
+ Provides support for accessing registers in the L2 cache
+ for Qualcomm Technologies ARM64 chips.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..6ef29b9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/l2-accessors.c b/drivers/soc/qcom/l2-accessors.c
new file mode 100644
index 000..2625d33
--- /dev/null
+++ b/drivers/soc/qcom/l2-accessors.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#define L2CPUSRSELR_EL1 S3_3_c15_c0_6
+#define L2CPUSRDR_EL1   S3_3_c15_c0_7
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg: write value to an L2 register
+ * @reg: Address of L2 register.
+ * @val: Value to be written to register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+void set_l2_indirect_reg(u64 reg, u64 val)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   write_sysreg(val, L2CPUSRDR_EL1);
+   isb();
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+
+/**
+ * get_l2_indirect_reg: read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+u64 get_l2_indirect_reg(u64 reg)
+{
+   u64 val;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   val = read_sysreg(L2CPUSRDR_EL1);
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+   return val;
+}
diff --git a/include/linux/soc/qcom/l2-accessors.h 
b/include/linux/soc/qcom/l2-accessors.h
new file mode 100644
index 000..e51b72a
--- /dev/null
+++ b/include/linux/soc/qcom/l2-accessors.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __QCOM_L2_ACCESSORS_H
+#define __QCOM_L2_ACCESSORS_H
+
+void set_l2_indirect_reg(u64 reg_addr, u64 val);
+u64 get_l2_indirect_reg(u64 reg_addr);
+
+#endif
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH v4 0/2] qcom: add l2 cache perf events driver

2016-08-30 Thread Neil Leeder
This adds a new dynamic PMU to the perf events framework to program
and control the L2 cache PMUs in Qualcomm Centriq SOCs.

The driver exports formatting and event information to sysfs so it
can be used by the perf user space tools with the syntax:
perf stat -a -e l2cache_0/event=0x42/

Qualcomm Technologies PMUs have events arranged in a matrix of rows and columns.
Only one event can be enabled from each column at once. This is enforced
by the filter_match callback.

v4:
Replace notifier with hotplug statemachine
Allocate PMU struct dynamically

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:

Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

Neil Leeder (2):
  soc: qcom: provide mechanism for drivers to access L2 registers
  soc: qcom: add l2 cache perf events driver

 drivers/soc/qcom/Kconfig   |  16 +
 drivers/soc/qcom/Makefile  |   2 +
 drivers/soc/qcom/l2-accessors.c|  63 +++
 drivers/soc/qcom/perf_event_l2.c   | 855 +
 include/linux/cpuhotplug.h |   1 +
 include/linux/soc/qcom/l2-accessors.h  |  20 +
 include/linux/soc/qcom/perf_event_l2.h |  79 +++
 7 files changed, 1036 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH v3 2/2] soc: qcom: add l2 cache perf events driver

2016-08-16 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig   |  10 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/perf_event_l2.c   | 828 +
 include/linux/soc/qcom/perf_event_l2.h |  81 
 4 files changed, 920 insertions(+)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index ddd6b71..7e71d8d 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -16,6 +16,16 @@ config QCOM_L2_ACCESSORS
  Provides support for accessing registers in the L2 cache
  for Qualcomm Technologies ARM64 chips.
 
+config QCOM_PERF_EVENTS_L2
+   bool "Qualcomm Technologies L2-cache perf events"
+   depends on ARCH_QCOM && ARM64 && HW_PERF_EVENTS && ACPI
+   select QCOM_L2_ACCESSORS
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 6ef29b9..c8e89ca9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
+obj-$(CONFIG_QCOM_PERF_EVENTS_L2)  += perf_event_l2.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
new file mode 100644
index 000..dfd5c3a
--- /dev/null
+++ b/drivers/soc/qcom/perf_event_l2.c
@@ -0,0 +1,828 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "l2 perfevents: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * The cache is made up of one or more slices, each slice has its own PMU.
+ * This structure represents one of the hardware PMUs.
+ */
+
+struct hml2_pmu {
+   struct perf_event *events[MAX_L2_CTRS];
+   unsigned long used_mask[BITS_TO_LONGS(MAX_L2_CTRS)];
+   unsigned long group_used_mask[BITS_TO_LONGS(L2_EVT_GROUP_MAX + 1)];
+   int group_to_counter[L2_EVT_GROUP_MAX + 1];
+   int irq;
+   /* The CPU that is used for collecting events on this slice */
+   int on_cpu;
+   /* All the CPUs associated with this slice */
+   cpumask_t slice_cpus;
+   atomic64_t prev_count[MAX_L2_CTRS];
+   spinlock_t pmu_lock;
+};
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ */
+struct l2cache_pmu {
+   u32 num_pmus;
+   struct pmu pmu;
+   int num_counters;
+   cpumask_t cpumask;
+   struct notifier_block cpu_nb;
+   struct platform_device *pdev;
+};
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static DEFINE_PER_CPU(struct hml2_pmu *, cpu_to_pmu);
+static struct l2cache_pmu l2cache_pmu = { 0 };
+static u32 l2_cycle_ctr_idx;
+static u32 l2_reset_mask;
+
+static inline u32 idx_to_reg_bit(u32 idx)
+{
+   u32 bit;
+
+   if (idx == l2_cycle_ctr_idx)
+   bit = BIT(L2CYCLE_CTR_BIT);
+   else
+   bit = BIT(idx);
+   return bit;
+}
+
+static inline struct hml2_pmu *get_hml2_pmu(int cpu)
+{
+   return per_cpu(cpu_to_pmu, cpu);
+}
+
+static void hml2_pmu__reset_on_slice(void *x)
+{
+   /* Reset all ctrs */
+   set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+   set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
+}
+
+static inline void hml2_pmu__reset(struct hml2_pmu *slice)
+{
+   int cpu;
+
+   if (cpumask_test_cpu(smp_processor_id(), &slice->slice_cpus)) {
+   hml2_pmu__reset_on_slice(NULL);
+   return;
+   }
+
+   /* Call each cpu in the cluster until one works */
+   for_each_cpu(cpu, &slice->sli

[PATCH v3 0/2] qcom: add l2 cache perf events driver

2016-08-16 Thread Neil Leeder
This adds a new dynamic PMU to the perf events framework to program
and control the L2 cache PMUs in some Qualcomm Technologies SOCs.

The driver exports formatting and event information to sysfs so it
can be used by the perf user space tools with the syntax:
perf stat -a -e l2cache_0/event=0x42/

Qualcomm Technologies PMUs have events arranged in a matrix of rows and columns.
Only one event can be enabled from each column at once. This is enforced
by the filter_match callback.

v3:
Remove exports from l2-accessors
Change l2-accessors Kconfig to make it not user-selectable
Reorder and remove unnecessary includes

v2:

Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

Neil Leeder (2):
  soc: qcom: provide mechanism for drivers to access L2 registers
  soc: qcom: add l2 cache perf events driver

 drivers/soc/qcom/Kconfig   |  16 +
 drivers/soc/qcom/Makefile  |   2 +
 drivers/soc/qcom/l2-accessors.c|  63 +++
 drivers/soc/qcom/perf_event_l2.c   | 828 +
 include/linux/soc/qcom/l2-accessors.h  |  20 +
 include/linux/soc/qcom/perf_event_l2.h |  81 
 6 files changed, 1010 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH v3 1/2] soc: qcom: provide mechanism for drivers to access L2 registers

2016-08-16 Thread Neil Leeder
L2 registers are accessed using a select register and data
register pair. To prevent multiple concurrent writes to the
select register by independent drivers, the write to the
select register and the associated access of the data register
are protected with a lock. All drivers accessing the L2
registers use the set and get functions provided by
l2-accessors to ensure correct reads and writes to L2 registers.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig  |  6 
 drivers/soc/qcom/Makefile |  1 +
 drivers/soc/qcom/l2-accessors.c   | 63 +++
 include/linux/soc/qcom/l2-accessors.h | 20 +++
 4 files changed, 90 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..ddd6b71 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,12 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_L2_ACCESSORS
+   bool
+   help
+ Provides support for accessing registers in the L2 cache
+ for Qualcomm Technologies ARM64 chips.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..6ef29b9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/l2-accessors.c b/drivers/soc/qcom/l2-accessors.c
new file mode 100644
index 000..2625d33
--- /dev/null
+++ b/drivers/soc/qcom/l2-accessors.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#define L2CPUSRSELR_EL1 S3_3_c15_c0_6
+#define L2CPUSRDR_EL1   S3_3_c15_c0_7
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg: write value to an L2 register
+ * @reg: Address of L2 register.
+ * @val: Value to be written to register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+void set_l2_indirect_reg(u64 reg, u64 val)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   write_sysreg(val, L2CPUSRDR_EL1);
+   isb();
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+
+/**
+ * get_l2_indirect_reg: read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+u64 get_l2_indirect_reg(u64 reg)
+{
+   u64 val;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   val = read_sysreg(L2CPUSRDR_EL1);
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+   return val;
+}
diff --git a/include/linux/soc/qcom/l2-accessors.h 
b/include/linux/soc/qcom/l2-accessors.h
new file mode 100644
index 000..e51b72a
--- /dev/null
+++ b/include/linux/soc/qcom/l2-accessors.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __QCOM_L2_ACCESSORS_H
+#define __QCOM_L2_ACCESSORS_H
+
+void set_l2_indirect_reg(u64 reg_addr, u64 val);
+u64 get_l2_indirect_reg(u64 reg_addr);
+
+#endif
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



Re: [PATCH v2 2/2] soc: qcom: add l2 cache perf events driver

2016-08-08 Thread Neil Leeder


On 8/5/2016 07:15 PM, Paul Gortmaker wrote:
> On Thu, Aug 4, 2016 at 5:11 PM, Neil Leeder  wrote:
>> Adds perf events support for L2 cache PMU.
>>
>> The L2 cache PMU driver is named 'l2cache_0' and can be used
>> with perf events to profile L2 events such as cache hits
>> and misses.
>>
>> Signed-off-by: Neil Leeder 
>> ---
>>  drivers/soc/qcom/Kconfig   |  10 +
>>  drivers/soc/qcom/Makefile  |   1 +
>>  drivers/soc/qcom/perf_event_l2.c   | 839 
>> +
>>  include/linux/soc/qcom/perf_event_l2.h |  81 
>>  4 files changed, 931 insertions(+)
>>  create mode 100644 drivers/soc/qcom/perf_event_l2.c
>>  create mode 100644 include/linux/soc/qcom/perf_event_l2.h
>>
>> diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
>> index 21ec616..0b5ddb9 100644
>> --- a/drivers/soc/qcom/Kconfig
>> +++ b/drivers/soc/qcom/Kconfig
>> @@ -19,6 +19,16 @@ config QCOM_L2_ACCESSORS
>>   Provides support for accessing registers in the L2 cache
>>   for Qualcomm Technologies chips.
>>
>> +config QCOM_PERF_EVENTS_L2
>> +   bool "Qualcomm Technologies L2-cache perf events"
> 
> Since this is a bool, you shouldn't need module.h in your driver or
> any MODULE_ tags (if there are any).
> 
> Thanks,
> Paul.
> --

You're right - I'll clean up the include list and sort it alphabetically
while I'm at it.

Thanks,
Neil

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH v2 1/2] soc: qcom: provide mechanism for drivers to access L2 registers

2016-08-05 Thread Neil Leeder

On 8/5/2016 06:00 AM, Mark Rutland wrote:
> On Thu, Aug 04, 2016 at 05:11:10PM -0400, Neil Leeder wrote:
>> L2 registers are accessed using a select register and data
>> register pair. To prevent multiple concurrent writes to the
>> select register by independent drivers, the write to the
>> select register and the associated access of the data register
>> are protected with a lock. All drivers accessing the L2
>> registers use the set and get functions provided by
>> l2-accessors to ensure correct reads and writes to L2 registers.
> 
> As of this series, this is only used by the PMU driver. Which other
> drivers do you plan to use this for?
> 
> If there's nothing else planned at the moment, it would be nicer to fold
> these into the PMU driver.
> 

I see a couple of other drivers on codeaurora.org using it: the
Error Reporting (ERP) driver and an adaptive clock generator. 
I'd guess they'll be submitted to LKML but they're not mine so I don't
know when.

As the purpose of this is to be the common interface for multiple drivers
to stop them walking over each other, I think it makes sense to keep
it separate.

> [...]
> 
>> +config QCOM_L2_ACCESSORS
>> +bool "Qualcomm Technologies L2-cache accessors"
>> +depends on ARCH_QCOM && ARM64
>> +help
>> +  Say y here to enable support for the Qualcomm Technologies
>> +  L2 accessors.
>> +  Provides support for accessing registers in the L2 cache
>> +  for Qualcomm Technologies chips.
> 
> Which chips have this?

Qualcomm Technologies ARM64 chips, so currently QDF24xx family and 
anything Kryo based. I'd assume any future chip families as well.
Given the 'depends on' line, I wasn't sure there was any benefit
to essentially duplicating that in the help text.

> 
> Have drivers select this as necessary. There's no reason for this to be
> user-selectable given this is trivial common infrastructure.

OK, I'll fix that

> 
> [...]
> 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
> 
> Nit: please sort these alphabetically.

OK

> 
> [...]
> 
>> +EXPORT_SYMBOL(set_l2_indirect_reg);
> 
> The PMU driver isn't a module, so this doesn't need to be exported.
> Until there's a modular user, please get rid of EXPORT_SYMBOL.
> 
>> +EXPORT_SYMBOL(get_l2_indirect_reg);
> 
> Likewise.

OK to both of these.

> 
> Thanks,
> Mark.
> 

Thank you for the comments.
Neil

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH v2 1/2] soc: qcom: provide mechanism for drivers to access L2 registers

2016-08-04 Thread Neil Leeder
L2 registers are accessed using a select register and data
register pair. To prevent multiple concurrent writes to the
select register by independent drivers, the write to the
select register and the associated access of the data register
are protected with a lock. All drivers accessing the L2
registers use the set and get functions provided by
l2-accessors to ensure correct reads and writes to L2 registers.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig  |  9 +
 drivers/soc/qcom/Makefile |  1 +
 drivers/soc/qcom/l2-accessors.c   | 66 +++
 include/linux/soc/qcom/l2-accessors.h | 20 +++
 4 files changed, 96 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..21ec616 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,15 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_L2_ACCESSORS
+   bool "Qualcomm Technologies L2-cache accessors"
+   depends on ARCH_QCOM && ARM64
+   help
+ Say y here to enable support for the Qualcomm Technologies
+ L2 accessors.
+ Provides support for accessing registers in the L2 cache
+ for Qualcomm Technologies chips.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..6ef29b9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/l2-accessors.c b/drivers/soc/qcom/l2-accessors.c
new file mode 100644
index 000..a3189ab
--- /dev/null
+++ b/drivers/soc/qcom/l2-accessors.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define L2CPUSRSELR_EL1 S3_3_c15_c0_6
+#define L2CPUSRDR_EL1   S3_3_c15_c0_7
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg: write value to an L2 register
+ * @reg: Address of L2 register.
+ * @val: Value to be written to register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+void set_l2_indirect_reg(u64 reg, u64 val)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   write_sysreg(val, L2CPUSRDR_EL1);
+   isb();
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+EXPORT_SYMBOL(set_l2_indirect_reg);
+
+/**
+ * get_l2_indirect_reg: read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+u64 get_l2_indirect_reg(u64 reg)
+{
+   u64 val;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   val = read_sysreg(L2CPUSRDR_EL1);
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+   return val;
+}
+EXPORT_SYMBOL(get_l2_indirect_reg);
diff --git a/include/linux/soc/qcom/l2-accessors.h 
b/include/linux/soc/qcom/l2-accessors.h
new file mode 100644
index 000..e51b72a
--- /dev/null
+++ b/include/linux/soc/qcom/l2-accessors.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __QCOM_L2_ACCESSORS_H
+#define __QCOM_L2_ACCESSORS_H
+
+void set_l2_indirect_reg(u64 reg_addr, u64 val);
+u64 get_l

[PATCH v2 2/2] soc: qcom: add l2 cache perf events driver

2016-08-04 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig   |  10 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/perf_event_l2.c   | 839 +
 include/linux/soc/qcom/perf_event_l2.h |  81 
 4 files changed, 931 insertions(+)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 21ec616..0b5ddb9 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -19,6 +19,16 @@ config QCOM_L2_ACCESSORS
  Provides support for accessing registers in the L2 cache
  for Qualcomm Technologies chips.
 
+config QCOM_PERF_EVENTS_L2
+   bool "Qualcomm Technologies L2-cache perf events"
+   depends on ARCH_QCOM && HW_PERF_EVENTS && ACPI
+   select QCOM_L2_ACCESSORS
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 6ef29b9..c8e89ca9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
+obj-$(CONFIG_QCOM_PERF_EVENTS_L2)  += perf_event_l2.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
new file mode 100644
index 000..54c6790
--- /dev/null
+++ b/drivers/soc/qcom/perf_event_l2.c
@@ -0,0 +1,839 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "l2 perfevents: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * The cache is made-up of one or more slices, each slice has its own PMU.
+ * This structure represents one of the hardware PMUs.
+ *
+ * events, used_mask and prev_count are all sized by MAX_L2_CTRS, i.e. one
+ * entry/bit per counter slot. group_used_mask and group_to_counter are sized
+ * by L2_EVT_GROUP_MAX + 1, i.e. one bit/entry per event group.
+ * NOTE(review): group_to_counter presumably records the counter assigned to
+ * each in-use group, and irq the interrupt number used by this slice -
+ * confirm against the rest of the driver.
+ */
+
+struct hml2_pmu {
+   struct perf_event *events[MAX_L2_CTRS];
+   unsigned long used_mask[BITS_TO_LONGS(MAX_L2_CTRS)];
+   unsigned long group_used_mask[BITS_TO_LONGS(L2_EVT_GROUP_MAX + 1)];
+   int group_to_counter[L2_EVT_GROUP_MAX + 1];
+   int irq;
+   /* The CPU that is used for collecting events on this slice */
+   int on_cpu;
+   /* All the CPUs associated with this slice */
+   cpumask_t slice_cpus;
+   atomic64_t prev_count[MAX_L2_CTRS];
+   spinlock_t pmu_lock;
+};
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ *
+ * The embedded struct pmu is what to_l2cache_pmu() converts back into this
+ * container via container_of().
+ * NOTE(review): num_pmus presumably counts the slice PMUs, num_counters the
+ * counters per slice, cpumask the CPUs used for event collection (one per
+ * slice) and cpu_nb the CPU hotplug notifier - confirm against probe code.
+ */
+struct l2cache_pmu {
+   u32 num_pmus;
+   struct pmu pmu;
+   int num_counters;
+   cpumask_t cpumask;
+   struct notifier_block cpu_nb;
+   struct platform_device *pdev;
+};
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static DEFINE_PER_CPU(struct hml2_pmu *, cpu_to_pmu);
+static struct l2cache_pmu l2cache_pmu = { 0 };
+static u32 l2_cycle_ctr_idx;
+static u32 l2_reset_mask;
+
+static inline u32 idx_to_reg_bit(u32 idx)
+{
+   u32 bit;    /* single-bit mask returned for counter idx */
+
+   if (idx == l2_cycle_ctr_idx)    /* cycle counter: fixed bit position */
+   bit = BIT(L2CYCLE_CTR_BIT);
+   else    /* any other counter: bit number equals the index */
+   bit = BIT(idx);
+   return bit;
+}
+
+static inline struct hml2_pmu *get_hml2_pmu(int cpu)
+{
+   return per_cpu(cpu_to_pmu, cpu);    /* per-CPU slice PMU pointer for cpu */
+}
+
+static void hml2_pmu__reset_on_slice(void *x)
+{
+   /*
+    * Reset all ctrs: write L2PMCR_RESET_ALL to L2PMCR, then write
+    * l2_reset_mask to L2PMCNTENCLR, L2PMINTENCLR and L2PMOVSCLR.
+    * x is unused; hml2_pmu__reset() passes NULL when it calls this
+    * directly, which it only does after checking that the current CPU
+    * is in the slice's slice_cpus mask.
+    */
+   set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+   set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
+}
+
+static inline void hml2_pmu__reset(struct hml2_pmu *slice)
+{
+   int cpu;
+
+   if (cpumask_test_cpu(smp_processor_id(), &slice->slice_cpus)) {
+   hml2_pmu__reset_on_slice(NULL);
+   return;
+   }
+
+   /* Call each cpu in the cluster until one work

[PATCH v2 0/2] qcom: add l2 cache perf events driver

2016-08-04 Thread Neil Leeder
This adds a new dynamic PMU to the perf events framework to program
and control the L2 cache PMUs in some Qualcomm Technologies SOCs.

The driver exports formatting and event information to sysfs so it
can be used by the perf user space tools with the syntax:
perf stat -a -e l2cache_0/event=0x42/

Qualcomm Technologies PMUs have events arranged in a matrix of rows and columns.
Only one event can be enabled from each column at once. This is enforced
by the filter_match callback.

changes from v1:

Add the l2-accessors patch to this patchset, previously posted separately.
Remove sampling and per-task functionality for this uncore PMU.
Use cpumask to replace code which filtered events to one cpu per slice.
Replace manual event filtering with filter_match callback.
Use a separate used_mask for event groups.
Add hotplug notifier for CPU and irq migration.
Remove extraneous synchronisation instructions.
Other miscellaneous cleanup.

Neil Leeder (2):
  soc: qcom: provide mechanism for drivers to access L2 registers
  soc: qcom: add l2 cache perf events driver

 drivers/soc/qcom/Kconfig   |  19 +
 drivers/soc/qcom/Makefile  |   2 +
 drivers/soc/qcom/l2-accessors.c|  66 +++
 drivers/soc/qcom/perf_event_l2.c   | 839 +
 include/linux/soc/qcom/l2-accessors.h  |  20 +
 include/linux/soc/qcom/perf_event_l2.h |  81 
 6 files changed, 1027 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies 
Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



Re: [PATCH 2/2] soc: qcom: add l2 cache perf events driver

2016-06-10 Thread Neil Leeder


On 6/9/2016 03:41 PM, Peter Zijlstra wrote:
> On Thu, Jun 09, 2016 at 04:56:16PM +0100, Mark Rutland wrote:
> +static irqreturn_t l2_cache__handle_irq(int irq_num, void *data)
> +{
> + struct hml2_pmu *slice = data;
> + u32 ovsr;
> + int idx;
> + struct pt_regs *regs;
> +
> + ovsr = hml2_pmu__getreset_ovsr();
> + if (!hml2_pmu__has_overflowed(ovsr))
> + return IRQ_NONE;
> +
> + regs = get_irq_regs();
> +
> + for (idx = 0; idx < l2cache_pmu.num_counters; idx++) {
> + struct perf_event *event = slice->events[idx];
> + struct hw_perf_event *hwc;
> + struct perf_sample_data data;
> +
> + if (!event)
> + continue;
> +
> + if (!hml2_pmu__counter_has_overflowed(ovsr, idx))
> + continue;
> +
> + l2_cache__event_update_from_slice(event, slice);
> + hwc = &event->hw;
> +
> + if (is_sampling_event(event)) {
> + perf_sample_data_init(&data, 0, hwc->last_period);

 I don't think sampling makes sense, given this is an uncore PMU and the
 events are triggered by other CPUs.
>>>
>>> There is origin filtering so events can be attributed to a CPU when 
>>> sampling.
>>
>> Ok. I believe that's different from all other uncore PMUs we support
>> (none of the drivers support sampling, certainly), so I'm not entirely
>> sure how/if we can make use of that sanely and reliably.
> 
> Right; because not only do you need to know which CPU originated the
> event, the IRQ must also happen on that CPU. Simply knowing which CPU
> triggered it is not enough for sampling.
> 
>> For the timebeing, I think this sampling needs to go, and the event_init
>> logic needs to reject sampling as with other uncore PMU drivers.
> 
> Agreed.

I want to make sure I understand what the concern is here.
Given the hardware filter which restricts counting to events generated by
a specific CPU, and an irq which is affine to that CPU, sampling and task mode
would seem to work for a single perf use. 
Is the issue only related to multiple concurrent perf uses?

> 
>> One thing I forgot to mention in my earlier comments is that as an
>> uncore PMU you need to have task_ctx_nr = perf_invalid_context here
>> also.
> 
> For good reasons, uncore PMUs (as is the case here) count strictly more
> than the effect of single CPUs (and thus also the current task). So
> attributing it back to a task is nonsense.
> 

Thanks,
Neil

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH 0/2] qcom: add l2 cache perf events driver

2016-06-08 Thread Neil Leeder


On 6/8/2016 12:12 PM, Mark Rutland wrote:
> On Wed, Jun 08, 2016 at 11:21:16AM -0400, Neil Leeder wrote:
>>
>>
>> On 6/6/2016 05:04 AM, Mark Rutland wrote:
>>> On Fri, Jun 03, 2016 at 05:03:30PM -0400, Neil Leeder wrote:
>>>> This adds a new dynamic PMU to the Perf Events framework to program
>>>> and control the L2 cache PMUs in some Qualcomm Technologies SOCs.
>>>>
>>>> The driver exports formatting and event information to sysfs so it can
>>>> be used by the perf user space tools with the syntax:
>>>> perf stat -e l2cache/event=0x42/
>>>>
>>>> One point to note is that there are certain combinations of events
>>>> which are invalid, and which are detected in event_add().
>>>
>>> Which combinations of events are invalid?
>>>
>>> Please elaborate.
>>>
>>>> Simply having event_add() fail would result in event_sched_in() making
>>>> it Inactive, treating it as over-allocation of counters, leading to
>>>> repeated attempts to allocate the events and ending up with a
>>>> statistical count.  A solution for this situation is to turn the
>>>> conflicting event off in event_add(). This allows a single error
>>>> message to be generated, and no recurring attempts to re-add the
>>>> invalid event. In order for this to work, event_sched_in()
>>>> needs to detect that event_add() changed the state, and not override it
>>>> and force it to Inactive.
>>>
>>> For heterogeneous PMUs, we added the pmu::filter_match(event) callback
>>> for a similar purpose: preventing an event from being scheduled on a
>>> core which does not support that event, while allowing other events to
>>> be scheduled.
>>>
>>> So if you truly need to filter events, the infrastructure for doing so
>>> already exists.
>>>
>>> However, you will need to elaborate on "there are certain combinations
>>> of events which are invalid".
>>>
>>
>> Qualcomm PMUs have events arranged in a matrix of rows and columns.
>> Only one event can be enabled from each column at once. So this isn't a
>> heterogeneous CPU issue, and it doesn't seem to fit into filter_match()
>> because it is not an absolute restriction that this event can't be
>> enabled on this cpu, it's related to the other events which have 
>> already been enabled.
> 
> The above is useful context. Please add (something like) it to the cover
> and relevant patches in future postings!
> 
> Ok. So if I understand correctly, each counter can only count certain
> events (and therefore each event can only go into some counters), rather
> than all counters being identical?
> 
> So the issue is that there is no _suitable_ counter available for an
> event, but there are still counters available for events in general.
> 
> This case is somewhat different to the heterogeneous PMU case.
> 
> Unfortunately, trying to filter events in this manner can be very
> expensive, and allows a malicious user to DoS the system, as Peter
> pointed out when I tried to do similar things in this area. Take a look
> at [1] and associated replies.
> 
> If you can test the availability of a relevant counter very cheaply,
> then having a specific return code for the case of no relevant counter
> may be more palatable.
> 

Not quite. Any event can go into any counter, but once an event from a given
column has been assigned to a counter, no other events from the same column
can be placed in any other counter.

Here I detect this condition on the first call to pmu->add() for
the conflicting event, and turn that event's state to Off.
That should ensure there are no more attempts to schedule it, which should avoid
DoS concerns.

But I may see if filter_match() could be used here anyway. Instead of having a
static list of valid PMUs, look at the list of already enabled events for this
PMU and fail if the conflict is detected. I think this would remove the need for
a change in state if add() is never called for the event.

>>>> This patchset requires:
>>>> [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers
>>>
>>> A link would be remarkably helpful.
>>
>> http://archive.arm.linux.org.uk/lurker/message/20160603.205900.1970f20d.en.html
>>
>>>
>>> Better would be to fold that patch into this series, as it's the only
>>> user, and both are helpful review context for the other.
>>>
>>
>> The L2 PMU driver is the first user of the L2-accessors patch
>> but it won't be the only one, which is why I kept it separate.
> 
> If other users aren't going to appear in the same merge window, IMO it
> would be better to place them in the same series for now. Otherwise,
> please have a link in the cover in future postings.

Ok, makes sense.

> Thanks,
> Mark.
> 
> [1] 
> http://lkml.kernel.org/r/1392054264-23570-5-git-send-email-mark.rutl...@arm.com
> 

Neil

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH 0/2] qcom: add l2 cache perf events driver

2016-06-08 Thread Neil Leeder


On 6/6/2016 05:04 AM, Mark Rutland wrote:
> On Fri, Jun 03, 2016 at 05:03:30PM -0400, Neil Leeder wrote:
>> This adds a new dynamic PMU to the Perf Events framework to program
>> and control the L2 cache PMUs in some Qualcomm Technologies SOCs.
>>
>> The driver exports formatting and event information to sysfs so it can
>> be used by the perf user space tools with the syntax:
>> perf stat -e l2cache/event=0x42/
>>
>> One point to note is that there are certain combinations of events
>> which are invalid, and which are detected in event_add().
> 
> Which combinations of events are invalid?
> 
> Please elaborate.
> 
>> Simply having event_add() fail would result in event_sched_in() making
>> it Inactive, treating it as over-allocation of counters, leading to
>> repeated attempts to allocate the events and ending up with a
>> statistical count.  A solution for this situation is to turn the
>> conflicting event off in event_add(). This allows a single error
>> message to be generated, and no recurring attempts to re-add the
>> invalid event. In order for this to work, event_sched_in()
>> needs to detect that event_add() changed the state, and not override it
>> and force it to Inactive.
> 
> For heterogeneous PMUs, we added the pmu::filter_match(event) callback
> for a similar purpose: preventing an event from being scheduled on a
> core which does not support that event, while allowing other events to
> be scheduled.
> 
> So if you truly need to filter events, the infrastructure for doing so
> already exists.
> 
> However, you will need to elaborate on "there are certain combinations
> of events which are invalid".
> 

Qualcomm PMUs have events arranged in a matrix of rows and columns.
Only one event can be enabled from each column at once. So this isn't a
heterogeneous CPU issue, and it doesn't seem to fit into filter_match()
because it is not an absolute restriction that this event can't be
enabled on this cpu, it's related to the other events which have 
already been enabled.

>> This patchset requires:
>> [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers
> 
> A link would be remarkably helpful.

http://archive.arm.linux.org.uk/lurker/message/20160603.205900.1970f20d.en.html

> 
> Better would be to fold that patch into this series, as it's the only
> user, and both are helpful review context for the other.
> 

The L2 PMU driver is the first user of the L2-accessors patch
but it won't be the only one, which is why I kept it separate.

> Thanks,
> Mark.
> 

Neil

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH 2/2] soc: qcom: add l2 cache perf events driver

2016-06-08 Thread Neil Leeder
Mark,
Thank you for the detailed review.

On 6/6/2016 05:51 AM, Mark Rutland wrote:
> On Fri, Jun 03, 2016 at 05:03:32PM -0400, Neil Leeder wrote:
>> Adds perf events support for L2 cache PMU.
>>
>> The L2 cache PMU driver is named 'l2cache' and can be used
>> with perf events to profile L2 events such as cache hits
>> and misses.
>>
>> Signed-off-by: Neil Leeder 
>> ---
>>  drivers/soc/qcom/Kconfig   |  10 +
>>  drivers/soc/qcom/Makefile  |   1 +
>>  drivers/soc/qcom/perf_event_l2.c   | 917 
>> +
>>  include/linux/soc/qcom/perf_event_l2.h |  82 +++
>>  4 files changed, 1010 insertions(+)
>>  create mode 100644 drivers/soc/qcom/perf_event_l2.c
>>  create mode 100644 include/linux/soc/qcom/perf_event_l2.h
>>

[...]

>> +++ b/drivers/soc/qcom/perf_event_l2.c
>> @@ -0,0 +1,917 @@
>> +/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +#define pr_fmt(fmt) "l2 perfevents: " fmt
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/*
>> + * The cache is made-up of one or more slices, each slice has its own PMU.
>> + * This structure represents one of the hardware PMUs.
>> + */
> 
> I take it each slice PMU is shared by several CPUs? i.e. there aren't
> per-cpu slice PMU counters.
> 

That is correct.

>> +struct hml2_pmu {
>> +struct list_head entry;
>> +struct perf_event *events[MAX_L2_CTRS];
>> +unsigned long used_mask[BITS_TO_LONGS(MAX_L2_EVENTS)];
> 
> What's the difference between MAX_L2_CTRS and MAX_L2_EVENTS?
> 
> I'm surprised that they are different. What precisely do either
> represent?
> 
> Surely you don't have different events per-slice? Why do you need the
> PMU pointers at the slice level?
> 

Qualcomm PMUs have events arranged in a matrix of rows (codes) and columns
(groups). Only one event can be enabled from each group at once.
The upper part of used_mask is used to keep a record of which group has been
used. This is the same mechanism used in armv7
(arch/arm/kernel/perf_event_v7.c:krait_event_to_bit()).
So used_mask contains both an indication for a physical counter in use, and also
for the group, which is why it's a different size from MAX_L2_CTRS.

I kept this because it's what's done in armv7. If there's an objection, I can
move the group used_mask to its own bitmap.

>> +unsigned int valid_cpus;
>> +int on_cpu;
>> +u8 cpu[MAX_CPUS_IN_CLUSTER];
> 
> These all look suspicious to me (potentially barring on_cpu)
> 
> Surely this is an uncore PMU? It represents a shared resource, with
> shared counters, so it should be.
> 
> If you need to encode a set of CPUs, use a cpumask.
> 

Agreed. I will use a cpumask.

>> +atomic64_t prev_count[MAX_L2_CTRS];
>> +spinlock_t pmu_lock;
>> +};
>> +
>> +/*
>> + * Aggregate PMU. Implements the core pmu functions and manages
>> + * the hardware PMUs.
>> + */
>> +struct l2cache_pmu {
>> +u32 num_pmus;
>> +struct list_head pmus;
>> +struct pmu pmu;
>> +int num_counters;
>> +};
>> +
>> +#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
>> +
>> +static DEFINE_PER_CPU(struct hml2_pmu *, cpu_to_pmu);
>> +static struct l2cache_pmu l2cache_pmu = { 0 };
>> +static int num_cpus_in_cluster;
>> +
>> +static u32 l2_cycle_ctr_idx;
>> +static u32 l2_reset_mask;
>> +static u32 mpidr_affl1_shift;
> 
> Eww. Drivers really shouldn't be messing with the MPIDR. The precise
> values are bound to change between generations of SoCs leaving us with a
> mess.
> 
> The FW should tell us precisely which CPUs device are affine to.
> 

During partial goods processing firmware renumbers the CPUs.
The only association between the CPU numbers the kernel sees and the
physical CPUs & slices is through MPIDR. But

[PATCH 2/2] soc: qcom: add l2 cache perf events driver

2016-06-03 Thread Neil Leeder
Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache' and can be used
with perf events to profile L2 events such as cache hits
and misses.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig   |  10 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/perf_event_l2.c   | 917 +
 include/linux/soc/qcom/perf_event_l2.h |  82 +++
 4 files changed, 1010 insertions(+)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 21ec616..0b5ddb9 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -19,6 +19,16 @@ config QCOM_L2_ACCESSORS
  Provides support for accessing registers in the L2 cache
  for Qualcomm Technologies chips.
 
+config QCOM_PERF_EVENTS_L2
+   bool "Qualcomm Technologies L2-cache perf events"
+   depends on ARCH_QCOM && HW_PERF_EVENTS && ACPI
+   select QCOM_L2_ACCESSORS
+ help
+ Provides support for the L2 cache performance monitor unit (PMU)
+ in Qualcomm Technologies processors.
+ Adds the L2 cache PMU into the perf events subsystem for
+ monitoring L2 cache events.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 6ef29b9..c8e89ca9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
 obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
+obj-$(CONFIG_QCOM_PERF_EVENTS_L2)  += perf_event_l2.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/perf_event_l2.c b/drivers/soc/qcom/perf_event_l2.c
new file mode 100644
index 000..2485b9e
--- /dev/null
+++ b/drivers/soc/qcom/perf_event_l2.c
@@ -0,0 +1,917 @@
+/* Copyright (c) 2015,2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "l2 perfevents: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * The cache is made-up of one or more slices, each slice has its own PMU.
+ * This structure represents one of the hardware PMUs.
+ *
+ * events and prev_count hold one entry per counter (MAX_L2_CTRS). used_mask
+ * is sized by MAX_L2_EVENTS rather than MAX_L2_CTRS.
+ * cpu[] has room for MAX_CPUS_IN_CLUSTER entries; valid_cpus is used as the
+ * loop bound when hml2_pmu__reset() walks the slice's CPUs.
+ * NOTE(review): entry presumably links the slice into l2cache_pmu.pmus, and
+ * used_mask presumably carries event-group bits above the counter bits -
+ * confirm against the allocation code.
+ */
+struct hml2_pmu {
+   struct list_head entry;
+   struct perf_event *events[MAX_L2_CTRS];
+   unsigned long used_mask[BITS_TO_LONGS(MAX_L2_EVENTS)];
+   unsigned int valid_cpus;
+   int on_cpu;
+   u8 cpu[MAX_CPUS_IN_CLUSTER];
+   atomic64_t prev_count[MAX_L2_CTRS];
+   spinlock_t pmu_lock;
+};
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ *
+ * The embedded struct pmu is what to_l2cache_pmu() converts back into this
+ * container via container_of().
+ * NOTE(review): pmus is presumably the list head that each hml2_pmu.entry is
+ * linked into, with num_pmus counting its members and num_counters giving
+ * the number of counters per slice - confirm against probe code.
+ */
+struct l2cache_pmu {
+   u32 num_pmus;
+   struct list_head pmus;
+   struct pmu pmu;
+   int num_counters;
+};
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static DEFINE_PER_CPU(struct hml2_pmu *, cpu_to_pmu);
+static struct l2cache_pmu l2cache_pmu = { 0 };
+static int num_cpus_in_cluster;
+
+static u32 l2_cycle_ctr_idx;
+static u32 l2_reset_mask;
+static u32 mpidr_affl1_shift;
+
+static inline u32 idx_to_reg(u32 idx)
+{
+   u32 bit;    /* single-bit mask returned for counter idx */
+
+   if (idx == l2_cycle_ctr_idx)    /* cycle counter: fixed bit position */
+   bit = BIT(L2CYCLE_CTR_BIT);
+   else    /* any other counter: bit number equals the index */
+   bit = BIT(idx);
+   return bit;
+}
+
+static struct hml2_pmu *get_hml2_pmu(struct l2cache_pmu *system, int cpu)
+{
+   if (cpu < 0)    /* a negative cpu selects the CPU we are running on */
+   cpu = smp_processor_id();
+
+   return per_cpu(cpu_to_pmu, cpu);    /* system is not consulted here */
+}
+
+static void hml2_pmu__reset_on_slice(void *x)
+{
+   /*
+    * Reset all ctrs: write L2PMCR_RESET_ALL to L2PMCR, then write
+    * l2_reset_mask to L2PMCNTENCLR, L2PMINTENCLR and L2PMOVSCLR.
+    * x is unused; hml2_pmu__reset() passes NULL when it calls this
+    * directly, which it only does when the current CPU's cpu_to_pmu
+    * entry is the slice being reset.
+    */
+   set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+   set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
+   set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
+}
+
+static inline void hml2_pmu__reset(struct hml2_pmu *slice)
+{
+   int i;
+
+   if (per_cpu(cpu_to_pmu, smp_processor_id()) == slice) {
+   hml2_pmu__reset_on_slice(NULL);
+   return;
+   }
+
+   /* Call each cpu in the cluster until one works */
+   for (i = 0; i < slice->valid_cpus; i++) {
+   if (!smp_call_function_single(slice->cp

[PATCH 1/2] perf: allow add to change event state

2016-06-03 Thread Neil Leeder
When the platform-specific pmu->add function returns
an error, it may have also changed the event's state.
If so, do not override that new state.

Signed-off-by: Neil Leeder 
---
 kernel/events/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c0ded24..95c4cf3d3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1952,7 +1952,8 @@ event_sched_in(struct perf_event *event,
perf_log_itrace_start(event);
 
if (event->pmu->add(event, PERF_EF_START)) {
-   event->state = PERF_EVENT_STATE_INACTIVE;
+   if (event->state == PERF_EVENT_STATE_ACTIVE)
+   event->state = PERF_EVENT_STATE_INACTIVE;
event->oncpu = -1;
ret = -EAGAIN;
goto out;
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH 0/2] qcom: add l2 cache perf events driver

2016-06-03 Thread Neil Leeder
This adds a new dynamic PMU to the Perf Events framework to program
and control the L2 cache PMUs in some Qualcomm Technologies SOCs.

The driver exports formatting and event information to sysfs so it can
be used by the perf user space tools with the syntax:
perf stat -e l2cache/event=0x42/

One point to note is that there are certain combinations of events
which are invalid, and which are detected in event_add(). Simply having
event_add() fail would result in event_sched_in() making it Inactive,
treating it as over-allocation of counters, leading to
repeated attempts to allocate the events and ending up with a
statistical count.  A solution for this situation is to turn the
conflicting event off in event_add(). This allows a single error
message to be generated, and no recurring attempts to re-add
the invalid event. In order for this to work, event_sched_in()
needs to detect that event_add() changed the state, and not override it
and force it to Inactive.

This patchset requires:
[PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

Neil Leeder (2):
  perf: allow add to change event state
  soc: qcom: add l2 cache perf events driver

 drivers/soc/qcom/Kconfig   |  10 +
 drivers/soc/qcom/Makefile  |   1 +
 drivers/soc/qcom/perf_event_l2.c   | 917 +
 include/linux/soc/qcom/perf_event_l2.h |  82 +++
 kernel/events/core.c   |   3 +-
 5 files changed, 1012 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/qcom/perf_event_l2.c
 create mode 100644 include/linux/soc/qcom/perf_event_l2.h

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.



[PATCH v2] soc: qcom: provide mechanism for drivers to access L2 registers

2016-06-03 Thread Neil Leeder
L2 registers are accessed using a select register and data
register pair. To prevent multiple concurrent writes to the
select register by independent drivers, the write to the
select register and the associated access of the data register
are protected with a lock. All drivers accessing the L2
registers use the set and get functions provided by
l2-accessors to ensure correct reads and writes to L2 registers.

Signed-off-by: Neil Leeder 
---

 Changes since v1:
Add ARM64 dependency
Replace module.h with export.h
Remove unused dummy functions and ifdef in header

 drivers/soc/qcom/Kconfig  |  9 +
 drivers/soc/qcom/Makefile |  1 +
 drivers/soc/qcom/l2-accessors.c   | 66 +++
 include/linux/soc/qcom/l2-accessors.h | 20 +++
 4 files changed, 96 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..21ec616 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,15 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_L2_ACCESSORS
+   bool "Qualcomm Technologies L2-cache accessors"
+   depends on ARCH_QCOM && ARM64
+   help
+ Say y here to enable support for the Qualcomm Technologies
+ L2 accessors.
+ Provides support for accessing registers in the L2 cache
+ for Qualcomm Technologies chips.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..6ef29b9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/l2-accessors.c b/drivers/soc/qcom/l2-accessors.c
new file mode 100644
index 000..a3189ab
--- /dev/null
+++ b/drivers/soc/qcom/l2-accessors.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#defineL2CPUSRSELR_EL1 S3_3_c15_c0_6
+#defineL2CPUSRDR_EL1   S3_3_c15_c0_7
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg() - write value to an L2 register
+ * @reg: Address of L2 register.
+ * @val: Value to be written to register.
+ *
+ * Selects @reg via L2CPUSRSELR_EL1, then writes @val to L2CPUSRDR_EL1, under
+ * l2_access_lock with IRQs off; an isb() follows each system register write.
+ */
+void set_l2_indirect_reg(u64 reg, u64 val)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   write_sysreg(val, L2CPUSRDR_EL1);
+   isb();
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+EXPORT_SYMBOL(set_l2_indirect_reg);
+
+/**
+ * get_l2_indirect_reg() - read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Selects @reg via L2CPUSRSELR_EL1, issues an isb(), then reads L2CPUSRDR_EL1,
+ * all under l2_access_lock with IRQs off. Returns the value that was read.
+ */
+u64 get_l2_indirect_reg(u64 reg)
+{
+   u64 val;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   val = read_sysreg(L2CPUSRDR_EL1);
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+   return val;
+}
+EXPORT_SYMBOL(get_l2_indirect_reg);
diff --git a/include/linux/soc/qcom/l2-accessors.h b/include/linux/soc/qcom/l2-accessors.h
new file mode 100644
index 000..e51b72a
--- /dev/null
+++ b/include/linux/soc/qcom/l2-accessors.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more det

Re: [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

2016-05-26 Thread Neil Leeder


On 5/26/2016 12:48 AM, Bjorn Andersson wrote:
> On Tue 24 May 12:54 PDT 2016, Neil Leeder wrote:
> 
>>
>>
>> On 5/24/2016 07:23 AM, Mark Rutland wrote:
>>> On Mon, May 23, 2016 at 02:22:59PM -0400, Neil Leeder wrote:
>>>>
>>>> On 5/23/2016 01:25 PM, Mark Rutland wrote:
>>>>> On Fri, May 20, 2016 at 03:13:07PM -0400, Neil Leeder wrote:
>>
>>>>>> Signed-off-by: Neil Leeder 
>>>>>> ---
>>>>>>  drivers/soc/qcom/Kconfig  |  9 +
>>>>>>  drivers/soc/qcom/Makefile |  1 +
>>>>>>  drivers/soc/qcom/l2-accessors.c   | 66 +++
>>>>>>  include/linux/soc/qcom/l2-accessors.h | 27 ++
>>>>>>  4 files changed, 103 insertions(+)
>>>>>>  create mode 100644 drivers/soc/qcom/l2-accessors.c
>>>>>>  create mode 100644 include/linux/soc/qcom/l2-accessors.h
>>>>>
>>>>> These are awfully generic file names (and function names). Which SoCs
>>>>> does this apply to?
>>>>>
>>>>> It would be good to give these more specific names.
>>>>
>>>> It's under soc/qcom, and dependent on ARCH_QCOM and (in v2) also on ARM64. 
>>>> It applies to all QCOM ARM64 SoCs.
>>>
>>> Per Christopher's comment, it sounds like this applies to QDF24xx.
>>>
>>> Given that the code uses IMPLEMENTATION DEFINED system registers, I
>>> presume that this does not apply to MSM8916 which uses Cortex-A53, for
>>> example (though perhaps it does, and I am mistaken).
>>>
>>>> Given that it can only be used in a QCOM driver, and the include path has 
>>>> qcom in it, I'd
>>>> prefer not to add redundancy by adding another qcom in there.
>>>
>>> I'm not asking for another "qcom", but simply the SoC variant or family
>>> (e.g. "qdf24xx" would be fine).
>>>
>>
>> It applies to all ARMv8 SoCs with QCOM processors in them. So QDF24xx
>> and mobile 820, but not SoCs with ARM processors in them such as
>> MSM8916. So neither msm_ nor qdf_ are accurate prefixes.
> 
> What's the code name for the SoC in QDF24xx? The 820 is Kryo, is it the
> same core in QDF24xx or does that have some other name.
> 
> We should try to pick something adding value, not adding another generic
> thing.

There is currently no public name for the QDF24xx core.

> 
>> As Timur pointed out, the majority of source files in drivers/soc/qcom
>> don't have any prefix, which is a reason why I didn't include one.
>>
> 
> There's no reason to add a generic "qcom" to the qcom folder, if
> anything we should drop the "qcom" prefix of the only one in there.
> 
> Regards,
> Bjorn
> 

Neil

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

2016-05-24 Thread Neil Leeder


On 5/24/2016 07:23 AM, Mark Rutland wrote:
> On Mon, May 23, 2016 at 02:22:59PM -0400, Neil Leeder wrote:
>>
>> On 5/23/2016 01:25 PM, Mark Rutland wrote:
>>> On Fri, May 20, 2016 at 03:13:07PM -0400, Neil Leeder wrote:

>>>> Signed-off-by: Neil Leeder 
>>>> ---
>>>>  drivers/soc/qcom/Kconfig  |  9 +
>>>>  drivers/soc/qcom/Makefile |  1 +
>>>>  drivers/soc/qcom/l2-accessors.c   | 66 +++
>>>>  include/linux/soc/qcom/l2-accessors.h | 27 ++
>>>>  4 files changed, 103 insertions(+)
>>>>  create mode 100644 drivers/soc/qcom/l2-accessors.c
>>>>  create mode 100644 include/linux/soc/qcom/l2-accessors.h
>>>
>>> These are awfully generic file names (and function names). Which SoCs
>>> does this apply to?
>>>
>>> It would be good to give these more specific names.
>>
>> It's under soc/qcom, and dependent on ARCH_QCOM and (in v2) also on ARM64. 
>> It applies to all QCOM ARM64 SoCs.
> 
> Per Christopher's comment, it sounds like this applies to QDF24xx.
> 
> Given that the code uses IMPLEMENTATION DEFINED system registers, I
> presume that this does not apply to MSM8916 which uses Cortex-A53, for
> example (though perhaps it does, and I am mistaken).
> 
>> Given that it can only be used in a QCOM driver, and the include path has 
>> qcom in it, I'd
>> prefer not to add redundancy by adding another qcom in there.
> 
> I'm not asking for another "qcom", but simply the SoC variant or family
> (e.g. "qdf24xx" would be fine).
> 

It applies to all ARMv8 SoCs with QCOM processors in them. So QDF24xx and 
mobile 820, but not SoCs
with ARM processors in them such as MSM8916. So neither msm_ nor qdf_ are 
accurate prefixes. As Timur
pointed out, the majority of source files in drivers/soc/qcom don't have any 
prefix, which is a
reason why I didn't include one.

>>>> diff --git a/include/linux/soc/qcom/l2-accessors.h 
>>>> b/include/linux/soc/qcom/l2-accessors.h
>>>> new file mode 100644
>>>> index 000..563c114
>>>> --- /dev/null
>>>> +++ b/include/linux/soc/qcom/l2-accessors.h
>>>> @@ -0,0 +1,27 @@
>>>> +/*
>>>> + * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or modify
>>>> + * it under the terms of the GNU General Public License version 2 and
>>>> + * only version 2 as published by the Free Software Foundation.
>>>> + *
>>>> + * This program is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>> + * GNU General Public License for more details.
>>>> + */
>>>> +
>>>
>>>> +#ifndef __QCOM_L2_ACCESSORS_H
>>>> +#define __QCOM_L2_ACCESSORS_H
>>>> +
>>>> +#ifdef CONFIG_QCOM_L2_ACCESSORS
>>>> +void set_l2_indirect_reg(u64 reg_addr, u64 val);
>>>> +u64 get_l2_indirect_reg(u64 reg_addr);
>>>> +#else
>>>> +static inline void set_l2_indirect_reg(u64 reg_addr, u64 val) {}
>>>> +static inline u64 get_l2_indirect_reg(u64 reg_addr)
>>>> +{
>>>> +  return 0;
>>>> +}
>>>
>>> Surely it would be better to error out on any unintentional use of these
>>> at build time?
>>
>> This allows building code which is common to ARM SoCs and QCOM SoCs without 
>> having to ifdef out the
>> QCOM-specific pieces.
> 
> These shouldn't appear in generic code.
> 
> Other than the L2 PMU driver (which presumably depends on or selects
> CONFIG_QCOM_L2_ACCESSORS), what code would you have to ifdef?
> 
> I don't have a major concern on this, I just don't see where it should
> matter.

Ok, I agree, I will remove this. Thanks.

> 
> Thanks,
> Mark.
> 

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

2016-05-23 Thread Neil Leeder

On 5/23/2016 01:25 PM, Mark Rutland wrote:
> On Fri, May 20, 2016 at 03:13:07PM -0400, Neil Leeder wrote:
>> L2 registers are accessed using a select register and data
>> register pair. To prevent multiple concurrent writes to the
>> select register by independent drivers, the write to the
>> select register and the associated access of the data register
>> are protected with a lock. All drivers accessing the L2
>> registers use the set and get functions provided by
>> l2-accessors to ensure correct reads and writes to L2 registers.
> 
> What will this be used for? (i.e. which drivers want to touch the L2
> registers?).
> 
> Generally we expect FW to configure the caches and interconnect
> appropriately.

The primary use is in the L2 PMU driver, which will be posted shortly.

> 
>> Signed-off-by: Neil Leeder 
>> ---
>>  drivers/soc/qcom/Kconfig  |  9 +
>>  drivers/soc/qcom/Makefile |  1 +
>>  drivers/soc/qcom/l2-accessors.c   | 66 +++
>>  include/linux/soc/qcom/l2-accessors.h | 27 ++
>>  4 files changed, 103 insertions(+)
>>  create mode 100644 drivers/soc/qcom/l2-accessors.c
>>  create mode 100644 include/linux/soc/qcom/l2-accessors.h
> 
> These are awfully generic file names (and function names). Which SoCs
> does this apply to?
> 
> It would be good to give these more specific names.

It's under soc/qcom, and dependent on ARCH_QCOM and (in v2) also on ARM64. It 
applies to all QCOM ARM64 SoCs.
Given that it can only be used in a QCOM driver, and the include path has qcom 
in it, I'd
prefer not to add redundancy by adding another qcom in there.

>> diff --git a/include/linux/soc/qcom/l2-accessors.h 
>> b/include/linux/soc/qcom/l2-accessors.h
>> new file mode 100644
>> index 000..563c114
>> --- /dev/null
>> +++ b/include/linux/soc/qcom/l2-accessors.h
>> @@ -0,0 +1,27 @@
>> +/*
>> + * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
> 
>> +#ifndef __QCOM_L2_ACCESSORS_H
>> +#define __QCOM_L2_ACCESSORS_H
>> +
>> +#ifdef CONFIG_QCOM_L2_ACCESSORS
>> +void set_l2_indirect_reg(u64 reg_addr, u64 val);
>> +u64 get_l2_indirect_reg(u64 reg_addr);
>> +#else
>> +static inline void set_l2_indirect_reg(u64 reg_addr, u64 val) {}
>> +static inline u64 get_l2_indirect_reg(u64 reg_addr)
>> +{
>> +return 0;
>> +}
> 
> Surely it would be better to error out on any unintentional use of these
> at build time?

This allows building code which is common to ARM SoCs and QCOM SoCs without 
having to ifdef out the
QCOM-specific pieces.

> 
> Thanks,
> Mark.
> 

Thanks,
Neil

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

2016-05-23 Thread Neil Leeder


On 5/23/2016 01:04 PM, Stephen Boyd wrote:
> On 05/23/2016 08:43 AM, Neil Leeder wrote:
>>
>> On 5/20/2016 05:19 PM, Stephen Boyd wrote:
>>
>>>
>>> Is there a patch to add sysreg.h to arch/arm? It would be nice to use
>>> one l2 accessor API on arm64 and arm.
>>>
>> Sounds like a good thing for the next person who submits a krait L2 patch to 
>> consider.
>>
> 
> Heh, ok. If it isn't supported in this patch then we need to make the
> config depend on ARM64 so that this doesn't fail to compile on ARM.
> 

Ok, I'll add that in v2 too.

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


Re: [PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

2016-05-23 Thread Neil Leeder


On 5/20/2016 05:19 PM, Stephen Boyd wrote:
> On 05/20/2016 12:13 PM, Neil Leeder wrote:
>> diff --git a/drivers/soc/qcom/l2-accessors.c 
>> b/drivers/soc/qcom/l2-accessors.c
>> new file mode 100644
>> index 000..fbb69bd
>> --- /dev/null
>> +++ b/drivers/soc/qcom/l2-accessors.c
>> @@ -0,0 +1,66 @@
>> +/*
>> + * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include 
>> +#include 
> 
> export.h instead?

OK, will address in v2

> 
>> +#include 
>> +#include 
>> +#include 
> 
> Is there a patch to add sysreg.h to arch/arm? It would be nice to use
> one l2 accessor API on arm64 and arm.
> 

Sounds like a good thing for the next person who submits a krait L2 patch to 
consider.

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.


[PATCH] soc: qcom: provide mechanism for drivers to access L2 registers

2016-05-20 Thread Neil Leeder
L2 registers are accessed using a select register and data
register pair. To prevent multiple concurrent writes to the
select register by independent drivers, the write to the
select register and the associated access of the data register
are protected with a lock. All drivers accessing the L2
registers use the set and get functions provided by
l2-accessors to ensure correct reads and writes to L2 registers.

Signed-off-by: Neil Leeder 
---
 drivers/soc/qcom/Kconfig  |  9 +
 drivers/soc/qcom/Makefile |  1 +
 drivers/soc/qcom/l2-accessors.c   | 66 +++
 include/linux/soc/qcom/l2-accessors.h | 27 ++
 4 files changed, 103 insertions(+)
 create mode 100644 drivers/soc/qcom/l2-accessors.c
 create mode 100644 include/linux/soc/qcom/l2-accessors.h

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 461b387..c8498cd 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,6 +10,15 @@ config QCOM_GSBI
   functions for connecting the underlying serial UART, SPI, and I2C
   devices to the output pins.
 
+config QCOM_L2_ACCESSORS
+   bool "Qualcomm Technologies L2-cache accessors"
+   depends on ARCH_QCOM
+   help
+ Say y here to enable support for the Qualcomm Technologies
+ L2 accessors.
+ Provides support for accessing registers in the L2 cache
+ for Qualcomm Technologies chips.
+
 config QCOM_PM
bool "Qualcomm Power Management"
depends on ARCH_QCOM && !ARM64
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index fdd664e..6ef29b9 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)+=  qcom_gsbi.o
+obj-$(CONFIG_QCOM_L2_ACCESSORS) += l2-accessors.o
 obj-$(CONFIG_QCOM_PM)  +=  spm.o
 obj-$(CONFIG_QCOM_SMD) +=  smd.o
 obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o
diff --git a/drivers/soc/qcom/l2-accessors.c b/drivers/soc/qcom/l2-accessors.c
new file mode 100644
index 000..fbb69bd
--- /dev/null
+++ b/drivers/soc/qcom/l2-accessors.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2014-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define L2CPUSRSELR_EL1 S3_3_c15_c0_6
+#define L2CPUSRDR_EL1   S3_3_c15_c0_7
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg: write value to an L2 register
+ * @reg: Address of L2 register.
+ * @val: Value to be written to register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+void set_l2_indirect_reg(u64 reg, u64 val)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   write_sysreg(val, L2CPUSRDR_EL1);
+   isb();
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+EXPORT_SYMBOL(set_l2_indirect_reg);
+
+/**
+ * get_l2_indirect_reg: read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+u64 get_l2_indirect_reg(u64 reg)
+{
+   u64 val;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&l2_access_lock, flags);
+   write_sysreg(reg, L2CPUSRSELR_EL1);
+   isb();
+   val = read_sysreg(L2CPUSRDR_EL1);
+   raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+   return val;
+}
+EXPORT_SYMBOL(get_l2_indirect_reg);
diff --git a/include/linux/soc/qcom/l2-accessors.h b/include/linux/soc/qcom/l2-accessors.h
new file mode 100644
index 000..563c114
--- /dev/null
+++ b/include/linux/soc/qcom/l2-accessors.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2011-2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __QCOM_L2_ACCESSORS_H
+#define __QCOM_L2_ACCESSORS_H
+
+#ifdef CONFIG_QCOM_L2_ACCESSORS
+void set_l2_indirect_reg(u64 reg_addr, u64